aboutsummaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kbuild9
-rw-r--r--arch/s390/Kconfig807
-rw-r--r--arch/s390/Kconfig.debug34
-rw-r--r--arch/s390/Makefile60
-rw-r--r--arch/s390/appldata/appldata.h16
-rw-r--r--arch/s390/appldata/appldata_base.c325
-rw-r--r--arch/s390/appldata/appldata_mem.c67
-rw-r--r--arch/s390/appldata/appldata_net_sum.c72
-rw-r--r--arch/s390/appldata/appldata_os.c104
-rw-r--r--arch/s390/boot/.gitignore2
-rw-r--r--arch/s390/boot/Makefile16
-rw-r--r--arch/s390/boot/compressed/.gitignore3
-rw-r--r--arch/s390/boot/compressed/Makefile71
-rw-r--r--arch/s390/boot/compressed/head31.S51
-rw-r--r--arch/s390/boot/compressed/head64.S48
-rw-r--r--arch/s390/boot/compressed/misc.c175
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S55
-rw-r--r--arch/s390/boot/compressed/vmlinux.scr10
-rw-r--r--arch/s390/boot/install.sh4
-rw-r--r--arch/s390/configs/default_defconfig687
-rw-r--r--arch/s390/configs/gcov_defconfig640
-rw-r--r--arch/s390/configs/performance_defconfig632
-rw-r--r--arch/s390/configs/zfcpdump_defconfig86
-rw-r--r--arch/s390/crypto/Makefile3
-rw-r--r--arch/s390/crypto/aes_s390.c481
-rw-r--r--arch/s390/crypto/crypt_s390.h118
-rw-r--r--arch/s390/crypto/crypto_des.h18
-rw-r--r--arch/s390/crypto/des_check_key.c132
-rw-r--r--arch/s390/crypto/des_s390.c565
-rw-r--r--arch/s390/crypto/ghash_s390.c166
-rw-r--r--arch/s390/crypto/prng.c16
-rw-r--r--arch/s390/crypto/sha.h6
-rw-r--r--arch/s390/crypto/sha1_s390.c70
-rw-r--r--arch/s390/crypto/sha256_s390.c128
-rw-r--r--arch/s390/crypto/sha512_s390.c119
-rw-r--r--arch/s390/crypto/sha_common.c33
-rw-r--r--arch/s390/defconfig962
-rw-r--r--arch/s390/hypfs/Makefile2
-rw-r--r--arch/s390/hypfs/hypfs.h56
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c130
-rw-r--r--arch/s390/hypfs/hypfs_diag.c172
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c141
-rw-r--r--arch/s390/hypfs/hypfs_vm.c146
-rw-r--r--arch/s390/hypfs/inode.c260
-rw-r--r--arch/s390/include/asm/Kbuild7
-rw-r--r--arch/s390/include/asm/airq.h103
-rw-r--r--arch/s390/include/asm/appldata.h88
-rw-r--r--arch/s390/include/asm/asm-offsets.h1
-rw-r--r--arch/s390/include/asm/atomic.h415
-rw-r--r--arch/s390/include/asm/barrier.h51
-rw-r--r--arch/s390/include/asm/bitops.h482
-rw-r--r--arch/s390/include/asm/bug.h71
-rw-r--r--arch/s390/include/asm/bugs.h20
-rw-r--r--arch/s390/include/asm/cache.h18
-rw-r--r--arch/s390/include/asm/cacheflush.h16
-rw-r--r--arch/s390/include/asm/ccwdev.h233
-rw-r--r--arch/s390/include/asm/ccwgroup.h73
-rw-r--r--arch/s390/include/asm/checksum.h140
-rw-r--r--arch/s390/include/asm/chpid.h50
-rw-r--r--arch/s390/include/asm/cio.h315
-rw-r--r--arch/s390/include/asm/clp.h28
-rw-r--r--arch/s390/include/asm/cmb.h12
-rw-r--r--arch/s390/include/asm/cmpxchg.h304
-rw-r--r--arch/s390/include/asm/compat.h359
-rw-r--r--arch/s390/include/asm/cpcmd.h32
-rw-r--r--arch/s390/include/asm/cpu.h26
-rw-r--r--arch/s390/include/asm/cpu_mf.h283
-rw-r--r--arch/s390/include/asm/cputime.h192
-rw-r--r--arch/s390/include/asm/crw.h69
-rw-r--r--arch/s390/include/asm/css_chars.h38
-rw-r--r--arch/s390/include/asm/ctl_reg.h82
-rw-r--r--arch/s390/include/asm/current.h18
-rw-r--r--arch/s390/include/asm/debug.h237
-rw-r--r--arch/s390/include/asm/delay.h24
-rw-r--r--arch/s390/include/asm/device.h7
-rw-r--r--arch/s390/include/asm/diag.h52
-rw-r--r--arch/s390/include/asm/dis.h52
-rw-r--r--arch/s390/include/asm/div64.h1
-rw-r--r--arch/s390/include/asm/dma-mapping.h79
-rw-r--r--arch/s390/include/asm/dma.h19
-rw-r--r--arch/s390/include/asm/eadm.h117
-rw-r--r--arch/s390/include/asm/ebcdic.h48
-rw-r--r--arch/s390/include/asm/elf.h230
-rw-r--r--arch/s390/include/asm/emergency-restart.h6
-rw-r--r--arch/s390/include/asm/etr.h256
-rw-r--r--arch/s390/include/asm/exec.h12
-rw-r--r--arch/s390/include/asm/extmem.h31
-rw-r--r--arch/s390/include/asm/facility.h67
-rw-r--r--arch/s390/include/asm/fb.h12
-rw-r--r--arch/s390/include/asm/fcx.h311
-rw-r--r--arch/s390/include/asm/ftrace.h26
-rw-r--r--arch/s390/include/asm/futex.h96
-rw-r--r--arch/s390/include/asm/hardirq.h26
-rw-r--r--arch/s390/include/asm/hugetlb.h115
-rw-r--r--arch/s390/include/asm/hw_irq.h11
-rw-r--r--arch/s390/include/asm/idals.h248
-rw-r--r--arch/s390/include/asm/io.h72
-rw-r--r--arch/s390/include/asm/ipl.h182
-rw-r--r--arch/s390/include/asm/irq.h109
-rw-r--r--arch/s390/include/asm/irq_regs.h1
-rw-r--r--arch/s390/include/asm/irqflags.h72
-rw-r--r--arch/s390/include/asm/isc.h28
-rw-r--r--arch/s390/include/asm/itcw.h30
-rw-r--r--arch/s390/include/asm/jump_label.h37
-rw-r--r--arch/s390/include/asm/kdebug.h27
-rw-r--r--arch/s390/include/asm/kexec.h63
-rw-r--r--arch/s390/include/asm/kmap_types.h6
-rw-r--r--arch/s390/include/asm/kprobes.h89
-rw-r--r--arch/s390/include/asm/kvm_host.h454
-rw-r--r--arch/s390/include/asm/kvm_para.h157
-rw-r--r--arch/s390/include/asm/linkage.h9
-rw-r--r--arch/s390/include/asm/local.h1
-rw-r--r--arch/s390/include/asm/local64.h1
-rw-r--r--arch/s390/include/asm/lowcore.h361
-rw-r--r--arch/s390/include/asm/mathemu.h28
-rw-r--r--arch/s390/include/asm/mman.h15
-rw-r--r--arch/s390/include/asm/mmu.h42
-rw-r--r--arch/s390/include/asm/mmu_context.h123
-rw-r--r--arch/s390/include/asm/module.h34
-rw-r--r--arch/s390/include/asm/mutex.h9
-rw-r--r--arch/s390/include/asm/nmi.h66
-rw-r--r--arch/s390/include/asm/os_info.h49
-rw-r--r--arch/s390/include/asm/page.h167
-rw-r--r--arch/s390/include/asm/pci.h192
-rw-r--r--arch/s390/include/asm/pci_clp.h186
-rw-r--r--arch/s390/include/asm/pci_debug.h28
-rw-r--r--arch/s390/include/asm/pci_dma.h196
-rw-r--r--arch/s390/include/asm/pci_insn.h86
-rw-r--r--arch/s390/include/asm/pci_io.h198
-rw-r--r--arch/s390/include/asm/percpu.h190
-rw-r--r--arch/s390/include/asm/perf_event.h96
-rw-r--r--arch/s390/include/asm/pgalloc.h156
-rw-r--r--arch/s390/include/asm/pgtable.h1739
-rw-r--r--arch/s390/include/asm/processor.h419
-rw-r--r--arch/s390/include/asm/ptrace.h175
-rw-r--r--arch/s390/include/asm/qdio.h436
-rw-r--r--arch/s390/include/asm/reset.h19
-rw-r--r--arch/s390/include/asm/runtime_instr.h98
-rw-r--r--arch/s390/include/asm/rwsem.h318
-rw-r--r--arch/s390/include/asm/scatterlist.h3
-rw-r--r--arch/s390/include/asm/schid.h21
-rw-r--r--arch/s390/include/asm/sclp.h71
-rw-r--r--arch/s390/include/asm/scsw.h988
-rw-r--r--arch/s390/include/asm/seccomp.h16
-rw-r--r--arch/s390/include/asm/sections.h8
-rw-r--r--arch/s390/include/asm/segment.h4
-rw-r--r--arch/s390/include/asm/serial.h6
-rw-r--r--arch/s390/include/asm/setup.h151
-rw-r--r--arch/s390/include/asm/sfp-machine.h142
-rw-r--r--arch/s390/include/asm/sfp-util.h77
-rw-r--r--arch/s390/include/asm/shmparam.h11
-rw-r--r--arch/s390/include/asm/signal.h25
-rw-r--r--arch/s390/include/asm/sigp.h53
-rw-r--r--arch/s390/include/asm/smp.h78
-rw-r--r--arch/s390/include/asm/sparsemem.h16
-rw-r--r--arch/s390/include/asm/spinlock.h202
-rw-r--r--arch/s390/include/asm/spinlock_types.h20
-rw-r--r--arch/s390/include/asm/string.h143
-rw-r--r--arch/s390/include/asm/switch_to.h137
-rw-r--r--arch/s390/include/asm/syscall.h100
-rw-r--r--arch/s390/include/asm/sysinfo.h169
-rw-r--r--arch/s390/include/asm/termios.h25
-rw-r--r--arch/s390/include/asm/thread_info.h109
-rw-r--r--arch/s390/include/asm/timex.h161
-rw-r--r--arch/s390/include/asm/tlb.h151
-rw-r--r--arch/s390/include/asm/tlbflush.h211
-rw-r--r--arch/s390/include/asm/topology.h60
-rw-r--r--arch/s390/include/asm/types.h28
-rw-r--r--arch/s390/include/asm/uaccess.h376
-rw-r--r--arch/s390/include/asm/unaligned.h13
-rw-r--r--arch/s390/include/asm/unistd.h57
-rw-r--r--arch/s390/include/asm/user.h74
-rw-r--r--arch/s390/include/asm/vdso.h47
-rw-r--r--arch/s390/include/asm/vga.h6
-rw-r--r--arch/s390/include/asm/vtime.h7
-rw-r--r--arch/s390/include/asm/vtimer.h33
-rw-r--r--arch/s390/include/asm/xor.h1
-rw-r--r--arch/s390/include/uapi/asm/Kbuild51
-rw-r--r--arch/s390/include/uapi/asm/auxvec.h6
-rw-r--r--arch/s390/include/uapi/asm/bitsperlong.h13
-rw-r--r--arch/s390/include/uapi/asm/byteorder.h6
-rw-r--r--arch/s390/include/uapi/asm/chpid.h22
-rw-r--r--arch/s390/include/uapi/asm/chsc.h143
-rw-r--r--arch/s390/include/uapi/asm/cmb.h53
-rw-r--r--arch/s390/include/uapi/asm/dasd.h295
-rw-r--r--arch/s390/include/uapi/asm/debug.h34
-rw-r--r--arch/s390/include/uapi/asm/errno.h11
-rw-r--r--arch/s390/include/uapi/asm/fcntl.h1
-rw-r--r--arch/s390/include/uapi/asm/hypfs.h25
-rw-r--r--arch/s390/include/uapi/asm/ioctl.h1
-rw-r--r--arch/s390/include/uapi/asm/ioctls.h8
-rw-r--r--arch/s390/include/uapi/asm/ipcbuf.h31
-rw-r--r--arch/s390/include/uapi/asm/kvm.h131
-rw-r--r--arch/s390/include/uapi/asm/kvm_para.h11
-rw-r--r--arch/s390/include/uapi/asm/kvm_virtio.h64
-rw-r--r--arch/s390/include/uapi/asm/mman.h6
-rw-r--r--arch/s390/include/uapi/asm/monwriter.h31
-rw-r--r--arch/s390/include/uapi/asm/msgbuf.h37
-rw-r--r--arch/s390/include/uapi/asm/param.h6
-rw-r--r--arch/s390/include/uapi/asm/poll.h1
-rw-r--r--arch/s390/include/uapi/asm/posix_types.h51
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h459
-rw-r--r--arch/s390/include/uapi/asm/qeth.h115
-rw-r--r--arch/s390/include/uapi/asm/resource.h13
-rw-r--r--arch/s390/include/uapi/asm/schid.h16
-rw-r--r--arch/s390/include/uapi/asm/sclp_ctl.h24
-rw-r--r--arch/s390/include/uapi/asm/sembuf.h29
-rw-r--r--arch/s390/include/uapi/asm/setup.h13
-rw-r--r--arch/s390/include/uapi/asm/shmbuf.h48
-rw-r--r--arch/s390/include/uapi/asm/sie.h243
-rw-r--r--arch/s390/include/uapi/asm/sigcontext.h70
-rw-r--r--arch/s390/include/uapi/asm/siginfo.h16
-rw-r--r--arch/s390/include/uapi/asm/signal.h129
-rw-r--r--arch/s390/include/uapi/asm/socket.h89
-rw-r--r--arch/s390/include/uapi/asm/sockios.h6
-rw-r--r--arch/s390/include/uapi/asm/stat.h103
-rw-r--r--arch/s390/include/uapi/asm/statfs.h50
-rw-r--r--arch/s390/include/uapi/asm/swab.h89
-rw-r--r--arch/s390/include/uapi/asm/tape390.h102
-rw-r--r--arch/s390/include/uapi/asm/termbits.h6
-rw-r--r--arch/s390/include/uapi/asm/termios.h49
-rw-r--r--arch/s390/include/uapi/asm/types.h22
-rw-r--r--arch/s390/include/uapi/asm/ucontext.h37
-rw-r--r--arch/s390/include/uapi/asm/unistd.h378
-rw-r--r--arch/s390/include/uapi/asm/virtio-ccw.h21
-rw-r--r--arch/s390/include/uapi/asm/vtoc.h213
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h341
-rw-r--r--arch/s390/kernel/.gitignore1
-rw-r--r--arch/s390/kernel/Makefile62
-rw-r--r--arch/s390/kernel/asm-offsets.c158
-rw-r--r--arch/s390/kernel/base.S100
-rw-r--r--arch/s390/kernel/binfmt_elf32.c214
-rw-r--r--arch/s390/kernel/bitmap.S56
-rw-r--r--arch/s390/kernel/cache.c389
-rw-r--r--arch/s390/kernel/compat_exec_domain.c29
-rw-r--r--arch/s390/kernel/compat_linux.c709
-rw-r--r--arch/s390/kernel/compat_linux.h206
-rw-r--r--arch/s390/kernel/compat_ptrace.h64
-rw-r--r--arch/s390/kernel/compat_signal.c393
-rw-r--r--arch/s390/kernel/compat_wrapper.S1734
-rw-r--r--arch/s390/kernel/compat_wrapper.c216
-rw-r--r--arch/s390/kernel/cpcmd.c12
-rw-r--r--arch/s390/kernel/crash.c16
-rw-r--r--arch/s390/kernel/crash_dump.c647
-rw-r--r--arch/s390/kernel/debug.c219
-rw-r--r--arch/s390/kernel/diag.c21
-rw-r--r--arch/s390/kernel/dis.c963
-rw-r--r--arch/s390/kernel/dumpstack.c217
-rw-r--r--arch/s390/kernel/early.c404
-rw-r--r--arch/s390/kernel/ebcdic.c3
-rw-r--r--arch/s390/kernel/entry.S1567
-rw-r--r--arch/s390/kernel/entry.h93
-rw-r--r--arch/s390/kernel/entry64.S1560
-rw-r--r--arch/s390/kernel/ftrace.c192
-rw-r--r--arch/s390/kernel/head.S381
-rw-r--r--arch/s390/kernel/head31.S63
-rw-r--r--arch/s390/kernel/head64.S146
-rw-r--r--arch/s390/kernel/head_kdump.S108
-rw-r--r--arch/s390/kernel/init_task.c44
-rw-r--r--arch/s390/kernel/ipl.c891
-rw-r--r--arch/s390/kernel/irq.c333
-rw-r--r--arch/s390/kernel/jump_label.c70
-rw-r--r--arch/s390/kernel/kprobes.c733
-rw-r--r--arch/s390/kernel/lgr.c186
-rw-r--r--arch/s390/kernel/machine_kexec.c225
-rw-r--r--arch/s390/kernel/mcount.S73
-rw-r--r--arch/s390/kernel/mcount64.S65
-rw-r--r--arch/s390/kernel/module.c192
-rw-r--r--arch/s390/kernel/nmi.c372
-rw-r--r--arch/s390/kernel/os_info.c168
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c696
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c322
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c1643
-rw-r--r--arch/s390/kernel/perf_event.c324
-rw-r--r--arch/s390/kernel/pgm_check.S152
-rw-r--r--arch/s390/kernel/process.c396
-rw-r--r--arch/s390/kernel/processor.c97
-rw-r--r--arch/s390/kernel/ptrace.c1228
-rw-r--r--arch/s390/kernel/reipl.S20
-rw-r--r--arch/s390/kernel/reipl64.S102
-rw-r--r--arch/s390/kernel/relocate_kernel.S13
-rw-r--r--arch/s390/kernel/relocate_kernel64.S15
-rw-r--r--arch/s390/kernel/runtime_instr.c149
-rw-r--r--arch/s390/kernel/s390_ext.c138
-rw-r--r--arch/s390/kernel/s390_ksyms.c54
-rw-r--r--arch/s390/kernel/sclp.S360
-rw-r--r--arch/s390/kernel/setup.c1056
-rw-r--r--arch/s390/kernel/signal.c353
-rw-r--r--arch/s390/kernel/smp.c1452
-rw-r--r--arch/s390/kernel/stacktrace.c7
-rw-r--r--arch/s390/kernel/suspend.c225
-rw-r--r--arch/s390/kernel/swsusp_asm64.S306
-rw-r--r--arch/s390/kernel/sys_s390.c199
-rw-r--r--arch/s390/kernel/syscalls.S560
-rw-r--r--arch/s390/kernel/sysinfo.c428
-rw-r--r--arch/s390/kernel/time.c1162
-rw-r--r--arch/s390/kernel/topology.c527
-rw-r--r--arch/s390/kernel/traps.c646
-rw-r--r--arch/s390/kernel/vdso.c333
-rw-r--r--arch/s390/kernel/vdso32/.gitignore1
-rw-r--r--arch/s390/kernel/vdso32/Makefile58
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S39
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S125
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S79
-rw-r--r--arch/s390/kernel/vdso32/note.S12
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S138
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S14
-rw-r--r--arch/s390/kernel/vdso64/.gitignore1
-rw-r--r--arch/s390/kernel/vdso64/Makefile58
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S46
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S127
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S57
-rw-r--r--arch/s390/kernel/vdso64/note.S12
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S138
-rw-r--r--arch/s390/kernel/vdso64/vdso64_wrapper.S14
-rw-r--r--arch/s390/kernel/vmlinux.lds.S118
-rw-r--r--arch/s390/kernel/vtime.c676
-rw-r--r--arch/s390/kvm/Kconfig31
-rw-r--r--arch/s390/kvm/Makefile9
-rw-r--r--arch/s390/kvm/diag.c199
-rw-r--r--arch/s390/kvm/gaccess.c726
-rw-r--r--arch/s390/kvm/gaccess.h501
-rw-r--r--arch/s390/kvm/guestdbg.c482
-rw-r--r--arch/s390/kvm/intercept.c364
-rw-r--r--arch/s390/kvm/interrupt.c1472
-rw-r--r--arch/s390/kvm/irq.h22
-rw-r--r--arch/s390/kvm/kvm-s390.c1558
-rw-r--r--arch/s390/kvm/kvm-s390.h194
-rw-r--r--arch/s390/kvm/priv.c988
-rw-r--r--arch/s390/kvm/sie64a.S47
-rw-r--r--arch/s390/kvm/sigp.c485
-rw-r--r--arch/s390/kvm/trace-s390.h273
-rw-r--r--arch/s390/kvm/trace.h418
-rw-r--r--arch/s390/lib/Makefile6
-rw-r--r--arch/s390/lib/delay.c137
-rw-r--r--arch/s390/lib/div64.c6
-rw-r--r--arch/s390/lib/find.c77
-rw-r--r--arch/s390/lib/mem32.S92
-rw-r--r--arch/s390/lib/mem64.S88
-rw-r--r--arch/s390/lib/qrnnd.S5
-rw-r--r--arch/s390/lib/spinlock.c205
-rw-r--r--arch/s390/lib/string.c51
-rw-r--r--arch/s390/lib/uaccess.c406
-rw-r--r--arch/s390/lib/uaccess.h23
-rw-r--r--arch/s390/lib/uaccess_mvcos.c231
-rw-r--r--arch/s390/lib/uaccess_pt.c464
-rw-r--r--arch/s390/lib/uaccess_std.c317
-rw-r--r--arch/s390/lib/ucmpdi2.c26
-rw-r--r--arch/s390/math-emu/Makefile2
-rw-r--r--arch/s390/math-emu/math.c10
-rw-r--r--arch/s390/mm/Makefile10
-rw-r--r--arch/s390/mm/cmm.c200
-rw-r--r--arch/s390/mm/dump_pagetables.c246
-rw-r--r--arch/s390/mm/extable.c81
-rw-r--r--arch/s390/mm/extmem.c383
-rw-r--r--arch/s390/mm/fault.c831
-rw-r--r--arch/s390/mm/gup.c246
-rw-r--r--arch/s390/mm/hugetlbpage.c147
-rw-r--r--arch/s390/mm/init.c241
-rw-r--r--arch/s390/mm/maccess.c204
-rw-r--r--arch/s390/mm/mem_detect.c65
-rw-r--r--arch/s390/mm/mmap.c117
-rw-r--r--arch/s390/mm/page-states.c59
-rw-r--r--arch/s390/mm/pageattr.c146
-rw-r--r--arch/s390/mm/pgtable.c1479
-rw-r--r--arch/s390/mm/vmem.c182
-rw-r--r--arch/s390/net/Makefile4
-rw-r--r--arch/s390/net/bpf_jit.S130
-rw-r--r--arch/s390/net/bpf_jit_comp.c891
-rw-r--r--arch/s390/oprofile/Makefile3
-rw-r--r--arch/s390/oprofile/backtrace.c8
-rw-r--r--arch/s390/oprofile/hwsampler.c1178
-rw-r--r--arch/s390/oprofile/hwsampler.h63
-rw-r--r--arch/s390/oprofile/init.c510
-rw-r--r--arch/s390/oprofile/op_counter.h21
-rw-r--r--arch/s390/pci/Makefile6
-rw-r--r--arch/s390/pci/pci.c935
-rw-r--r--arch/s390/pci/pci_clp.c392
-rw-r--r--arch/s390/pci/pci_debug.c167
-rw-r--r--arch/s390/pci/pci_dma.c506
-rw-r--r--arch/s390/pci/pci_event.c136
-rw-r--r--arch/s390/pci/pci_insn.c202
-rw-r--r--arch/s390/pci/pci_sysfs.c110
383 files changed, 60108 insertions, 16267 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
new file mode 100644
index 00000000000..647c3eccc3d
--- /dev/null
+++ b/arch/s390/Kbuild
@@ -0,0 +1,9 @@
+obj-y += kernel/
+obj-y += mm/
+obj-$(CONFIG_KVM) += kvm/
+obj-$(CONFIG_CRYPTO_HW) += crypto/
+obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
+obj-$(CONFIG_APPLDATA_BASE) += appldata/
+obj-$(CONFIG_MATHEMU) += math-emu/
+obj-y += net/
+obj-$(CONFIG_PCI) += pci/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107e492cb47..bb63499fc5d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -1,18 +1,8 @@
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-
-config SCHED_MC
- def_bool y
- depends on SMP
-
config MMU
def_bool y
config ZONE_DMA
def_bool y
- depends on 64BIT
config LOCKDEP_SUPPORT
def_bool y
@@ -30,80 +20,328 @@ config RWSEM_XCHGADD_ALGORITHM
def_bool y
config ARCH_HAS_ILOG2_U32
- bool
- default n
+ def_bool n
config ARCH_HAS_ILOG2_U64
- bool
- default n
+ def_bool n
config GENERIC_HWEIGHT
def_bool y
-config GENERIC_TIME
- def_bool y
+config GENERIC_BUG
+ def_bool y if BUG
-config GENERIC_CLOCKEVENTS
+config GENERIC_BUG_RELATIVE_POINTERS
def_bool y
-config GENERIC_BUG
- bool
- depends on BUG
- default y
+config ARCH_DMA_ADDR_T_64BIT
+ def_bool 64BIT
+
+config GENERIC_LOCKBREAK
+ def_bool y if SMP && PREEMPT
+
+config PGSTE
+ def_bool y if KVM
-config NO_IOMEM
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
-config NO_DMA
+config KEXEC
def_bool y
-config GENERIC_LOCKBREAK
- bool
- default y
- depends on SMP && PREEMPT
+config AUDIT_ARCH
+ def_bool y
-config PGSTE
- bool
- default y if KVM
+config NO_IOPORT_MAP
+ def_bool y
-mainmenu "Linux Kernel Configuration"
+config PCI_QUIRKS
+ def_bool n
config S390
def_bool y
- select HAVE_OPROFILE
+ select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_INLINE_READ_LOCK
+ select ARCH_INLINE_READ_LOCK_BH
+ select ARCH_INLINE_READ_LOCK_IRQ
+ select ARCH_INLINE_READ_LOCK_IRQSAVE
+ select ARCH_INLINE_READ_TRYLOCK
+ select ARCH_INLINE_READ_UNLOCK
+ select ARCH_INLINE_READ_UNLOCK_BH
+ select ARCH_INLINE_READ_UNLOCK_IRQ
+ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+ select ARCH_INLINE_SPIN_LOCK
+ select ARCH_INLINE_SPIN_LOCK_BH
+ select ARCH_INLINE_SPIN_LOCK_IRQ
+ select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+ select ARCH_INLINE_SPIN_TRYLOCK
+ select ARCH_INLINE_SPIN_TRYLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK
+ select ARCH_INLINE_SPIN_UNLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK_IRQ
+ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+ select ARCH_INLINE_WRITE_LOCK
+ select ARCH_INLINE_WRITE_LOCK_BH
+ select ARCH_INLINE_WRITE_LOCK_IRQ
+ select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+ select ARCH_INLINE_WRITE_TRYLOCK
+ select ARCH_INLINE_WRITE_UNLOCK
+ select ARCH_INLINE_WRITE_UNLOCK_BH
+ select ARCH_INLINE_WRITE_UNLOCK_IRQ
+ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANT_IPC_PARSE_VERSION
+ select BUILDTIME_EXTABLE_SORT
+ select CLONE_BACKWARDS2
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_CPU_DEVICES if !SMP
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_TIME_VSYSCALL
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+ select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
+ select HAVE_BPF_JIT if 64BIT && PACK_STACK
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_CMPXCHG_LOCAL
+ select HAVE_C_RECORDMCOUNT
+ select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FUTEX_CMPXCHG if FUTEX
+ select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
+ select HAVE_KERNEL_LZMA
+ select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_XZ
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_KVM if 64BIT
+ select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
+ select HAVE_MEMBLOCK_PHYS_MAP
+ select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_OPROFILE
+ select HAVE_PERF_EVENTS
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_UID16 if 32BIT
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select KTIME_SCALAR if 32BIT
+ select MODULES_USE_ELF_RELA
+ select NO_BOOTMEM
+ select OLD_SIGACTION
+ select OLD_SIGSUSPEND3
+ select SYSCTL_EXCEPTION_TRACE
+ select TTY
+ select VIRT_CPU_ACCOUNTING
+ select VIRT_TO_BUS
+
+config SCHED_OMIT_FRAME_POINTER
+ def_bool y
source "init/Kconfig"
-menu "Base setup"
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config HAVE_MARCH_Z900_FEATURES
+ def_bool n
+
+config HAVE_MARCH_Z990_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z900_FEATURES
+
+config HAVE_MARCH_Z9_109_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z990_FEATURES
+
+config HAVE_MARCH_Z10_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z9_109_FEATURES
+
+config HAVE_MARCH_Z196_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z10_FEATURES
+
+config HAVE_MARCH_ZEC12_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z196_FEATURES
+
+choice
+ prompt "Processor type"
+ default MARCH_G5
+
+config MARCH_G5
+ bool "System/390 model G5 and G6"
+ depends on !64BIT
+ help
+ Select this to build a 31 bit kernel that works
+ on all ESA/390 and z/Architecture machines.
+
+config MARCH_Z900
+ bool "IBM zSeries model z800 and z900"
+ select HAVE_MARCH_Z900_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for model z800/z900 (2064 and
+ 2066 series). This will enable some optimizations that are not
+ available on older ESA/390 (31 Bit) only CPUs.
+
+config MARCH_Z990
+ bool "IBM zSeries model z890 and z990"
+ select HAVE_MARCH_Z990_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for model z890/z990 (2084 and
+ 2086 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z9_109
+ bool "IBM System z9"
+ select HAVE_MARCH_Z9_109_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM System z9 (2094 and
+ 2096 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z10
+ bool "IBM System z10"
+ select HAVE_MARCH_Z10_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM System z10 (2097 and
+ 2098 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z196
+ bool "IBM zEnterprise 114 and 196"
+ select HAVE_MARCH_Z196_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM zEnterprise 114 and 196
+ (2818 and 2817 series). The kernel will be slightly faster but will
+ not work on older machines.
+
+config MARCH_ZEC12
+ bool "IBM zBC12 and zEC12"
+ select HAVE_MARCH_ZEC12_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+ 2827 series). The kernel will be slightly faster but will not work on
+ older machines.
+
+endchoice
+
+config MARCH_G5_TUNE
+ def_bool TUNE_G5 || MARCH_G5 && TUNE_DEFAULT
+
+config MARCH_Z900_TUNE
+ def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT
+
+config MARCH_Z990_TUNE
+ def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT
+
+config MARCH_Z9_109_TUNE
+ def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT
+
+config MARCH_Z10_TUNE
+ def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT
-comment "Processor type and features"
+config MARCH_Z196_TUNE
+ def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT
-source "kernel/time/Kconfig"
+config MARCH_ZEC12_TUNE
+ def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT
+
+choice
+ prompt "Tune code generation"
+ default TUNE_DEFAULT
+ help
+ Cause the compiler to tune (-mtune) the generated code for a machine.
+ This will make the code run faster on the selected machine but
+ somewhat slower on other machines.
+ This option only changes how the compiler emits instructions, not the
+ selection of instructions itself, so the resulting kernel will run on
+ all other machines.
+
+config TUNE_DEFAULT
+ bool "Default"
+ help
+ Tune the generated code for the target processor for which the kernel
+ will be compiled.
+
+config TUNE_G5
+ bool "System/390 model G5 and G6"
+
+config TUNE_Z900
+ bool "IBM zSeries model z800 and z900"
+
+config TUNE_Z990
+ bool "IBM zSeries model z890 and z990"
+
+config TUNE_Z9_109
+ bool "IBM System z9"
+
+config TUNE_Z10
+ bool "IBM System z10"
+
+config TUNE_Z196
+ bool "IBM zEnterprise 114 and 196"
+
+config TUNE_ZEC12
+ bool "IBM zBC12 and zEC12"
+
+endchoice
config 64BIT
- bool "64 bit kernel"
+ def_bool y
+ prompt "64 bit kernel"
help
- Select this option if you have a 64 bit IBM zSeries machine
+ Select this option if you have an IBM z/Architecture machine
and want to use the 64 bit addressing mode.
config 32BIT
- bool
- default y if !64BIT
+ def_bool y if !64BIT
+
+config COMPAT
+ def_bool y
+ prompt "Kernel support for 31 bit emulation"
+ depends on 64BIT
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
+ select ARCH_WANT_OLD_COMPAT_IPC
+ select COMPAT_OLD_SIGACTION
+ help
+ Select this option if you want to enable your system kernel to
+ handle system-calls from ELF binaries for 31 bit ESA. This option
+ (and some other stuff like libraries and such) is needed for
+ executing 31 bit applications. It is safe to say "Y".
+
+config SYSVIPC_COMPAT
+ def_bool y if COMPAT && SYSVIPC
+
+config KEYS_COMPAT
+ def_bool y if COMPAT && KEYS
config SMP
- bool "Symmetric multi-processing support"
+ def_bool y
+ prompt "Symmetric multi-processing support"
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
you have a system with more than one CPU, say Y.
- If you say N here, the kernel will run on single and multiprocessor
+ If you say N here, the kernel will run on uni- and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
+ uniprocessor machines. On a uniprocessor machine, the kernel
will run faster if you say N here.
See also the SMP-HOWTO available at
@@ -112,119 +350,88 @@ config SMP
Even if you don't know what to do here, say Y.
config NR_CPUS
- int "Maximum number of CPUs (2-64)"
- range 2 64
+ int "Maximum number of CPUs (2-256)"
+ range 2 256
depends on SMP
default "32" if !64BIT
default "64" if 64BIT
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. The maximum supported value is 64 and the
+ kernel will support. The maximum supported value is 256 and the
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
approximately sixteen kilobytes to the kernel image.
config HOTPLUG_CPU
- bool "Support for hot-pluggable CPUs"
+ def_bool y
+ prompt "Support for hot-pluggable CPUs"
depends on SMP
- select HOTPLUG
- default n
help
Say Y here to be able to turn CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
+config SCHED_MC
+ def_bool n
+
+config SCHED_BOOK
+ def_bool y
+ prompt "Book scheduler support"
+ depends on SMP
+ select SCHED_MC
+ help
+ Book scheduler support improves the CPU scheduler's decision making
+ when dealing with machines that have several books.
+
+source kernel/Kconfig.preempt
+
config MATHEMU
- bool "IEEE FPU emulation"
+ def_bool y
+ prompt "IEEE FPU emulation"
depends on MARCH_G5
help
This option is required for IEEE compliant floating point arithmetic
- on older S/390 machines. Say Y unless you know your machine doesn't
+ on older ESA/390 machines. Say Y unless you know your machine doesn't
need this.
-config COMPAT
- bool "Kernel support for 31 bit emulation"
- depends on 64BIT
- help
- Select this option if you want to enable your system kernel to
- handle system-calls from ELF binaries for 31 bit ESA. This option
- (and some other stuff like libraries and such) is needed for
- executing 31 bit applications. It is safe to say "Y".
-
-config SYSVIPC_COMPAT
- bool
- depends on COMPAT && SYSVIPC
- default y
-
-config AUDIT_ARCH
- bool
- default y
-
-config S390_SWITCH_AMODE
- bool "Switch kernel/user addressing modes"
- help
- This option allows to switch the addressing modes of kernel and user
- space. The kernel parameter switch_amode=on will enable this feature,
- default is disabled. Enabling this (via kernel parameter) on machines
- earlier than IBM System z9-109 EC/BC will reduce system performance.
+source kernel/Kconfig.hz
- Note that this option will also be selected by selecting the execute
- protection option below. Enabling the execute protection via the
- noexec kernel parameter will also switch the addressing modes,
- independent of the switch_amode kernel parameter.
+endmenu
+menu "Memory setup"
-config S390_EXEC_PROTECT
- bool "Data execute protection"
- select S390_SWITCH_AMODE
- help
- This option allows to enable a buffer overflow protection for user
- space programs and it also selects the addressing mode option above.
- The kernel parameter noexec=on will enable this feature and also
- switch the addressing modes, default is disabled. Enabling this (via
- kernel parameter) on machines earlier than IBM System z9-109 EC/BC
- will reduce system performance.
+config ARCH_SPARSEMEM_ENABLE
+ def_bool y
+ select SPARSEMEM_VMEMMAP_ENABLE
+ select SPARSEMEM_VMEMMAP
+ select SPARSEMEM_STATIC if !64BIT
-comment "Code generation options"
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
-choice
- prompt "Processor type"
- default MARCH_G5
+config ARCH_SELECT_MEMORY_MODEL
+ def_bool y
-config MARCH_G5
- bool "S/390 model G5 and G6"
- depends on !64BIT
- help
- Select this to build a 31 bit kernel that works
- on all S/390 and zSeries machines.
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y if SPARSEMEM
-config MARCH_Z900
- bool "IBM eServer zSeries model z800 and z900"
- help
- Select this to optimize for zSeries machines. This
- will enable some optimizations that are not available
- on older 31 bit only CPUs.
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
-config MARCH_Z990
- bool "IBM eServer zSeries model z890 and z990"
- help
- Select this enable optimizations for model z890/z990.
- This will be slightly faster but does not work on
- older machines such as the z900.
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+ def_bool y
+ depends on 64BIT
-config MARCH_Z9_109
- bool "IBM System z9"
- help
- Select this to enable optimizations for IBM System z9-109, IBM
- System z9 Enterprise Class (z9 EC), and IBM System z9 Business
- Class (z9 BC). The kernel will be slightly faster but will not
- work on older machines such as the z990, z890, z900, and z800.
+config FORCE_MAX_ZONEORDER
+ int
+ default "9"
-endchoice
+source "mm/Kconfig"
config PACK_STACK
- bool "Pack kernel stack"
+ def_bool y
+ prompt "Pack kernel stack"
help
This option enables the compiler option -mkernel-backchain if it
is available. If the option is available the compiler supports
@@ -236,22 +443,9 @@ config PACK_STACK
Say Y if you are unsure.
-config SMALL_STACK
- bool "Use 4kb/8kb for kernel stack instead of 8kb/16kb"
- depends on PACK_STACK && !LOCKDEP
- help
- If you say Y here and the compiler supports the -mkernel-backchain
- option the kernel will use a smaller kernel stack size. For 31 bit
- the reduced size is 4kb instead of 8kb and for 64 bit it is 8kb
- instead of 16kb. This allows to run more thread on a system and
- reduces the pressure on the memory management for higher order
- page allocations.
-
- Say N if you are unsure.
-
-
config CHECK_STACK
- bool "Detect kernel stack overflow"
+ def_bool y
+ prompt "Detect kernel stack overflow"
help
This option enables the compiler option -mstack-guard and
-mstack-size if they are available. If the compiler supports them
@@ -274,119 +468,199 @@ config STACK_GUARD
The minimum size for the stack guard should be 256 for 31 bit and
512 for 64 bit.
-config WARN_STACK
- bool "Emit compiler warnings for function with broken stack usage"
+config WARN_DYNAMIC_STACK
+ def_bool n
+ prompt "Emit compiler warnings for function with dynamic stack usage"
help
- This option enables the compiler options -mwarn-framesize and
- -mwarn-dynamicstack. If the compiler supports these options it
- will generate warnings for function which either use alloca or
- create a stack frame bigger then CONFIG_WARN_STACK_SIZE.
+ This option enables the compiler option -mwarn-dynamicstack. If the
+ compiler supports this option, it generates warnings for functions
+ that dynamically allocate stack space using alloca.
Say N if you are unsure.
-config WARN_STACK_SIZE
- int "Maximum frame size considered safe (128-2048)"
- range 128 2048
- depends on WARN_STACK
- default "256"
+endmenu
+
+menu "I/O subsystem"
+
+config QDIO
+ def_tristate y
+ prompt "QDIO support"
+ ---help---
+ This driver provides the Queued Direct I/O base support for
+ IBM System z.
+
+ To compile this driver as a module, choose M here: the
+ module will be called qdio.
+
+ If unsure, say Y.
+
+menuconfig PCI
+ bool "PCI support"
+ default n
+ depends on 64BIT
+ select PCI_MSI
help
- This allows you to specify the maximum frame size a function may
- have without the compiler complaining about it.
+ Enable PCI support.
-config ARCH_POPULATES_NODE_MAP
- def_bool y
+if PCI
-comment "Kernel preemption"
+config PCI_NR_FUNCTIONS
+ int "Maximum number of PCI functions (1-4096)"
+ range 1 4096
+ default "64"
+ help
+ This allows you to specify the maximum number of PCI functions which
+ this kernel will support.
-source "kernel/Kconfig.preempt"
+config PCI_NR_MSI
+ int "Maximum number of MSI interrupts (64-32768)"
+ range 64 32768
+ default "256"
+ help
+ This defines the number of virtual interrupts the kernel will
+ provide for MSI interrupts. If you configure your system to have
+ too few, drivers will fail to allocate MSI interrupts for all
+ PCI devices.
-config ARCH_SPARSEMEM_ENABLE
- def_bool y
- select SPARSEMEM_VMEMMAP_ENABLE
- select SPARSEMEM_VMEMMAP
- select SPARSEMEM_STATIC if !64BIT
+source "drivers/pci/Kconfig"
+source "drivers/pci/pcie/Kconfig"
+source "drivers/pci/hotplug/Kconfig"
-config ARCH_SPARSEMEM_DEFAULT
- def_bool y
+endif # PCI
-config ARCH_SELECT_MEMORY_MODEL
- def_bool y
+config PCI_DOMAINS
+ def_bool PCI
-source "mm/Kconfig"
+config HAS_IOMEM
+ def_bool PCI
-comment "I/O subsystem configuration"
+config IOMMU_HELPER
+ def_bool PCI
-config MACHCHK_WARNING
- bool "Process warning machine checks"
- help
- Select this option if you want the machine check handler on IBM S/390 or
- zSeries to process warning machine checks (e.g. on power failures).
- If unsure, say "Y".
+config HAS_DMA
+ def_bool PCI
+ select HAVE_DMA_API_DEBUG
-config QDIO
- tristate "QDIO support"
- ---help---
- This driver provides the Queued Direct I/O base support for
- IBM System z.
+config NEED_SG_DMA_LENGTH
+ def_bool PCI
- To compile this driver as a module, choose M here: the
- module will be called qdio.
+config HAVE_DMA_ATTRS
+ def_bool PCI
- If unsure, say Y.
+config NEED_DMA_MAP_STATE
+ def_bool PCI
-config QDIO_DEBUG
- bool "Extended debugging information"
- depends on QDIO
+config CHSC_SCH
+ def_tristate m
+ prompt "Support for CHSC subchannels"
help
- Say Y here to get extended debugging output in
- /sys/kernel/debug/s390dbf/qdio...
- Warning: this option reduces the performance of the QDIO module.
+ This driver allows usage of CHSC subchannels. A CHSC subchannel
+ is usually present on LPAR only.
+ The driver creates a device /dev/chsc, which may be used to
+ obtain I/O configuration information about the machine and
+ to issue asynchronous chsc commands (DANGEROUS).
+ You will usually only want to use this interface on a special
+ LPAR designated for system management.
- If unsure, say N.
+ To compile this driver as a module, choose M here: the
+ module will be called chsc_sch.
-comment "Misc"
+ If unsure, say N.
-config IPL
- bool "Builtin IPL record support"
+config SCM_BUS
+ def_bool y
+ depends on 64BIT
+ prompt "SCM bus driver"
help
- If you want to use the produced kernel to IPL directly from a
- device, you have to merge a bootsector specific to the device
- into the first bytes of the kernel. You will have to select the
- IPL device.
+ Bus driver for Storage Class Memory.
-choice
- prompt "IPL method generated into head.S"
- depends on IPL
- default IPL_TAPE
+config EADM_SCH
+ def_tristate m
+ prompt "Support for EADM subchannels"
+ depends on SCM_BUS
help
- Select "tape" if you want to IPL the image from a Tape.
+ This driver allows usage of EADM subchannels. EADM subchannels act
+ as a communication vehicle for SCM increments.
- Select "vm_reader" if you are running under VM/ESA and want
- to IPL the image from the emulated card reader.
+ To compile this driver as a module, choose M here: the
+ module will be called eadm_sch.
-config IPL_TAPE
- bool "tape"
+endmenu
-config IPL_VM
- bool "vm_reader"
+menu "Dump support"
-endchoice
+config CRASH_DUMP
+ bool "kernel crash dumps"
+ depends on 64BIT && SMP
+ select KEXEC
+ help
+ Generate crash dump after being started by kexec.
+ Crash dump kernels are loaded in the main kernel with kexec-tools
+ into a specially reserved region and then later executed after
+ a crash by kdump/kexec.
+ Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+ This option also enables s390 zfcpdump.
+ See also <file:Documentation/s390/zfcpdump.txt>
+
+endmenu
+
+menu "Executable file formats / Emulations"
source "fs/Kconfig.binfmt"
-config FORCE_MAX_ZONEORDER
- int
- default "9"
+config SECCOMP
+ def_bool y
+ prompt "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
-config PROCESS_DEBUG
- bool "Show crashed user process info"
- help
- Say Y to print all process fault locations to the console. This is
- a debugging option; you probably do not want to set it unless you
- are an S390 port maintainer.
+ If unsure, say Y.
+
+endmenu
+
+menu "Power Management"
+
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y if 64BIT
+
+source "kernel/power/Kconfig"
+
+endmenu
+
+source "net/Kconfig"
+
+config PCMCIA
+ def_bool n
+
+config CCW
+ def_bool y
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/s390/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+menu "Virtualization"
config PFAULT
- bool "Pseudo page fault support"
+ def_bool y
+ prompt "Pseudo page fault support"
help
Select this option, if you want to use PFAULT pseudo page fault
handling under VM. If running native or in LPAR, this option
@@ -399,6 +673,7 @@ config PFAULT
config SHARED_KERNEL
bool "VM shared kernel support"
+ depends on !JUMP_LABEL
help
Select this option, if you want to share the text segment of the
Linux kernel between different VM guests. This reduces memory
@@ -409,7 +684,8 @@ config SHARED_KERNEL
doing and want to exploit this feature.
config CMM
- tristate "Cooperative memory management"
+ def_tristate n
+ prompt "Cooperative memory management"
help
Select this option, if you want to enable the kernel interface
to reduce the memory size of the system. This is accomplished
@@ -420,43 +696,18 @@ config CMM
Everybody who wants to run Linux under VM should select this
option.
-config CMM_PROC
- bool "/proc interface to cooperative memory management"
- depends on CMM
- help
- Select this option to enable the /proc interface to the
- cooperative memory management.
-
config CMM_IUCV
- bool "IUCV special message interface to cooperative memory management"
+ def_bool y
+ prompt "IUCV special message interface to cooperative memory management"
depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV)
help
Select this option to enable the special message interface to
the cooperative memory management.
-config PAGE_STATES
- bool "Unused page notification"
- help
- This enables the notification of unused pages to the
- hypervisor. The ESSA instruction is used to do the states
- changes between a page that has content and the unused state.
-
-config VIRT_TIMER
- bool "Virtual CPU timer support"
- help
- This provides a kernel interface for virtual CPU timers.
- Default is disabled.
-
-config VIRT_CPU_ACCOUNTING
- bool "Base user process accounting on virtual cpu timer"
- depends on VIRT_TIMER
- help
- Select this option to use CPU timer deltas to do user
- process accounting.
-
config APPLDATA_BASE
- bool "Linux - VM Monitor Stream, base infrastructure"
- depends on PROC_FS && VIRT_TIMER=y
+ def_bool n
+ prompt "Linux - VM Monitor Stream, base infrastructure"
+ depends on PROC_FS
help
This provides a kernel interface for creating and updating z/VM APPLDATA
monitor records. The monitor records are updated at certain time
@@ -470,7 +721,8 @@ config APPLDATA_BASE
The /proc entries can also be read from, showing the current settings.
config APPLDATA_MEM
- tristate "Monitor memory management statistics"
+ def_tristate m
+ prompt "Monitor memory management statistics"
depends on APPLDATA_BASE && VM_EVENT_COUNTERS
help
This provides memory management related data to the Linux - VM Monitor
@@ -486,7 +738,8 @@ config APPLDATA_MEM
appldata_mem.o.
config APPLDATA_OS
- tristate "Monitor OS statistics"
+ def_tristate m
+ prompt "Monitor OS statistics"
depends on APPLDATA_BASE
help
This provides OS related data to the Linux - VM Monitor Stream, like
@@ -500,8 +753,9 @@ config APPLDATA_OS
appldata_os.o.
config APPLDATA_NET_SUM
- tristate "Monitor overall network statistics"
- depends on APPLDATA_BASE
+ def_tristate m
+ prompt "Monitor overall network statistics"
+ depends on APPLDATA_BASE && NET
help
This provides network related data to the Linux - VM Monitor Stream,
currently there is only a total sum of network I/O statistics, no
@@ -514,58 +768,29 @@ config APPLDATA_NET_SUM
This can also be compiled as a module, which will be called
appldata_net_sum.o.
-source kernel/Kconfig.hz
-
config S390_HYPFS_FS
- bool "s390 hypervisor file system support"
+ def_bool y
+ prompt "s390 hypervisor file system support"
select SYS_HYPERVISOR
- default y
help
This is a virtual file system intended to provide accounting
information in an s390 hypervisor environment.
-config KEXEC
- bool "kexec system call"
- help
- kexec is a system call that implements the ability to shutdown your
- current kernel, and to start another kernel. It is like a reboot
- but is independent of hardware/microcode support.
-
-config ZFCPDUMP
- tristate "zfcpdump support"
- select SMP
- default n
- help
- Select this option if you want to build an zfcpdump enabled kernel.
- Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+source "arch/s390/kvm/Kconfig"
config S390_GUEST
-bool "s390 guest support (EXPERIMENTAL)"
- depends on 64BIT && EXPERIMENTAL
+ def_bool y
+ prompt "s390 support for virtio devices"
+ depends on 64BIT
+ select TTY
+ select VIRTUALIZATION
select VIRTIO
- select VIRTIO_RING
+ select VIRTIO_CONSOLE
help
- Select this option if you want to run the kernel under s390 linux
-endmenu
-
-source "net/Kconfig"
+ Enabling this option adds support for virtio based paravirtual device
+ drivers on s390.
-config PCMCIA
- def_bool n
-
-config CCW
- def_bool y
+ Select this option if you want to run the kernel as a guest under
+ the KVM hypervisor.
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "arch/s390/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
-source "arch/s390/kvm/Kconfig"
+endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 4599fa06bd8..c56878e1245 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -1,17 +1,35 @@
menu "Kernel hacking"
config TRACE_IRQFLAGS_SUPPORT
- bool
- default y
+ def_bool y
source "lib/Kconfig.debug"
-config DEBUG_PAGEALLOC
- bool "Debug page memory allocations"
+config STRICT_DEVMEM
+ def_bool y
+ prompt "Filter access to /dev/mem"
+ ---help---
+ This option restricts access to /dev/mem. If this option is
+ disabled, you allow userspace access to all memory, including
+ kernel and userspace memory. Accidental memory access is likely
+ to be disastrous.
+ Memory access is required for experts who want to debug the kernel.
+
+ If you are unsure, say Y.
+
+config S390_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
- help
- Unmap pages from the kernel linear mapping after free_pages().
- This results in a slowdown, but helps to find certain types of
- memory corruptions.
+ select DEBUG_FS
+ ---help---
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+ If in doubt, say "N"
+config DEBUG_SET_MODULE_RONX
+ def_bool y
+ depends on MODULES
endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 792a4e7743c..874e6d6e9c5 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,6 +14,7 @@
#
ifndef CONFIG_64BIT
+LD_BFD := elf32-s390
LDFLAGS := -m elf_s390
KBUILD_CFLAGS += -m31
KBUILD_AFLAGS += -m31
@@ -21,8 +22,10 @@ UTS_MACHINE := s390
STACK_SIZE := 8192
CHECKFLAGS += -D__s390__ -msize-long
else
+LD_BFD := elf64-s390
LDFLAGS := -m elf64_s390
-MODFLAGS += -fpic -D__PIC__
+KBUILD_AFLAGS_MODULE += -fPIC
+KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_CFLAGS += -m64
KBUILD_AFLAGS += -m64
UTS_MACHINE := s390x
@@ -30,10 +33,23 @@ STACK_SIZE := 16384
CHECKFLAGS += -D__s390__ -D__s390x__
endif
-cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5)
-cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900)
-cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990)
-cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109)
+export LD_BFD
+
+cflags-$(CONFIG_MARCH_G5) += -march=g5
+cflags-$(CONFIG_MARCH_Z900) += -march=z900
+cflags-$(CONFIG_MARCH_Z990) += -march=z990
+cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109
+cflags-$(CONFIG_MARCH_Z10) += -march=z10
+cflags-$(CONFIG_MARCH_Z196) += -march=z196
+cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12
+
+cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5
+cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900
+cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990
+cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
+cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10
+cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
+cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
#KBUILD_IMAGE is necessary for make rpm
KBUILD_IMAGE :=arch/s390/boot/image
@@ -47,22 +63,12 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
ifeq ($(call cc-option-yn,-mkernel-backchain),y)
cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK
aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
-cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-ifdef CONFIG_SMALL_STACK
-STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) )
-endif
endif
# new style option for packed stacks
ifeq ($(call cc-option-yn,-mpacked-stack),y)
cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK
aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
-cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK
-ifdef CONFIG_SMALL_STACK
-STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) )
-endif
endif
ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y)
@@ -73,8 +79,7 @@ endif
endif
ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
-cflags-$(CONFIG_WARN_STACK) += -mwarn-dynamicstack
-cflags-$(CONFIG_WARN_STACK) += -mwarn-framesize=$(CONFIG_WARN_STACK_SIZE)
+cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack
endif
KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y)
@@ -82,36 +87,43 @@ KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare
KBUILD_AFLAGS += $(aflags-y)
OBJCOPYFLAGS := -O binary
-LDFLAGS_vmlinux := -e start
-head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o
+head-y := arch/s390/kernel/head.o
+head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o)
+
+# See arch/s390/Kbuild for content of core part of the kernel
+core-y += arch/s390/
-core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
- arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
libs-y += arch/s390/lib/
drivers-y += drivers/s390/
-drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
# must be linked after kernel
drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/
boot := arch/s390/boot
-all: image
+all: image bzImage
install: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-image: vmlinux
+image bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
zfcpdump:
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+vdso_install:
+ifeq ($(CONFIG_64BIT),y)
+ $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
+endif
+ $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
# Don't use tabs in echo arguments
define archhelp
echo '* image - Kernel image for IPL ($(boot)/image)'
+ echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)'
endef
diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
index db3ae850510..4a67f2b5f6a 100644
--- a/arch/s390/appldata/appldata.h
+++ b/arch/s390/appldata/appldata.h
@@ -1,15 +1,11 @@
/*
- * arch/s390/appldata/appldata.h
- *
* Definitions and interface for Linux - z/VM Monitor Stream.
*
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2008
*
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
-//#define APPLDATA_DEBUG /* Debug messages on/off */
-
#define APPLDATA_MAX_REC_SIZE 4024 /* Maximum size of the */
/* data buffer */
#define APPLDATA_MAX_PROCS 100
@@ -28,16 +24,6 @@
#define CTL_APPLDATA_NET_SUM 2125
#define CTL_APPLDATA_PROC 2126
-#define P_INFO(x...) printk(KERN_INFO MY_PRINT_NAME " info: " x)
-#define P_ERROR(x...) printk(KERN_ERR MY_PRINT_NAME " error: " x)
-#define P_WARNING(x...) printk(KERN_WARNING MY_PRINT_NAME " status: " x)
-
-#ifdef APPLDATA_DEBUG
-#define P_DEBUG(x...) printk(KERN_DEBUG MY_PRINT_NAME " debug: " x)
-#else
-#define P_DEBUG(x...) do {} while (0)
-#endif
-
struct appldata_ops {
struct list_head list;
struct ctl_table_header *sysctl_header;
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index ad40729bec3..47c8630c93c 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -1,15 +1,16 @@
/*
- * arch/s390/appldata/appldata_base.c
- *
* Base infrastructure for Linux-z/VM Monitor Stream, Stage 1.
* Exports appldata_register_ops() and appldata_unregister_ops() for the
* data gathering modules.
*
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2009
*
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
+#define KMSG_COMPONENT "appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -23,8 +24,10 @@
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/workqueue.h>
+#include <linux/suspend.h>
+#include <linux/platform_device.h>
#include <asm/appldata.h>
-#include <asm/timer.h>
+#include <asm/vtimer.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/smp.h>
@@ -32,21 +35,22 @@
#include "appldata.h"
-#define MY_PRINT_NAME "appldata" /* for debug messages, etc. */
#define APPLDATA_CPU_INTERVAL 10000 /* default (CPU) time for
sampling interval in
milliseconds */
#define TOD_MICRO 0x01000 /* nr. of TOD clock units
for 1 microsecond */
+
+static struct platform_device *appldata_pdev;
+
/*
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+static int appldata_timer_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-static int appldata_interval_handler(ctl_table *ctl, int write,
- struct file *filp,
+static int appldata_interval_handler(struct ctl_table *ctl, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos);
@@ -55,12 +59,12 @@ static struct ctl_table appldata_table[] = {
{
.procname = "timer",
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &appldata_timer_handler,
+ .proc_handler = appldata_timer_handler,
},
{
.procname = "interval",
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &appldata_interval_handler,
+ .proc_handler = appldata_interval_handler,
},
{ },
};
@@ -78,12 +82,12 @@ static struct ctl_table appldata_dir_table[] = {
/*
* Timer
*/
-static DEFINE_PER_CPU(struct vtimer_list, appldata_timer);
-static atomic_t appldata_expire_count = ATOMIC_INIT(0);
+static struct vtimer_list appldata_timer;
static DEFINE_SPINLOCK(appldata_timer_lock);
static int appldata_interval = APPLDATA_CPU_INTERVAL;
static int appldata_timer_active;
+static int appldata_timer_suspended = 0;
/*
* Work queue
@@ -96,7 +100,7 @@ static DECLARE_WORK(appldata_work, appldata_work_fn);
/*
* Ops list
*/
-static DEFINE_SPINLOCK(appldata_ops_lock);
+static DEFINE_MUTEX(appldata_ops_mutex);
static LIST_HEAD(appldata_ops_list);
@@ -108,13 +112,7 @@ static LIST_HEAD(appldata_ops_list);
*/
static void appldata_timer_function(unsigned long data)
{
- P_DEBUG(" -= Timer =-\n");
- P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(),
- atomic_read(&appldata_expire_count));
- if (atomic_dec_and_test(&appldata_expire_count)) {
- atomic_set(&appldata_expire_count, num_online_cpus());
- queue_work(appldata_wq, (struct work_struct *) data);
- }
+ queue_work(appldata_wq, (struct work_struct *) data);
}
/*
@@ -126,22 +124,15 @@ static void appldata_work_fn(struct work_struct *work)
{
struct list_head *lh;
struct appldata_ops *ops;
- int i;
- P_DEBUG(" -= Work Queue =-\n");
- i = 0;
- get_online_cpus();
- spin_lock(&appldata_ops_lock);
+ mutex_lock(&appldata_ops_mutex);
list_for_each(lh, &appldata_ops_list) {
ops = list_entry(lh, struct appldata_ops, list);
- P_DEBUG("list_for_each loop: %i) active = %u, name = %s\n",
- ++i, ops->active, ops->name);
if (ops->active == 1) {
ops->callback(ops->data);
}
}
- spin_unlock(&appldata_ops_lock);
- put_online_cpus();
+ mutex_unlock(&appldata_ops_mutex);
}
/*
@@ -169,20 +160,6 @@ int appldata_diag(char record_nr, u16 function, unsigned long buffer,
/****************************** /proc stuff **********************************/
-/*
- * appldata_mod_vtimer_wrap()
- *
- * wrapper function for mod_virt_timer(), because smp_call_function_single()
- * accepts only one parameter.
- */
-static void __appldata_mod_vtimer_wrap(void *p) {
- struct {
- struct vtimer_list *timer;
- u64 expires;
- } *args = p;
- mod_virt_timer(args->timer, args->expires);
-}
-
#define APPLDATA_ADD_TIMER 0
#define APPLDATA_DEL_TIMER 1
#define APPLDATA_MOD_TIMER 2
@@ -193,51 +170,28 @@ static void __appldata_mod_vtimer_wrap(void *p) {
* Add, delete or modify virtual timers on all online cpus.
* The caller needs to get the appldata_timer_lock spinlock.
*/
-static void
-__appldata_vtimer_setup(int cmd)
+static void __appldata_vtimer_setup(int cmd)
{
- u64 per_cpu_interval;
- int i;
+ u64 timer_interval = (u64) appldata_interval * 1000 * TOD_MICRO;
switch (cmd) {
case APPLDATA_ADD_TIMER:
if (appldata_timer_active)
break;
- per_cpu_interval = (u64) (appldata_interval*1000 /
- num_online_cpus()) * TOD_MICRO;
- for_each_online_cpu(i) {
- per_cpu(appldata_timer, i).expires = per_cpu_interval;
- smp_call_function_single(i, add_virt_timer_periodic,
- &per_cpu(appldata_timer, i),
- 0, 1);
- }
+ appldata_timer.expires = timer_interval;
+ add_virt_timer_periodic(&appldata_timer);
appldata_timer_active = 1;
- P_INFO("Monitoring timer started.\n");
break;
case APPLDATA_DEL_TIMER:
- for_each_online_cpu(i)
- del_virt_timer(&per_cpu(appldata_timer, i));
+ del_virt_timer(&appldata_timer);
if (!appldata_timer_active)
break;
appldata_timer_active = 0;
- atomic_set(&appldata_expire_count, num_online_cpus());
- P_INFO("Monitoring timer stopped.\n");
break;
case APPLDATA_MOD_TIMER:
- per_cpu_interval = (u64) (appldata_interval*1000 /
- num_online_cpus()) * TOD_MICRO;
if (!appldata_timer_active)
break;
- for_each_online_cpu(i) {
- struct {
- struct vtimer_list *timer;
- u64 expires;
- } args;
- args.timer = &per_cpu(appldata_timer, i);
- args.expires = per_cpu_interval;
- smp_call_function_single(i, __appldata_mod_vtimer_wrap,
- &args, 0, 1);
- }
+ mod_virt_timer_periodic(&appldata_timer, timer_interval);
}
}
@@ -247,10 +201,10 @@ __appldata_vtimer_setup(int cmd)
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_timer_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int len;
+ unsigned int len;
char buf[2];
if (!*lenp || *ppos) {
@@ -258,7 +212,9 @@ appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
return 0;
}
if (!write) {
- len = sprintf(buf, appldata_timer_active ? "1\n" : "0\n");
+ strncpy(buf, appldata_timer_active ? "1\n" : "0\n",
+ ARRAY_SIZE(buf));
+ len = strnlen(buf, ARRAY_SIZE(buf));
if (len > *lenp)
len = *lenp;
if (copy_to_user(buffer, buf, len))
@@ -268,14 +224,12 @@ appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
len = *lenp;
if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
return -EFAULT;
- get_online_cpus();
spin_lock(&appldata_timer_lock);
if (buf[0] == '1')
__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
else if (buf[0] == '0')
__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
spin_unlock(&appldata_timer_lock);
- put_online_cpus();
out:
*lenp = len;
*ppos += len;
@@ -289,10 +243,11 @@ out:
* current timer interval.
*/
static int
-appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_interval_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int len, interval;
+ unsigned int len;
+ int interval;
char buf[16];
if (!*lenp || *ppos) {
@@ -308,25 +263,17 @@ appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
goto out;
}
len = *lenp;
- if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) {
+ if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
return -EFAULT;
- }
interval = 0;
sscanf(buf, "%i", &interval);
- if (interval <= 0) {
- P_ERROR("Timer CPU interval has to be > 0!\n");
+ if (interval <= 0)
return -EINVAL;
- }
- get_online_cpus();
spin_lock(&appldata_timer_lock);
appldata_interval = interval;
__appldata_vtimer_setup(APPLDATA_MOD_TIMER);
spin_unlock(&appldata_timer_lock);
- put_online_cpus();
-
- P_INFO("Monitoring CPU interval set to %u milliseconds.\n",
- interval);
out:
*lenp = len;
*ppos += len;
@@ -340,16 +287,17 @@ out:
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_generic_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
- int rc, len, found;
+ unsigned int len;
+ int rc, found;
char buf[2];
struct list_head *lh;
found = 0;
- spin_lock(&appldata_ops_lock);
+ mutex_lock(&appldata_ops_mutex);
list_for_each(lh, &appldata_ops_list) {
tmp_ops = list_entry(lh, struct appldata_ops, list);
if (&tmp_ops->ctl_table[2] == ctl) {
@@ -357,15 +305,15 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
}
}
if (!found) {
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
return -ENODEV;
}
ops = ctl->data;
if (!try_module_get(ops->owner)) { // protect this function
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
return -ENODEV;
}
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
if (!*lenp || *ppos) {
*lenp = 0;
@@ -373,7 +321,8 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
return 0;
}
if (!write) {
- len = sprintf(buf, ops->active ? "1\n" : "0\n");
+ strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf));
+ len = strnlen(buf, ARRAY_SIZE(buf));
if (len > *lenp)
len = *lenp;
if (copy_to_user(buffer, buf, len)) {
@@ -389,11 +338,11 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
return -EFAULT;
}
- spin_lock(&appldata_ops_lock);
+ mutex_lock(&appldata_ops_mutex);
if ((buf[0] == '1') && (ops->active == 0)) {
// protect work queue callback
if (!try_module_get(ops->owner)) {
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
module_put(ops->owner);
return -ENODEV;
}
@@ -403,29 +352,22 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
(unsigned long) ops->data, ops->size,
ops->mod_lvl);
if (rc != 0) {
- P_ERROR("START DIAG 0xDC for %s failed, "
- "return code: %d\n", ops->name, rc);
+ pr_err("Starting the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
module_put(ops->owner);
- } else {
- P_INFO("Monitoring %s data enabled, "
- "DIAG 0xDC started.\n", ops->name);
+ } else
ops->active = 1;
- }
} else if ((buf[0] == '0') && (ops->active == 1)) {
ops->active = 0;
rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
(unsigned long) ops->data, ops->size,
ops->mod_lvl);
- if (rc != 0) {
- P_ERROR("STOP DIAG 0xDC for %s failed, "
- "return code: %d\n", ops->name, rc);
- } else {
- P_INFO("Monitoring %s data disabled, "
- "DIAG 0xDC stopped.\n", ops->name);
- }
+ if (rc != 0)
+ pr_err("Stopping the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
module_put(ops->owner);
}
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
out:
*lenp = len;
*ppos += len;
@@ -444,16 +386,16 @@ out:
*/
int appldata_register_ops(struct appldata_ops *ops)
{
- if ((ops->size > APPLDATA_MAX_REC_SIZE) || (ops->size < 0))
+ if (ops->size > APPLDATA_MAX_REC_SIZE)
return -EINVAL;
ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL);
if (!ops->ctl_table)
return -ENOMEM;
- spin_lock(&appldata_ops_lock);
+ mutex_lock(&appldata_ops_mutex);
list_add(&ops->list, &appldata_ops_list);
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
ops->ctl_table[0].procname = appldata_proc_name;
ops->ctl_table[0].maxlen = 0;
@@ -468,12 +410,11 @@ int appldata_register_ops(struct appldata_ops *ops)
ops->sysctl_header = register_sysctl_table(ops->ctl_table);
if (!ops->sysctl_header)
goto out;
- P_INFO("%s-ops registered!\n", ops->name);
return 0;
out:
- spin_lock(&appldata_ops_lock);
+ mutex_lock(&appldata_ops_mutex);
list_del(&ops->list);
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
kfree(ops->ctl_table);
return -ENOMEM;
}
@@ -485,65 +426,100 @@ out:
*/
void appldata_unregister_ops(struct appldata_ops *ops)
{
- spin_lock(&appldata_ops_lock);
+ mutex_lock(&appldata_ops_mutex);
list_del(&ops->list);
- spin_unlock(&appldata_ops_lock);
+ mutex_unlock(&appldata_ops_mutex);
unregister_sysctl_table(ops->sysctl_header);
kfree(ops->ctl_table);
- P_INFO("%s-ops unregistered!\n", ops->name);
}
/********************** module-ops management <END> **************************/
-/******************************* init / exit *********************************/
-
-static void __cpuinit appldata_online_cpu(int cpu)
+/**************************** suspend / resume *******************************/
+static int appldata_freeze(struct device *dev)
{
- init_virt_timer(&per_cpu(appldata_timer, cpu));
- per_cpu(appldata_timer, cpu).function = appldata_timer_function;
- per_cpu(appldata_timer, cpu).data = (unsigned long)
- &appldata_work;
- atomic_inc(&appldata_expire_count);
+ struct appldata_ops *ops;
+ int rc;
+ struct list_head *lh;
+
spin_lock(&appldata_timer_lock);
- __appldata_vtimer_setup(APPLDATA_MOD_TIMER);
+ if (appldata_timer_active) {
+ __appldata_vtimer_setup(APPLDATA_DEL_TIMER);
+ appldata_timer_suspended = 1;
+ }
spin_unlock(&appldata_timer_lock);
+
+ mutex_lock(&appldata_ops_mutex);
+ list_for_each(lh, &appldata_ops_list) {
+ ops = list_entry(lh, struct appldata_ops, list);
+ if (ops->active == 1) {
+ rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
+ (unsigned long) ops->data, ops->size,
+ ops->mod_lvl);
+ if (rc != 0)
+ pr_err("Stopping the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
+ }
+ }
+ mutex_unlock(&appldata_ops_mutex);
+ return 0;
}
-static void __cpuinit appldata_offline_cpu(int cpu)
+static int appldata_restore(struct device *dev)
{
- del_virt_timer(&per_cpu(appldata_timer, cpu));
- if (atomic_dec_and_test(&appldata_expire_count)) {
- atomic_set(&appldata_expire_count, num_online_cpus());
- queue_work(appldata_wq, &appldata_work);
- }
+ struct appldata_ops *ops;
+ int rc;
+ struct list_head *lh;
+
spin_lock(&appldata_timer_lock);
- __appldata_vtimer_setup(APPLDATA_MOD_TIMER);
+ if (appldata_timer_suspended) {
+ __appldata_vtimer_setup(APPLDATA_ADD_TIMER);
+ appldata_timer_suspended = 0;
+ }
spin_unlock(&appldata_timer_lock);
+
+ mutex_lock(&appldata_ops_mutex);
+ list_for_each(lh, &appldata_ops_list) {
+ ops = list_entry(lh, struct appldata_ops, list);
+ if (ops->active == 1) {
+ ops->callback(ops->data); // init record
+ rc = appldata_diag(ops->record_nr,
+ APPLDATA_START_INTERVAL_REC,
+ (unsigned long) ops->data, ops->size,
+ ops->mod_lvl);
+ if (rc != 0) {
+ pr_err("Starting the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
+ }
+ }
+ }
+ mutex_unlock(&appldata_ops_mutex);
+ return 0;
}
-static int __cpuinit appldata_cpu_notify(struct notifier_block *self,
- unsigned long action,
- void *hcpu)
+static int appldata_thaw(struct device *dev)
{
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- appldata_online_cpu((long) hcpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- appldata_offline_cpu((long) hcpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ return appldata_restore(dev);
}
-static struct notifier_block __cpuinitdata appldata_nb = {
- .notifier_call = appldata_cpu_notify,
+static const struct dev_pm_ops appldata_pm_ops = {
+ .freeze = appldata_freeze,
+ .thaw = appldata_thaw,
+ .restore = appldata_restore,
};
+static struct platform_driver appldata_pdrv = {
+ .driver = {
+ .name = "appldata",
+ .owner = THIS_MODULE,
+ .pm = &appldata_pm_ops,
+ },
+};
+/************************* suspend / resume <END> ****************************/
+
+
+/******************************* init / exit *********************************/
+
/*
* appldata_init()
*
@@ -551,29 +527,36 @@ static struct notifier_block __cpuinitdata appldata_nb = {
*/
static int __init appldata_init(void)
{
- int i;
+ int rc;
+
+ init_virt_timer(&appldata_timer);
+ appldata_timer.function = appldata_timer_function;
+ appldata_timer.data = (unsigned long) &appldata_work;
- P_DEBUG("sizeof(parameter_list) = %lu\n",
- sizeof(struct appldata_parameter_list));
+ rc = platform_driver_register(&appldata_pdrv);
+ if (rc)
+ return rc;
+ appldata_pdev = platform_device_register_simple("appldata", -1, NULL,
+ 0);
+ if (IS_ERR(appldata_pdev)) {
+ rc = PTR_ERR(appldata_pdev);
+ goto out_driver;
+ }
appldata_wq = create_singlethread_workqueue("appldata");
if (!appldata_wq) {
- P_ERROR("Could not create work queue\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out_device;
}
- get_online_cpus();
- for_each_online_cpu(i)
- appldata_online_cpu(i);
- put_online_cpus();
-
- /* Register cpu hotplug notifier */
- register_hotcpu_notifier(&appldata_nb);
-
appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
-
- P_DEBUG("Base interface initialized.\n");
return 0;
+
+out_device:
+ platform_device_unregister(appldata_pdev);
+out_driver:
+ platform_driver_unregister(&appldata_pdrv);
+ return rc;
}
__initcall(appldata_init);
@@ -584,7 +567,9 @@ EXPORT_SYMBOL_GPL(appldata_register_ops);
EXPORT_SYMBOL_GPL(appldata_unregister_ops);
EXPORT_SYMBOL_GPL(appldata_diag);
+#ifdef CONFIG_SWAP
EXPORT_SYMBOL_GPL(si_swapinfo);
+#endif
EXPORT_SYMBOL_GPL(nr_threads);
EXPORT_SYMBOL_GPL(nr_running);
EXPORT_SYMBOL_GPL(nr_iowait);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 51181ccdb87..edcf2a70694 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -1,27 +1,24 @@
/*
- * arch/s390/appldata/appldata_mem.c
- *
* Data gathering module for Linux-VM Monitor Stream, Stage 1.
* Collects data related to memory management.
*
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2006
*
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel_stat.h>
-#include <asm/io.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/slab.h>
+#include <asm/io.h>
#include "appldata.h"
-#define MY_PRINT_NAME "appldata_mem" /* for debug messages, etc. */
#define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* Converts #Pages to KB */
/*
@@ -36,7 +33,7 @@
* book:
* http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
-static struct appldata_mem_data {
+struct appldata_mem_data {
u64 timestamp;
u32 sync_count_1; /* after VM collected the record data, */
u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
@@ -67,33 +64,9 @@ static struct appldata_mem_data {
u64 pgmajfault; /* page faults (major only) */
// <-- New in 2.6
-} __attribute__((packed)) appldata_mem_data;
+} __packed;
-static inline void appldata_debug_print(struct appldata_mem_data *mem_data)
-{
- P_DEBUG("--- MEM - RECORD ---\n");
- P_DEBUG("pgpgin = %8lu KB\n", mem_data->pgpgin);
- P_DEBUG("pgpgout = %8lu KB\n", mem_data->pgpgout);
- P_DEBUG("pswpin = %8lu Pages\n", mem_data->pswpin);
- P_DEBUG("pswpout = %8lu Pages\n", mem_data->pswpout);
- P_DEBUG("pgalloc = %8lu \n", mem_data->pgalloc);
- P_DEBUG("pgfault = %8lu \n", mem_data->pgfault);
- P_DEBUG("pgmajfault = %8lu \n", mem_data->pgmajfault);
- P_DEBUG("sharedram = %8lu KB\n", mem_data->sharedram);
- P_DEBUG("totalram = %8lu KB\n", mem_data->totalram);
- P_DEBUG("freeram = %8lu KB\n", mem_data->freeram);
- P_DEBUG("totalhigh = %8lu KB\n", mem_data->totalhigh);
- P_DEBUG("freehigh = %8lu KB\n", mem_data->freehigh);
- P_DEBUG("bufferram = %8lu KB\n", mem_data->bufferram);
- P_DEBUG("cached = %8lu KB\n", mem_data->cached);
- P_DEBUG("totalswap = %8lu KB\n", mem_data->totalswap);
- P_DEBUG("freeswap = %8lu KB\n", mem_data->freeswap);
- P_DEBUG("sync_count_1 = %u\n", mem_data->sync_count_1);
- P_DEBUG("sync_count_2 = %u\n", mem_data->sync_count_2);
- P_DEBUG("timestamp = %lX\n", mem_data->timestamp);
-}
-
/*
* appldata_get_mem_data()
*
@@ -103,7 +76,7 @@ static void appldata_get_mem_data(void *data)
{
/*
* don't put large structures on the stack, we are
- * serialized through the appldata_ops_lock and can use static
+ * serialized through the appldata_ops_mutex and can use static
*/
static struct sysinfo val;
unsigned long ev[NR_VM_EVENT_ITEMS];
@@ -118,9 +91,7 @@ static void appldata_get_mem_data(void *data)
mem_data->pswpin = ev[PSWPIN];
mem_data->pswpout = ev[PSWPOUT];
mem_data->pgalloc = ev[PGALLOC_NORMAL];
-#ifdef CONFIG_ZONE_DMA
mem_data->pgalloc += ev[PGALLOC_DMA];
-#endif
mem_data->pgfault = ev[PGFAULT];
mem_data->pgmajfault = ev[PGMAJFAULT];
@@ -138,11 +109,8 @@ static void appldata_get_mem_data(void *data)
mem_data->totalswap = P2K(val.totalswap);
mem_data->freeswap = P2K(val.freeswap);
- mem_data->timestamp = get_clock();
+ mem_data->timestamp = get_tod_clock();
mem_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
- appldata_debug_print(mem_data);
-#endif
}
@@ -151,7 +119,6 @@ static struct appldata_ops ops = {
.record_nr = APPLDATA_RECORD_MEM_ID,
.size = sizeof(struct appldata_mem_data),
.callback = &appldata_get_mem_data,
- .data = &appldata_mem_data,
.owner = THIS_MODULE,
.mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
};
@@ -164,17 +131,17 @@ static struct appldata_ops ops = {
*/
static int __init appldata_mem_init(void)
{
- int rc;
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
- P_DEBUG("sizeof(mem) = %lu\n", sizeof(struct appldata_mem_data));
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
- rc = appldata_register_ops(&ops);
- if (rc != 0) {
- P_ERROR("Error registering ops, rc = %i\n", rc);
- } else {
- P_DEBUG("%s-ops registered!\n", ops.name);
- }
- return rc;
+ return ret;
}
/*
@@ -185,7 +152,7 @@ static int __init appldata_mem_init(void)
static void __exit appldata_mem_exit(void)
{
appldata_unregister_ops(&ops);
- P_DEBUG("%s-ops unregistered!\n", ops.name);
+ kfree(ops.data);
}
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 4d834433600..66037d2622b 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -1,18 +1,15 @@
/*
- * arch/s390/appldata/appldata_net_sum.c
- *
* Data gathering module for Linux-VM Monitor Stream, Stage 1.
* Collects accumulated network statistics (Packets received/transmitted,
* dropped, errors, ...).
*
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2006
*
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel_stat.h>
#include <linux/netdevice.h>
@@ -21,9 +18,6 @@
#include "appldata.h"
-#define MY_PRINT_NAME "appldata_net_sum" /* for debug messages, etc. */
-
-
/*
* Network data
*
@@ -35,7 +29,7 @@
* book:
* http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
-static struct appldata_net_sum_data {
+struct appldata_net_sum_data {
u64 timestamp;
u32 sync_count_1; /* after VM collected the record data, */
u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
@@ -57,29 +51,9 @@ static struct appldata_net_sum_data {
u64 rx_dropped; /* no space in linux buffers */
u64 tx_dropped; /* no space available in linux */
u64 collisions; /* collisions while transmitting */
-} __attribute__((packed)) appldata_net_sum_data;
+} __packed;
-static inline void appldata_print_debug(struct appldata_net_sum_data *net_data)
-{
- P_DEBUG("--- NET - RECORD ---\n");
-
- P_DEBUG("nr_interfaces = %u\n", net_data->nr_interfaces);
- P_DEBUG("rx_packets = %8lu\n", net_data->rx_packets);
- P_DEBUG("tx_packets = %8lu\n", net_data->tx_packets);
- P_DEBUG("rx_bytes = %8lu\n", net_data->rx_bytes);
- P_DEBUG("tx_bytes = %8lu\n", net_data->tx_bytes);
- P_DEBUG("rx_errors = %8lu\n", net_data->rx_errors);
- P_DEBUG("tx_errors = %8lu\n", net_data->tx_errors);
- P_DEBUG("rx_dropped = %8lu\n", net_data->rx_dropped);
- P_DEBUG("tx_dropped = %8lu\n", net_data->tx_dropped);
- P_DEBUG("collisions = %8lu\n", net_data->collisions);
-
- P_DEBUG("sync_count_1 = %u\n", net_data->sync_count_1);
- P_DEBUG("sync_count_2 = %u\n", net_data->sync_count_2);
- P_DEBUG("timestamp = %lX\n", net_data->timestamp);
-}
-
/*
* appldata_get_net_sum_data()
*
@@ -90,7 +64,6 @@ static void appldata_get_net_sum_data(void *data)
int i;
struct appldata_net_sum_data *net_data;
struct net_device *dev;
- struct net_device_stats *stats;
unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors,
tx_errors, rx_dropped, tx_dropped, collisions;
@@ -107,9 +80,13 @@ static void appldata_get_net_sum_data(void *data)
rx_dropped = 0;
tx_dropped = 0;
collisions = 0;
- read_lock(&dev_base_lock);
- for_each_netdev(&init_net, dev) {
- stats = dev->get_stats(dev);
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
+ const struct rtnl_link_stats64 *stats;
+ struct rtnl_link_stats64 temp;
+
+ stats = dev_get_stats(dev, &temp);
rx_packets += stats->rx_packets;
tx_packets += stats->tx_packets;
rx_bytes += stats->rx_bytes;
@@ -121,7 +98,8 @@ static void appldata_get_net_sum_data(void *data)
collisions += stats->collisions;
i++;
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
+
net_data->nr_interfaces = i;
net_data->rx_packets = rx_packets;
net_data->tx_packets = tx_packets;
@@ -133,11 +111,8 @@ static void appldata_get_net_sum_data(void *data)
net_data->tx_dropped = tx_dropped;
net_data->collisions = collisions;
- net_data->timestamp = get_clock();
+ net_data->timestamp = get_tod_clock();
net_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
- appldata_print_debug(net_data);
-#endif
}
@@ -146,7 +121,6 @@ static struct appldata_ops ops = {
.record_nr = APPLDATA_RECORD_NET_SUM_ID,
.size = sizeof(struct appldata_net_sum_data),
.callback = &appldata_get_net_sum_data,
- .data = &appldata_net_sum_data,
.owner = THIS_MODULE,
.mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
};
@@ -159,17 +133,17 @@ static struct appldata_ops ops = {
*/
static int __init appldata_net_init(void)
{
- int rc;
+ int ret;
- P_DEBUG("sizeof(net) = %lu\n", sizeof(struct appldata_net_sum_data));
+ ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
- rc = appldata_register_ops(&ops);
- if (rc != 0) {
- P_ERROR("Error registering ops, rc = %i\n", rc);
- } else {
- P_DEBUG("%s-ops registered!\n", ops.name);
- }
- return rc;
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
}
/*
@@ -180,7 +154,7 @@ static int __init appldata_net_init(void)
static void __exit appldata_net_exit(void)
{
appldata_unregister_ops(&ops);
- P_DEBUG("%s-ops unregistered!\n", ops.name);
+ kfree(ops.data);
}
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 6b3eafe1045..69b23b25ac3 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -1,14 +1,15 @@
/*
- * arch/s390/appldata/appldata_os.c
- *
* Data gathering module for Linux-VM Monitor Stream, Stage 1.
* Collects misc. OS related data (CPU utilization, running processes).
*
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2006
*
* Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
+#define KMSG_COMPONENT "appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -22,7 +23,6 @@
#include "appldata.h"
-#define MY_PRINT_NAME "appldata_os" /* for debug messages, etc. */
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
@@ -89,44 +89,6 @@ static struct appldata_ops ops = {
};
-static inline void appldata_print_debug(struct appldata_os_data *os_data)
-{
- int a0, a1, a2, i;
-
- P_DEBUG("--- OS - RECORD ---\n");
- P_DEBUG("nr_threads = %u\n", os_data->nr_threads);
- P_DEBUG("nr_running = %u\n", os_data->nr_running);
- P_DEBUG("nr_iowait = %u\n", os_data->nr_iowait);
- P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->avenrun[0],
- os_data->avenrun[1], os_data->avenrun[2]);
- a0 = os_data->avenrun[0];
- a1 = os_data->avenrun[1];
- a2 = os_data->avenrun[2];
- P_DEBUG("avenrun(float) = %d.%02d / %d.%02d / %d.%02d\n",
- LOAD_INT(a0), LOAD_FRAC(a0), LOAD_INT(a1), LOAD_FRAC(a1),
- LOAD_INT(a2), LOAD_FRAC(a2));
-
- P_DEBUG("nr_cpus = %u\n", os_data->nr_cpus);
- for (i = 0; i < os_data->nr_cpus; i++) {
- P_DEBUG("cpu%u : user = %u, nice = %u, system = %u, "
- "idle = %u, irq = %u, softirq = %u, iowait = %u, "
- "steal = %u\n",
- os_data->os_cpu[i].cpu_id,
- os_data->os_cpu[i].per_cpu_user,
- os_data->os_cpu[i].per_cpu_nice,
- os_data->os_cpu[i].per_cpu_system,
- os_data->os_cpu[i].per_cpu_idle,
- os_data->os_cpu[i].per_cpu_irq,
- os_data->os_cpu[i].per_cpu_softirq,
- os_data->os_cpu[i].per_cpu_iowait,
- os_data->os_cpu[i].per_cpu_steal);
- }
-
- P_DEBUG("sync_count_1 = %u\n", os_data->sync_count_1);
- P_DEBUG("sync_count_2 = %u\n", os_data->sync_count_2);
- P_DEBUG("timestamp = %lX\n", os_data->timestamp);
-}
-
/*
* appldata_get_os_data()
*
@@ -151,21 +113,21 @@ static void appldata_get_os_data(void *data)
j = 0;
for_each_online_cpu(i) {
os_data->os_cpu[j].per_cpu_user =
- cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
os_data->os_cpu[j].per_cpu_nice =
- cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
os_data->os_cpu[j].per_cpu_system =
- cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
os_data->os_cpu[j].per_cpu_idle =
- cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
os_data->os_cpu[j].per_cpu_irq =
- cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
os_data->os_cpu[j].per_cpu_softirq =
- cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
os_data->os_cpu[j].per_cpu_iowait =
- cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
os_data->os_cpu[j].per_cpu_steal =
- cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+ cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
os_data->os_cpu[j].cpu_id = i;
j++;
}
@@ -180,33 +142,22 @@ static void appldata_get_os_data(void *data)
APPLDATA_START_INTERVAL_REC,
(unsigned long) ops.data, new_size,
ops.mod_lvl);
- if (rc != 0) {
- P_ERROR("os: START NEW DIAG 0xDC failed, "
- "return code: %d, new size = %i\n", rc,
- new_size);
- P_INFO("os: stopping old record now\n");
- } else
- P_INFO("os: new record size = %i\n", new_size);
+ if (rc != 0)
+ pr_err("Starting a new OS data collection "
+ "failed with rc=%d\n", rc);
rc = appldata_diag(APPLDATA_RECORD_OS_ID,
APPLDATA_STOP_REC,
(unsigned long) ops.data, ops.size,
ops.mod_lvl);
if (rc != 0)
- P_ERROR("os: STOP OLD DIAG 0xDC failed, "
- "return code: %d, old size = %i\n", rc,
- ops.size);
- else
- P_INFO("os: old record size = %i stopped\n",
- ops.size);
+ pr_err("Stopping a faulty OS data "
+ "collection failed with rc=%d\n", rc);
}
ops.size = new_size;
}
- os_data->timestamp = get_clock();
+ os_data->timestamp = get_tod_clock();
os_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
- appldata_print_debug(os_data);
-#endif
}
@@ -220,19 +171,16 @@ static int __init appldata_os_init(void)
int rc, max_size;
max_size = sizeof(struct appldata_os_data) +
- (NR_CPUS * sizeof(struct appldata_os_per_cpu));
+ (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
if (max_size > APPLDATA_MAX_REC_SIZE) {
- P_ERROR("Max. size of OS record = %i, bigger than maximum "
- "record size (%i)\n", max_size, APPLDATA_MAX_REC_SIZE);
+ pr_err("Maximum OS record size %i exceeds the maximum "
+ "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
rc = -ENOMEM;
goto out;
}
- P_DEBUG("max. sizeof(os) = %i, sizeof(os_cpu) = %lu\n", max_size,
- sizeof(struct appldata_os_per_cpu));
- appldata_os_data = kzalloc(max_size, GFP_DMA);
+ appldata_os_data = kzalloc(max_size, GFP_KERNEL | GFP_DMA);
if (appldata_os_data == NULL) {
- P_ERROR("No memory for %s!\n", ops.name);
rc = -ENOMEM;
goto out;
}
@@ -240,17 +188,12 @@ static int __init appldata_os_init(void)
appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
appldata_os_data->cpu_offset = offsetof(struct appldata_os_data,
os_cpu);
- P_DEBUG("cpu offset = %u\n", appldata_os_data->cpu_offset);
ops.data = appldata_os_data;
ops.callback = &appldata_get_os_data;
rc = appldata_register_ops(&ops);
- if (rc != 0) {
- P_ERROR("Error registering ops, rc = %i\n", rc);
+ if (rc != 0)
kfree(appldata_os_data);
- } else {
- P_DEBUG("%s-ops registered!\n", ops.name);
- }
out:
return rc;
}
@@ -264,7 +207,6 @@ static void __exit appldata_os_exit(void)
{
appldata_unregister_ops(&ops);
kfree(appldata_os_data);
- P_DEBUG("%s-ops unregistered!\n", ops.name);
}
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
new file mode 100644
index 00000000000..017d5912ad2
--- /dev/null
+++ b/arch/s390/boot/.gitignore
@@ -0,0 +1,2 @@
+image
+bzImage
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 4d97eef36b8..9a42ecec564 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -6,13 +6,21 @@ COMPILE_VERSION := __linux_compile_version_id__`hostname | \
tr -c '[0-9A-Za-z]' '_'`__`date | \
tr -c '[0-9A-Za-z]' '_'`_t
-EXTRA_CFLAGS := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
+ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
targets := image
+targets += bzImage
+subdir- := compressed
$(obj)/image: vmlinux FORCE
$(call if_changed,objcopy)
-install: $(CONFIGURE) $(obj)/image
- sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \
- System.map Kerntypes "$(INSTALL_PATH)"
+$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+install: $(CONFIGURE) $(obj)/bzImage
+ sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+ System.map "$(INSTALL_PATH)"
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
new file mode 100644
index 00000000000..ae06b9b4c02
--- /dev/null
+++ b/arch/s390/boot/compressed/.gitignore
@@ -0,0 +1,3 @@
+sizes.h
+vmlinux
+vmlinux.lds
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
new file mode 100644
index 00000000000..f90d1fc6d60
--- /dev/null
+++ b/arch/s390/boot/compressed/Makefile
@@ -0,0 +1,71 @@
+#
+# linux/arch/s390/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+BITS := $(if $(CONFIG_64BIT),64,31)
+
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head$(BITS).o
+
+KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
+KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
+KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
+
+GCOV_PROFILE := n
+
+OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o)
+OBJECTS += $(obj)/head$(BITS).o $(obj)/misc.o $(obj)/piggy.o
+
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
+ $(call if_changed,ld)
+ @:
+
+# Generate sizes.h from the symbol table of the prerequisite vmlinux: each nm
+# output line for __bss_start or _end becomes "#define SZ<symbol> 0x<address>",
+# i.e. SZ__bss_start and SZ_end. head$(BITS).S and misc.c include the result.
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p'
+
+quiet_cmd_sizes = GEN $@
+ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
+
+$(obj)/sizes.h: vmlinux
+ $(call if_changed,sizes)
+
+AFLAGS_head$(BITS).o += -I$(obj)
+$(obj)/head$(BITS).o: $(obj)/sizes.h
+
+CFLAGS_misc.o += -I$(obj)
+$(obj)/misc.o: $(obj)/sizes.h
+
+OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
+$(obj)/vmlinux.bin: vmlinux
+ $(call if_changed,objcopy)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
+suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_XZ) := xz
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+ $(call if_changed,lz4)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+ $(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+ $(call if_changed,lzo)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+ $(call if_changed,xzkern)
+
+LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+ $(call if_changed,ld)
diff --git a/arch/s390/boot/compressed/head31.S b/arch/s390/boot/compressed/head31.S
new file mode 100644
index 00000000000..e8c9e18b803
--- /dev/null
+++ b/arch/s390/boot/compressed/head31.S
@@ -0,0 +1,51 @@
+/*
+ * Startup glue code to uncompress the kernel
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+/*
+ * startup_continue (31 bit): sets up a stack, calls decompress_kernel() and
+ * then uses a small copy loop ("mover"), itself copied to address 0x200, to
+ * move .Lmvsize bytes from (returned address + .Loffset) to .Loffset before
+ * branching to .Loffset.
+ */
+__HEAD
+ENTRY(startup_continue)
+ basr %r13,0 # get base
+.LPG1:
+ # setup stack
+ l %r15,.Lstack-.LPG1(%r13)
+ ahi %r15,-96 # NOTE(review): presumably room for one ABI stack frame - confirm
+ l %r1,.Ldecompress-.LPG1(%r13)
+ basr %r14,%r1 # call decompress_kernel
+ # setup registers for memory mover & branch to target
+ lr %r4,%r2 # %r2 is assumed to hold the return value of decompress_kernel
+ l %r2,.Loffset-.LPG1(%r13) # destination address
+ la %r4,0(%r2,%r4) # source address = returned address + .Loffset
+ l %r3,.Lmvsize-.LPG1(%r13) # destination length
+ lr %r5,%r3 # source length, same as the destination length
+ # move the memory mover someplace safe
+ la %r1,0x200
+ mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13)
+ # decompressed image is started at 0x11000
+ lr %r6,%r2 # keep the entry address, mvcle is expected to modify %r2
+ br %r1 # continue in the relocated copy of the mover
+mover:
+ mvcle %r2,%r4,0
+ jo mover # NOTE(review): presumably repeats until mvcle reports completion - confirm
+ br %r6
+mover_end:
+
+ .align 8
+.Lstack:
+ .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+.Ldecompress:
+ .long decompress_kernel
+.Loffset:
+ .long 0x11000
+.Lmvsize:
+ .long SZ__bss_start
diff --git a/arch/s390/boot/compressed/head64.S b/arch/s390/boot/compressed/head64.S
new file mode 100644
index 00000000000..f86a4eef28a
--- /dev/null
+++ b/arch/s390/boot/compressed/head64.S
@@ -0,0 +1,48 @@
+/*
+ * Startup glue code to uncompress the kernel
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+/*
+ * startup_continue (64 bit): sets up a stack, calls decompress_kernel() and
+ * then uses a small copy loop ("mover"), itself copied to address 0x200, to
+ * move .Lmvsize bytes from (returned address + .Loffset) to .Loffset before
+ * branching to .Loffset.
+ */
+__HEAD
+ENTRY(startup_continue)
+ basr %r13,0 # get base
+.LPG1:
+ # setup stack
+ lg %r15,.Lstack-.LPG1(%r13)
+ aghi %r15,-160 # NOTE(review): presumably room for one ABI stack frame - confirm
+ brasl %r14,decompress_kernel
+ # setup registers for memory mover & branch to target
+ lgr %r4,%r2 # %r2 is assumed to hold the return value of decompress_kernel
+ lg %r2,.Loffset-.LPG1(%r13) # destination address
+ la %r4,0(%r2,%r4) # source address = returned address + .Loffset
+ lg %r3,.Lmvsize-.LPG1(%r13) # destination length
+ lgr %r5,%r3 # source length, same as the destination length
+ # move the memory mover someplace safe
+ la %r1,0x200
+ mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13)
+ # decompressed image is started at 0x11000
+ lgr %r6,%r2 # keep the entry address, mvcle is expected to modify %r2
+ br %r1 # continue in the relocated copy of the mover
+mover:
+ mvcle %r2,%r4,0
+ jo mover # NOTE(review): presumably repeats until mvcle reports completion - confirm
+ br %r6
+mover_end:
+
+ .align 8
+.Lstack:
+ .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+.Loffset:
+ .quad 0x11000
+.Lmvsize:
+ .quad SZ__bss_start
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
new file mode 100644
index 00000000000..57cbaff1f39
--- /dev/null
+++ b/arch/s390/boot/compressed/misc.c
@@ -0,0 +1,175 @@
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/ipl.h>
+#include "sizes.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+
+#undef memset
+#undef memcpy
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+/* Symbols defined by linker scripts */
+extern char input_data[];
+extern int input_len;
+extern char _text, _end;
+extern char _bss, _ebss;
+
+static void error(char *m);
+
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
+
+#ifdef CONFIG_HAVE_KERNEL_BZIP2
+#define HEAP_SIZE 0x400000
+#else
+#define HEAP_SIZE 0x10000
+#endif
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+/*
+ * External routine that prints the decompressor's messages. Declared with an
+ * explicit int return type: the previous declaration relied on implicit int,
+ * which C99 removed from the language.
+ */
+extern int _sclp_print_early(const char *);
+
+/*
+ * Minimal puts() for the decompressor: hands @s to _sclp_print_early().
+ * The callee's return value is ignored; always returns 0.
+ */
+static int puts(const char *s)
+{
+	_sclp_print_early(s);
+	return 0;
+}
+
+/*
+ * Freestanding memset() for the decompressor: stores @c, converted to char,
+ * into each of the first @n bytes at @s and returns @s.
+ */
+void *memset(void *s, int c, size_t n)
+{
+	char *cursor = s;
+	char *const limit = cursor + n;
+
+	for (; cursor != limit; cursor++)
+		*cursor = c;
+	return s;
+}
+
+/*
+ * Freestanding memcpy() for the decompressor: copies @n bytes from @src to
+ * @dest one byte at a time in ascending address order and returns @dest.
+ */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	const char *from = src;
+	char *to = dest;
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		to[i] = from[i];
+	return dest;
+}
+
+/*
+ * Freestanding memmove() for the decompressor: copies @n bytes from @src to
+ * @dest, choosing the copy direction so that overlapping areas are handled,
+ * and returns @dest.
+ */
+void *memmove(void *dest, const void *src, size_t n)
+{
+	const char *from = src;
+	char *to = dest;
+
+	if (to > from) {
+		/* destination above source: copy from the last byte down */
+		while (n) {
+			n--;
+			to[n] = from[n];
+		}
+		return dest;
+	}
+	/* destination at or below source: copy in ascending order */
+	while (n--)
+		*to++ = *from++;
+	return dest;
+}
+
+/*
+ * Error handler of the decompressor; decompress_kernel() passes it as the
+ * last argument to decompress(). Prints the message @x followed by
+ * " -- System halted" and then loads the PSW constant below with lpsw.
+ */
+static void error(char *x)
+{
+ /* NOTE(review): presumably a disabled-wait PSW, so lpsw never returns -- confirm */
+ unsigned long long psw = 0x000a0000deadbeefULL;
+
+ puts("\n\n");
+ puts(x);
+ puts("\n\n -- System halted");
+
+ asm volatile("lpsw %0" : : "Q" (psw));
+}
+
+/*
+ * Safeguard the ipl parameter block against a memory area that is about to
+ * be overwritten. The validity check for the ipl parameter block is complex
+ * (see cio_get_iplinfo and ipl_save_parameters), but if the page addressed
+ * by the ipl parameter block pointer intersects with the passed memory area
+ * we can safely assume that we can read from that memory. In that case just
+ * copy the page to IPL_PARMBLOCK_ORIGIN, even if there is no ipl parameter
+ * block, and let the lowcore pointer refer to the copy.
+ *
+ * @start: first byte of the memory area that will be overwritten
+ * @size:  length of that area in bytes
+ */
+static void check_ipl_parmblock(void *start, unsigned long size)
+{
+ void *src, *dst;
+
+ src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ /* [src, src + PAGE_SIZE) and [start, start + size) do not overlap */
+ if (src + PAGE_SIZE <= start || src >= start + size)
+ return;
+ dst = (void *) IPL_PARMBLOCK_ORIGIN;
+ /* memmove, not memcpy: nothing here rules out that src and dst overlap */
+ memmove(dst, src, PAGE_SIZE);
+ S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
+}
+
+/*
+ * decompress_kernel() - unpack the compressed kernel image
+ *
+ * Called from startup_continue. Picks a page-aligned output buffer behind
+ * the decompressor image and its heap, rescues the ipl parameter block and
+ * the initrd from areas that are about to be overwritten, clears the bss
+ * and runs the decompressor on input_data.
+ *
+ * Returns the address of the decompressed image.
+ */
+unsigned long decompress_kernel(void)
+{
+	unsigned long output_addr;
+	unsigned char *output;
+
+	output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL;
+	output = (unsigned char *) output_addr;
+	check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Move the initrd right behind the end of the decompressed
+	 * kernel image. This has to happen before the bss is cleared:
+	 * an initrd that overlaps the bss would otherwise be zeroed
+	 * before it is moved.
+	 */
+	if (INITRD_START && INITRD_SIZE &&
+	    INITRD_START < (unsigned long) output + SZ__bss_start) {
+		/* the second argument is a size, not an end address */
+		check_ipl_parmblock(output + SZ__bss_start, INITRD_SIZE);
+		memmove(output + SZ__bss_start,
+			(void *) INITRD_START, INITRD_SIZE);
+		INITRD_START = (unsigned long) output + SZ__bss_start;
+	}
+#endif
+
+	/*
+	 * free_mem_ptr and free_mem_end_ptr are uninitialized statics, so
+	 * they must be set up after the bss has been cleared.
+	 */
+	memset(&_bss, 0, &_ebss - &_bss);
+	free_mem_ptr = (unsigned long)&_end;
+	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
+
+	puts("Uncompressing Linux... ");
+	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
+	puts("Ok, booting the kernel.\n");
+	return (unsigned long) output;
+}
+
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
new file mode 100644
index 00000000000..8e1fb823928
--- /dev/null
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,55 @@
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef CONFIG_64BIT /* 64-bit build: emit elf64-s390 */
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") /* same BFD target for the default, -EB and -EL cases */
+OUTPUT_ARCH(s390:64-bit)
+#else /* otherwise: emit elf32-s390 for the 31-bit architecture */
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+#endif
+
+ENTRY(startup) /* entry point is the symbol named startup */
+
+SECTIONS
+{
+ /* Be careful: parts of head_64.S assume startup_32 is at
+ * address 0.
+ */
+ . = 0; /* start laying out sections at address 0 */
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT /* macro, presumably provided by asm-generic/vmlinux.lds.h - confirm */
+ _ehead = . ;
+ }
+ .rodata.compressed : { /* placed directly after the head text, ahead of .text */
+ *(.rodata.compressed)
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ . = ALIGN(256); /* start .bss on a 256-byte boundary */
+ .bss : {
+ _bss = . ; /* _bss and _ebss delimit the range cleared by the memset in decompress_kernel() */
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8); /* For convenience during zeroing */
+ _ebss = .;
+ }
+ _end = .; /* first address past the linked image; decompress_kernel() places its heap here */
+}
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr
new file mode 100644
index 00000000000..f02382ae5c4
--- /dev/null
+++ b/arch/s390/boot/compressed/vmlinux.scr
@@ -0,0 +1,10 @@
+SECTIONS
+{
+ .rodata.compressed : {
+ input_len = .; /* symbol at the 4-byte length word emitted next */
+ LONG(input_data_end - input_data) input_data = .; /* store the payload size, then mark the payload start */
+ *(.data) /* payload bytes come from the input object's .data */
+ output_len = . - 4; /* address of the payload's last 4 bytes; presumably the uncompressed size written by the compressor - confirm */
+ input_data_end = .; /* end of the payload */
+ }
+}
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index d4026f62cb0..aed3069699b 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -21,8 +21,8 @@
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
new file mode 100644
index 00000000000..fd09a10a2b5
--- /dev/null
+++ b/arch/s390/configs/default_defconfig
@@ -0,0 +1,687 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_PREEMPT=y
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_PCI_DEBUG=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_RDS_DEBUG=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_DEBUG=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_READABLE_ASM=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_SELFTEST=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_SLUB_STATS=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_RT_MUTEX_TESTER=y
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=300
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
+CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_TEST_LIST_SORT=y
+CONFIG_KPROBES_SANITY_TEST=y
+CONFIG_RBTREE_TEST=y
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_DMA_API_DEBUG=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
new file mode 100644
index 00000000000..b061180d354
--- /dev/null
+++ b/arch/s390/configs/gcov_defconfig
@@ -0,0 +1,640 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_GCOV_KERNEL=y
+CONFIG_GCOV_PROFILE_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_LATENCYTOP=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_RBTREE_TEST=m
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
new file mode 100644
index 00000000000..d279baa0801
--- /dev/null
+++ b/arch/s390/configs/performance_defconfig
@@ -0,0 +1,632 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_IP6_NF_TARGET_NPT=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
new file mode 100644
index 00000000000..948e0e057a2
--- /dev/null
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -0,0 +1,86 @@
+# CONFIG_SWAP is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+# CONFIG_COMPAT is not set
+CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_CHECK_STACK is not set
+# CONFIG_CHSC_SCH is not set
+# CONFIG_SCM_BUS is not set
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_IUCV is not set
+CONFIG_ATM=y
+CONFIG_ATM_LANE=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV_XPRAM is not set
+# CONFIG_DCSSBLK is not set
+# CONFIG_DASD is not set
+CONFIG_ENCLOSURE_SERVICES=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_ENCLOSURE=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_ZFCP=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_HVC_IUCV is not set
+CONFIG_RAW_DRIVER=y
+# CONFIG_SCLP_ASYNC is not set
+# CONFIG_HMC_DRV is not set
+# CONFIG_S390_TAPE is not set
+# CONFIG_VMCP is not set
+# CONFIG_MONWRITER is not set
+# CONFIG_S390_VMUR is not set
+# CONFIG_HID is not set
+CONFIG_MEMSTICK=y
+CONFIG_MEMSTICK_DEBUG=y
+CONFIG_MEMSTICK_UNSAFE_RESUME=y
+CONFIG_MSPRO_BLOCK=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_INOTIFY_USER is not set
+CONFIG_CONFIGFS_FS=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+# CONFIG_PFAULT is not set
+# CONFIG_S390_HYPFS_FS is not set
+# CONFIG_VIRTUALIZATION is not set
+# CONFIG_S390_GUEST is not set
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 6a1157fa4f9..7f0b7cda625 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o des_check_key.o
+obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index e33f32b54c0..23223cd63e5 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -4,7 +4,7 @@
* s390 implementation of the AES Cipher Algorithm.
*
* s390 Version:
- * Copyright IBM Corp. 2005,2007
+ * Copyright IBM Corp. 2005, 2007
* Author(s): Jan Glauber (jang@de.ibm.com)
* Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback
*
@@ -17,21 +17,26 @@
*
*/
+#define KMSG_COMPONENT "aes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/spinlock.h>
#include "crypt_s390.h"
#define AES_KEYLEN_128 1
#define AES_KEYLEN_192 2
#define AES_KEYLEN_256 4
-static char keylen_flag = 0;
+static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
+static char keylen_flag;
struct s390_aes_ctx {
- u8 iv[AES_BLOCK_SIZE];
u8 key[AES_MAX_KEY_SIZE];
long enc;
long dec;
@@ -42,6 +47,23 @@ struct s390_aes_ctx {
} fallback;
};
+/*
+ * Parameter block handed to crypt_s390_pcc() by xts_aes_crypt().
+ * xts_aes_crypt() passes the address of key[offset], so with a non-zero
+ * offset the block seen by the instruction starts inside the key field.
+ */
+struct pcc_param {
+ u8 key[32]; /* copy of s390_xts_ctx.pcc_key */
+ u8 tweak[16]; /* copy of the request IV (walk->iv) */
+ u8 block[16]; /* zeroed before the PCC call */
+ u8 bit[16]; /* zeroed before the PCC call */
+ u8 xts[16]; /* zeroed before the call, copied into the KM parameter block afterwards */
+};
+
+/* Per-transform context of the xts(aes) algorithm (see xts_aes_set_key()). */
+struct s390_xts_ctx {
+ u8 key[32]; /* first key half; a 16-byte half is stored at offset 16 */
+ u8 pcc_key[32]; /* second key half, same layout; input to crypt_s390_pcc() */
+ long enc; /* KM function code for encryption, 0 for 48-byte keys */
+ long dec; /* KM function code for decryption, 0 for 48-byte keys */
+ int key_len; /* total key length in bytes as passed to setkey */
+ struct crypto_blkcipher *fallback; /* used for 48-byte keys */
+};
+
/*
* Check if the key_len is supported by the HW.
* Returns 0 if it is, a positive number if it is not and software fallback is
@@ -75,14 +97,14 @@ static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
int ret;
- sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
- sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags &
+ sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+ sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
CRYPTO_TFM_REQ_MASK);
ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
if (ret) {
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags &
+ tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags &
CRYPTO_TFM_RES_MASK);
}
return ret;
@@ -169,8 +191,9 @@ static int fallback_init_cip(struct crypto_tfm *tfm)
CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(sctx->fallback.cip)) {
- printk(KERN_ERR "Error allocating fallback algo %s\n", name);
- return PTR_ERR(sctx->fallback.blk);
+ pr_err("Allocating AES fallback algorithm %s failed\n",
+ name);
+ return PTR_ERR(sctx->fallback.cip);
}
return 0;
@@ -193,7 +216,6 @@ static struct crypto_alg aes_alg = {
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_aes_ctx),
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
.cra_init = fallback_init_cip,
.cra_exit = fallback_exit_cip,
.cra_u = {
@@ -303,7 +325,8 @@ static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
u8 *in = walk->src.virt.addr;
ret = crypt_s390_km(func, param, out, in, n);
- BUG_ON((ret < 0) || (ret != n));
+ if (ret < 0 || ret != n)
+ return -EIO;
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -349,7 +372,8 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(sctx->fallback.blk)) {
- printk(KERN_ERR "Error allocating fallback algo %s\n", name);
+ pr_err("Allocating AES fallback algorithm %s failed\n",
+ name);
return PTR_ERR(sctx->fallback.blk);
}
@@ -374,7 +398,6 @@ static struct crypto_alg ecb_aes_alg = {
.cra_ctxsize = sizeof(struct s390_aes_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ecb_aes_alg.cra_list),
.cra_init = fallback_init_blk,
.cra_exit = fallback_exit_blk,
.cra_u = {
@@ -418,29 +441,36 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return aes_set_key(tfm, in_key, key_len);
}
-static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
struct blkcipher_walk *walk)
{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes = walk->nbytes;
+ struct {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[AES_MAX_KEY_SIZE];
+ } param;
if (!nbytes)
goto out;
- memcpy(param, walk->iv, AES_BLOCK_SIZE);
+ memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ memcpy(param.key, sctx->key, sctx->key_len);
do {
/* only use complete blocks */
unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, param, out, in, n);
- BUG_ON((ret < 0) || (ret != n));
+ ret = crypt_s390_kmc(func, &param, out, in, n);
+ if (ret < 0 || ret != n)
+ return -EIO;
nbytes &= AES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
} while ((nbytes = walk->nbytes));
- memcpy(walk->iv, param, AES_BLOCK_SIZE);
+ memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
out:
return ret;
@@ -457,7 +487,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
return fallback_blk_enc(desc, dst, src, nbytes);
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk);
+ return cbc_aes_crypt(desc, sctx->enc, &walk);
}
static int cbc_aes_decrypt(struct blkcipher_desc *desc,
@@ -471,7 +501,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
return fallback_blk_dec(desc, dst, src, nbytes);
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk);
+ return cbc_aes_crypt(desc, sctx->dec, &walk);
}
static struct crypto_alg cbc_aes_alg = {
@@ -484,7 +514,6 @@ static struct crypto_alg cbc_aes_alg = {
.cra_ctxsize = sizeof(struct s390_aes_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(cbc_aes_alg.cra_list),
.cra_init = fallback_init_blk,
.cra_exit = fallback_exit_blk,
.cra_u = {
@@ -499,15 +528,375 @@ static struct crypto_alg cbc_aes_alg = {
}
};
+/*
+ * Forward the key to the fallback blkcipher.
+ *
+ * The request flags of @tfm are copied to the fallback before the call.
+ * If the fallback rejects the key, its result flags are copied back to
+ * @tfm.
+ *
+ * Returns the result of crypto_blkcipher_setkey() on the fallback.
+ */
+static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
+			       unsigned int len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	int ret;	/* signed: carries a negative errno on failure */
+
+	xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+/*
+ * Decrypt with the fallback blkcipher.
+ *
+ * desc->tfm is temporarily switched to the fallback transform and
+ * restored before returning.
+ *
+ * Returns the result of crypto_blkcipher_decrypt_iv().
+ */
+static int xts_fallback_decrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm;
+	int ret;	/* signed: carries a negative errno on failure */
+
+	tfm = desc->tfm;
+	desc->tfm = xts_ctx->fallback;
+
+	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+/*
+ * Encrypt with the fallback blkcipher.
+ *
+ * desc->tfm is temporarily switched to the fallback transform and
+ * restored before returning.
+ *
+ * Returns the result of crypto_blkcipher_encrypt_iv().
+ */
+static int xts_fallback_encrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm;
+	int ret;	/* signed: carries a negative errno on failure */
+
+	tfm = desc->tfm;
+	desc->tfm = xts_ctx->fallback;
+
+	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+/*
+ * Set the XTS key.
+ *
+ * @in_key holds two equally sized halves: the first is stored in
+ * xts_ctx->key, the second in xts_ctx->pcc_key.  16-byte halves are
+ * stored at offset 16 of the 32-byte buffers, matching the offset that
+ * xts_aes_crypt() derives from key_len.
+ *
+ * 48-byte keys are handed to the software fallback; enc/dec are set
+ * to 0 in that case.
+ *
+ * Returns 0 on success, -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN set) for
+ * an unsupported length, or the fallback's error if it rejects the key.
+ */
+static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int ret;
+
+	switch (key_len) {
+	case 32:
+		xts_ctx->enc = KM_XTS_128_ENCRYPT;
+		xts_ctx->dec = KM_XTS_128_DECRYPT;
+		memcpy(xts_ctx->key + 16, in_key, 16);
+		memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
+		break;
+	case 48:
+		xts_ctx->enc = 0;
+		xts_ctx->dec = 0;
+		/* do not report success if the fallback refused the key */
+		ret = xts_fallback_setkey(tfm, in_key, key_len);
+		if (ret)
+			return ret;
+		break;
+	case 64:
+		xts_ctx->enc = KM_XTS_256_ENCRYPT;
+		xts_ctx->dec = KM_XTS_256_DECRYPT;
+		memcpy(xts_ctx->key, in_key, 32);
+		memcpy(xts_ctx->pcc_key, in_key + 32, 32);
+		break;
+	default:
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	xts_ctx->key_len = key_len;
+	return 0;
+}
+
+/*
+ * Run the XTS operation selected by @func over the data described by @walk.
+ *
+ * First crypt_s390_pcc() is called on a parameter block built from the
+ * second key half and the request IV; the resulting pcc_param.xts field
+ * is then used as the initial value of the KM parameter block.
+ * Afterwards crypt_s390_km() processes the data in whole AES blocks.
+ *
+ * Returns the result of the blkcipher walk, or -EIO if one of the
+ * crypt_s390_* calls fails or processes fewer bytes than requested.
+ */
+static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
+ struct s390_xts_ctx *xts_ctx,
+ struct blkcipher_walk *walk)
+{
+ /* 16 for a 32-byte key, 0 for a 64-byte key: where setkey stored the key */
+ unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
+ int ret = blkcipher_walk_virt(desc, walk);
+ unsigned int nbytes = walk->nbytes;
+ unsigned int n;
+ u8 *in, *out;
+ struct pcc_param pcc_param;
+ struct {
+ u8 key[32];
+ u8 init[16];
+ } xts_param;
+
+ if (!nbytes)
+ goto out;
+
+ memset(pcc_param.block, 0, sizeof(pcc_param.block));
+ memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
+ memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
+ ret = crypt_s390_pcc(func, &pcc_param.key[offset]);
+ if (ret < 0)
+ return -EIO;
+
+ memcpy(xts_param.key, xts_ctx->key, 32);
+ memcpy(xts_param.init, pcc_param.xts, 16);
+ do {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+
+ ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n);
+ if (ret < 0 || ret != n)
+ return -EIO;
+
+ /* bytes beyond the last complete block are reported as unprocessed */
+ nbytes &= AES_BLOCK_SIZE - 1;
+ ret = blkcipher_walk_done(desc, walk, nbytes);
+ } while ((nbytes = walk->nbytes));
+out:
+ return ret;
+}
+
+/*
+ * blkcipher encrypt entry point for xts(aes).
+ *
+ * Requests for a 48-byte key are passed to the fallback; everything else
+ * is handled by xts_aes_crypt() with the encrypt function code.
+ */
+static int xts_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct s390_xts_ctx *ctx;
+
+	ctx = crypto_blkcipher_ctx(desc->tfm);
+	if (unlikely(ctx->key_len == 48))
+		return xts_fallback_encrypt(desc, dst, src, nbytes);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	return xts_aes_crypt(desc, ctx->enc, ctx, &walk);
+}
+
+/*
+ * blkcipher decrypt entry point for xts(aes).
+ *
+ * Requests for a 48-byte key are passed to the fallback; everything else
+ * is handled by xts_aes_crypt() with the decrypt function code.
+ */
+static int xts_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct s390_xts_ctx *ctx;
+
+	ctx = crypto_blkcipher_ctx(desc->tfm);
+	if (unlikely(ctx->key_len == 48))
+		return xts_fallback_decrypt(desc, dst, src, nbytes);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	return xts_aes_crypt(desc, ctx->dec, ctx, &walk);
+}
+
+/*
+ * cra_init hook: allocate the fallback blkcipher under the algorithm's
+ * own cra_name and store it in the transform context.
+ *
+ * Returns 0 on success or the PTR_ERR() of the failed allocation.
+ */
+static int xts_fallback_init(struct crypto_tfm *tfm)
+{
+	struct s390_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = tfm->__crt_alg->cra_name;
+	struct crypto_blkcipher *fb;
+
+	fb = crypto_alloc_blkcipher(alg_name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	ctx->fallback = fb;
+	if (!IS_ERR(fb))
+		return 0;
+
+	pr_err("Allocating XTS fallback algorithm %s failed\n", alg_name);
+	return PTR_ERR(fb);
+}
+
+/* cra_exit hook: free the fallback blkcipher and clear the pointer. */
+static void xts_fallback_exit(struct crypto_tfm *tfm)
+{
+	struct s390_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_blkcipher *fb = ctx->fallback;
+
+	crypto_free_blkcipher(fb);
+	ctx->fallback = NULL;
+}
+
+/*
+ * Algorithm descriptor for "xts(aes)".  It is flagged NEED_FALLBACK and
+ * allocates/frees its fallback through the cra_init/cra_exit hooks.
+ * Key sizes are twice the AES key sizes because an XTS key consists of
+ * two halves (see xts_aes_set_key()).
+ */
+static struct crypto_alg xts_aes_alg = {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-s390",
+ .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = xts_fallback_init,
+ .cra_exit = xts_fallback_exit,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aes_set_key,
+ .encrypt = xts_aes_encrypt,
+ .decrypt = xts_aes_decrypt,
+ }
+ }
+};
+
+static int xts_aes_alg_reg;
+
+/*
+ * Select the KMCTR function codes matching the key length, then let
+ * aes_set_key() handle the key itself and provide the return value.
+ *
+ * NOTE(review): for any other key_len the switch leaves enc/dec
+ * untouched; presumably aes_set_key() rejects such lengths - confirm.
+ */
+static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ switch (key_len) {
+ case 16:
+ sctx->enc = KMCTR_AES_128_ENCRYPT;
+ sctx->dec = KMCTR_AES_128_DECRYPT;
+ break;
+ case 24:
+ sctx->enc = KMCTR_AES_192_ENCRYPT;
+ sctx->dec = KMCTR_AES_192_DECRYPT;
+ break;
+ case 32:
+ sctx->enc = KMCTR_AES_256_ENCRYPT;
+ sctx->dec = KMCTR_AES_256_DECRYPT;
+ break;
+ }
+
+ return aes_set_key(tfm, in_key, key_len);
+}
+
+/*
+ * Fill @ctrptr with consecutive counter blocks.
+ *
+ * The first AES_BLOCK_SIZE bytes of @ctrptr must already hold the
+ * starting counter; each following block is a copy of its predecessor
+ * incremented by one.
+ *
+ * Returns the number of bytes covered by the prepared counters: @nbytes
+ * rounded down to whole blocks, or PAGE_SIZE if @nbytes exceeds it.
+ */
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int len, off;
+
+	/* only use complete blocks, max. PAGE_SIZE */
+	if (nbytes > PAGE_SIZE)
+		len = PAGE_SIZE;
+	else
+		len = nbytes & ~(AES_BLOCK_SIZE - 1);
+
+	for (off = AES_BLOCK_SIZE; off < len; off += AES_BLOCK_SIZE) {
+		u8 *cur = ctrptr + off;
+
+		memcpy(cur, cur - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		crypto_inc(cur, AES_BLOCK_SIZE);
+	}
+	return len;
+}
+
+/*
+ * Run the KMCTR operation selected by @func over the data described by
+ * @walk.
+ *
+ * If ctrblk_lock can be taken without waiting, the shared ctrblk buffer
+ * is used so that several counter blocks can be prepared and processed
+ * per crypt_s390_kmctr() call; otherwise the on-stack ctrbuf is used and
+ * the data is processed one block at a time.
+ *
+ * Returns the result of the blkcipher walk, or -EIO if crypt_s390_kmctr()
+ * fails or processes fewer bytes than requested.
+ */
+static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
+ struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+{
+ int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+ unsigned int n, nbytes;
+ u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
+ u8 *out, *in, *ctrptr = ctrbuf;
+
+ if (!walk->nbytes)
+ return ret;
+
+ /* ctrptr == ctrblk from here on means "ctrblk_lock is held" */
+ if (spin_trylock(&ctrblk_lock))
+ ctrptr = ctrblk;
+
+ memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ while (nbytes >= AES_BLOCK_SIZE) {
+ if (ctrptr == ctrblk)
+ n = __ctrblk_init(ctrptr, nbytes);
+ else
+ n = AES_BLOCK_SIZE;
+ ret = crypt_s390_kmctr(func, sctx->key, out, in,
+ n, ctrptr);
+ if (ret < 0 || ret != n) {
+ if (ctrptr == ctrblk)
+ spin_unlock(&ctrblk_lock);
+ return -EIO;
+ }
+ /* move the last counter used to the front, then step to the next one */
+ if (n > AES_BLOCK_SIZE)
+ memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(ctrptr, AES_BLOCK_SIZE);
+ out += n;
+ in += n;
+ nbytes -= n;
+ }
+ ret = blkcipher_walk_done(desc, walk, nbytes);
+ }
+ if (ctrptr == ctrblk) {
+ /* keep the counter in ctrbuf for the tail, since the lock is dropped here */
+ if (nbytes)
+ memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
+ else
+ memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+ spin_unlock(&ctrblk_lock);
+ } else {
+ if (!nbytes)
+ memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+ }
+ /*
+ * final block may be < AES_BLOCK_SIZE, copy only nbytes
+ */
+ if (nbytes) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ /* a full block is processed into buf; only nbytes of it reach out */
+ ret = crypt_s390_kmctr(func, sctx->key, buf, in,
+ AES_BLOCK_SIZE, ctrbuf);
+ if (ret < 0 || ret != AES_BLOCK_SIZE)
+ return -EIO;
+ memcpy(out, buf, nbytes);
+ crypto_inc(ctrbuf, AES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
+ }
+
+ return ret;
+}
+
+/*
+ * blkcipher encrypt entry point for ctr(aes): set up the walk and run
+ * ctr_aes_crypt() with the encrypt function code from the context.
+ */
+static int ctr_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct s390_aes_ctx *ctx;
+
+	ctx = crypto_blkcipher_ctx(desc->tfm);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	return ctr_aes_crypt(desc, ctx->enc, ctx, &walk);
+}
+
+/*
+ * blkcipher decrypt entry point for ctr(aes): set up the walk and run
+ * ctr_aes_crypt() with the decrypt function code from the context.
+ */
+static int ctr_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	struct s390_aes_ctx *ctx;
+
+	ctx = crypto_blkcipher_ctx(desc->tfm);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	return ctr_aes_crypt(desc, ctx->dec, ctx, &walk);
+}
+
+/*
+ * Algorithm descriptor for "ctr(aes)".  cra_blocksize is 1, and
+ * ctr_aes_crypt() handles a trailing partial block.  No fallback hooks
+ * are set for this algorithm.
+ */
+static struct crypto_alg ctr_aes_alg = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-s390",
+ .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_aes_set_key,
+ .encrypt = ctr_aes_encrypt,
+ .decrypt = ctr_aes_decrypt,
+ }
+ }
+};
+
+static int ctr_aes_alg_reg;
+
static int __init aes_s390_init(void)
{
int ret;
- if (crypt_s390_func_available(KM_AES_128_ENCRYPT))
+ if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
keylen_flag |= AES_KEYLEN_128;
- if (crypt_s390_func_available(KM_AES_192_ENCRYPT))
+ if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA))
keylen_flag |= AES_KEYLEN_192;
- if (crypt_s390_func_available(KM_AES_256_ENCRYPT))
+ if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA))
keylen_flag |= AES_KEYLEN_256;
if (!keylen_flag)
@@ -515,9 +904,8 @@ static int __init aes_s390_init(void)
/* z9 109 and z9 BC/EC only support 128 bit key length */
if (keylen_flag == AES_KEYLEN_128)
- printk(KERN_INFO
- "aes_s390: hardware acceleration only available for "
- "128 bit keys\n");
+ pr_info("AES hardware acceleration is only available for"
+ " 128-bit keys\n");
ret = crypto_register_alg(&aes_alg);
if (ret)
@@ -531,9 +919,42 @@ static int __init aes_s390_init(void)
if (ret)
goto cbc_aes_err;
+ if (crypt_s390_func_available(KM_XTS_128_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KM_XTS_256_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ ret = crypto_register_alg(&xts_aes_alg);
+ if (ret)
+ goto xts_aes_err;
+ xts_aes_alg_reg = 1;
+ }
+
+ if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KMCTR_AES_192_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KMCTR_AES_256_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto ctr_aes_err;
+ }
+ ret = crypto_register_alg(&ctr_aes_alg);
+ if (ret) {
+ free_page((unsigned long) ctrblk);
+ goto ctr_aes_err;
+ }
+ ctr_aes_alg_reg = 1;
+ }
+
out:
return ret;
+ctr_aes_err:
+ crypto_unregister_alg(&xts_aes_alg);
+xts_aes_err:
+ crypto_unregister_alg(&cbc_aes_alg);
cbc_aes_err:
crypto_unregister_alg(&ecb_aes_alg);
ecb_aes_err:
@@ -544,6 +965,12 @@ aes_err:
static void __exit aes_s390_fini(void)
{
+ if (ctr_aes_alg_reg) {
+ crypto_unregister_alg(&ctr_aes_alg);
+ free_page((unsigned long) ctrblk);
+ }
+ if (xts_aes_alg_reg)
+ crypto_unregister_alg(&xts_aes_alg);
crypto_unregister_alg(&cbc_aes_alg);
crypto_unregister_alg(&ecb_aes_alg);
crypto_unregister_alg(&aes_alg);
@@ -552,7 +979,7 @@ static void __exit aes_s390_fini(void)
module_init(aes_s390_init);
module_exit(aes_s390_fini);
-MODULE_ALIAS("aes");
+MODULE_ALIAS("aes-all");
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 9992f95ef99..6c5cc6da711 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -3,7 +3,7 @@
*
* Support for s390 cryptographic instructions.
*
- * Copyright IBM Corp. 2003,2007
+ * Copyright IBM Corp. 2003, 2007
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
*
@@ -17,6 +17,7 @@
#define _CRYPTO_ARCH_S390_CRYPT_S390_H
#include <asm/errno.h>
+#include <asm/facility.h>
#define CRYPT_S390_OP_MASK 0xFF00
#define CRYPT_S390_FUNC_MASK 0x00FF
@@ -24,13 +25,18 @@
#define CRYPT_S390_PRIORITY 300
#define CRYPT_S390_COMPOSITE_PRIORITY 400
+#define CRYPT_S390_MSA 0x1
+#define CRYPT_S390_MSA3 0x2
+#define CRYPT_S390_MSA4 0x4
+
/* s390 cryptographic operations */
enum crypt_s390_operations {
CRYPT_S390_KM = 0x0100,
CRYPT_S390_KMC = 0x0200,
CRYPT_S390_KIMD = 0x0300,
CRYPT_S390_KLMD = 0x0400,
- CRYPT_S390_KMAC = 0x0500
+ CRYPT_S390_KMAC = 0x0500,
+ CRYPT_S390_KMCTR = 0x0600
};
/*
@@ -51,6 +57,10 @@ enum crypt_s390_km_func {
KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80,
KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14,
KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80,
+ KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32,
+ KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80,
+ KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34,
+ KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80,
};
/*
@@ -75,6 +85,26 @@ enum crypt_s390_kmc_func {
};
/*
+ * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction
+ */
+enum crypt_s390_kmctr_func {
+ KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0,
+ KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1,
+ KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80,
+ KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2,
+ KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80,
+ KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3,
+ KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80,
+ KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12,
+ KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80,
+ KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13,
+ KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80,
+ KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14,
+ KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80,
+};
+
+/*
* function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
* instruction
*/
@@ -83,6 +113,7 @@ enum crypt_s390_kimd_func {
KIMD_SHA_1 = CRYPT_S390_KIMD | 1,
KIMD_SHA_256 = CRYPT_S390_KIMD | 2,
KIMD_SHA_512 = CRYPT_S390_KIMD | 3,
+ KIMD_GHASH = CRYPT_S390_KIMD | 65,
};
/*
@@ -284,6 +315,45 @@ static inline int crypt_s390_kmac(long func, void *param,
}
/**
+ * crypt_s390_kmctr:
+ * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
+ const u8 *src, long src_len, u8 *counter)
+{
+ register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+ register void *__param asm("1") = param;
+ register const u8 *__src asm("2") = src;
+ register long __src_len asm("3") = src_len;
+ register u8 *__dest asm("4") = dest;
+ register u8 *__ctr asm("6") = counter;
+ int ret = -1;
+
+ asm volatile(
+ "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
+ "1: brc 1,0b \n" /* handle partial completion */
+ " la %0,0\n"
+ "2:\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
+ "+a" (__ctr)
+ : "d" (__func), "a" (__param) : "cc", "memory");
+ if (ret < 0)
+ return ret;
+ return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
* crypt_s390_func_available:
* @func: the function code of the specific function; 0 if op in general
*
@@ -291,11 +361,22 @@ static inline int crypt_s390_kmac(long func, void *param,
*
* Returns 1 if func available; 0 if func or op in general not available
*/
-static inline int crypt_s390_func_available(int func)
+static inline int crypt_s390_func_available(int func,
+ unsigned int facility_mask)
{
unsigned char status[16];
int ret;
+ if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
+ return 0;
+
+ if (facility_mask & CRYPT_S390_MSA3 &&
+ (!test_facility(2) || !test_facility(76)))
+ return 0;
+ if (facility_mask & CRYPT_S390_MSA4 &&
+ (!test_facility(2) || !test_facility(77)))
+ return 0;
+
switch (func & CRYPT_S390_OP_MASK) {
case CRYPT_S390_KM:
ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
@@ -312,6 +393,10 @@ static inline int crypt_s390_func_available(int func)
case CRYPT_S390_KMAC:
ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
break;
+ case CRYPT_S390_KMCTR:
+ ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
+ NULL);
+ break;
default:
return 0;
}
@@ -322,4 +407,31 @@ static inline int crypt_s390_func_available(int func)
return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
}
+/**
+ * crypt_s390_pcc:
+ * @func: the function code passed to PCC; see crypt_s390_km_func
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for success.
+ */
+static inline int crypt_s390_pcc(long func, void *param)
+{
+ register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */
+ register void *__param asm("1") = param;
+ int ret = -1;
+
+ asm volatile(
+ "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */
+ "1: brc 1,0b \n" /* handle partial completion */
+ " la %0,0\n"
+ "2:\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "+d" (ret)
+ : "d" (__func), "a" (__param) : "cc", "memory");
+ return ret;
+}
+
+
#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h
deleted file mode 100644
index c964b64111d..00000000000
--- a/arch/s390/crypto/crypto_des.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Function for checking keys for the DES and Tripple DES Encryption
- * algorithms.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#ifndef __CRYPTO_DES_H__
-#define __CRYPTO_DES_H__
-
-extern int crypto_des_check_key(const u8*, unsigned int, u32*);
-
-#endif //__CRYPTO_DES_H__
diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c
deleted file mode 100644
index 5706af26644..00000000000
--- a/arch/s390/crypto/des_check_key.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Function for checking keys for the DES and Tripple DES Encryption
- * algorithms.
- *
- * Originally released as descore by Dana L. How <how@isl.stanford.edu>.
- * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel.
- * Derived from Cryptoapi and Nettle implementations, adapted for in-place
- * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL.
- *
- * s390 Version:
- * Copyright IBM Corp. 2003
- * Author(s): Thomas Spatzier
- * Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/des.c"
- * Copyright (c) 1992 Dana L. How.
- * Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de>
- * Copyright (c) Gisle Sflensminde <gisle@ii.uib.no>
- * Copyright (C) 2001 Niels Mvller.
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include "crypto_des.h"
-
-#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o))
-
-static const u8 parity[] = {
- 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3,
- 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
- 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
- 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
- 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
- 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
- 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
- 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8,
-};
-
-/*
- * RFC2451: Weak key checks SHOULD be performed.
- */
-int
-crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags)
-{
- u32 n, w;
-
- n = parity[key[0]]; n <<= 4;
- n |= parity[key[1]]; n <<= 4;
- n |= parity[key[2]]; n <<= 4;
- n |= parity[key[3]]; n <<= 4;
- n |= parity[key[4]]; n <<= 4;
- n |= parity[key[5]]; n <<= 4;
- n |= parity[key[6]]; n <<= 4;
- n |= parity[key[7]];
- w = 0x88888888L;
-
- if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY)
- && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */
- if (n < 0x41415151) {
- if (n < 0x31312121) {
- if (n < 0x14141515) {
- /* 01 01 01 01 01 01 01 01 */
- if (n == 0x11111111) goto weak;
- /* 01 1F 01 1F 01 0E 01 0E */
- if (n == 0x13131212) goto weak;
- } else {
- /* 01 E0 01 E0 01 F1 01 F1 */
- if (n == 0x14141515) goto weak;
- /* 01 FE 01 FE 01 FE 01 FE */
- if (n == 0x16161616) goto weak;
- }
- } else {
- if (n < 0x34342525) {
- /* 1F 01 1F 01 0E 01 0E 01 */
- if (n == 0x31312121) goto weak;
- /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */
- if (n == 0x33332222) goto weak;
- } else {
- /* 1F E0 1F E0 0E F1 0E F1 */
- if (n == 0x34342525) goto weak;
- /* 1F FE 1F FE 0E FE 0E FE */
- if (n == 0x36362626) goto weak;
- }
- }
- } else {
- if (n < 0x61616161) {
- if (n < 0x44445555) {
- /* E0 01 E0 01 F1 01 F1 01 */
- if (n == 0x41415151) goto weak;
- /* E0 1F E0 1F F1 0E F1 0E */
- if (n == 0x43435252) goto weak;
- } else {
- /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */
- if (n == 0x44445555) goto weak;
- /* E0 FE E0 FE F1 FE F1 FE */
- if (n == 0x46465656) goto weak;
- }
- } else {
- if (n < 0x64646565) {
- /* FE 01 FE 01 FE 01 FE 01 */
- if (n == 0x61616161) goto weak;
- /* FE 1F FE 1F FE 0E FE 0E */
- if (n == 0x63636262) goto weak;
- } else {
- /* FE E0 FE E0 FE F1 FE F1 */
- if (n == 0x64646565) goto weak;
- /* FE FE FE FE FE FE FE FE */
- if (n == 0x66666666) goto weak;
- }
- }
- }
- }
- return 0;
-weak:
- *flags |= CRYPTO_TFM_RES_WEAK_KEY;
- return -EINVAL;
-}
-
-EXPORT_SYMBOL(crypto_des_check_key);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Key Check function for DES & DES3 Cipher Algorithms");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 4aba83b3159..7acb77f7ef1 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -3,7 +3,7 @@
*
* s390 implementation of the DES Cipher Algorithm.
*
- * Copyright IBM Corp. 2003,2007
+ * Copyright IBM Corp. 2003, 2011
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
*
@@ -14,63 +14,53 @@
*
*/
-#include <crypto/algapi.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
#include "crypt_s390.h"
-#include "crypto_des.h"
-
-#define DES_BLOCK_SIZE 8
-#define DES_KEY_SIZE 8
-#define DES3_128_KEY_SIZE (2 * DES_KEY_SIZE)
-#define DES3_128_BLOCK_SIZE DES_BLOCK_SIZE
-
-#define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE)
-#define DES3_192_BLOCK_SIZE DES_BLOCK_SIZE
-
-struct crypt_s390_des_ctx {
- u8 iv[DES_BLOCK_SIZE];
- u8 key[DES_KEY_SIZE];
-};
+#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
-struct crypt_s390_des3_128_ctx {
- u8 iv[DES_BLOCK_SIZE];
- u8 key[DES3_128_KEY_SIZE];
-};
+static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
-struct crypt_s390_des3_192_ctx {
+struct s390_des_ctx {
u8 iv[DES_BLOCK_SIZE];
- u8 key[DES3_192_KEY_SIZE];
+ u8 key[DES3_KEY_SIZE];
};
static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+ unsigned int key_len)
{
- struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
- int ret;
+ u32 tmp[DES_EXPKEY_WORDS];
- /* test if key is valid (not a weak key) */
- ret = crypto_des_check_key(key, keylen, flags);
- if (ret == 0)
- memcpy(dctx->key, key, keylen);
- return ret;
+ /* check for weak keys */
+ if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ *flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, key_len);
+ return 0;
}
static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+ crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
}
static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
- struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+ crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
}
static struct crypto_alg des_alg = {
@@ -79,9 +69,8 @@ static struct crypto_alg des_alg = {
.cra_priority = CRYPT_S390_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(des_alg.cra_list),
.cra_u = {
.cipher = {
.cia_min_keysize = DES_KEY_SIZE,
@@ -94,7 +83,7 @@ static struct crypto_alg des_alg = {
};
static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
- void *param, struct blkcipher_walk *walk)
+ u8 *key, struct blkcipher_walk *walk)
{
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes;
@@ -105,8 +94,9 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_km(func, param, out, in, n);
- BUG_ON((ret < 0) || (ret != n));
+ ret = crypt_s390_km(func, key, out, in, n);
+ if (ret < 0 || ret != n)
+ return -EIO;
nbytes &= DES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -116,28 +106,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
}
static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
- void *param, struct blkcipher_walk *walk)
+ struct blkcipher_walk *walk)
{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
int ret = blkcipher_walk_virt(desc, walk);
unsigned int nbytes = walk->nbytes;
+ struct {
+ u8 iv[DES_BLOCK_SIZE];
+ u8 key[DES3_KEY_SIZE];
+ } param;
if (!nbytes)
goto out;
- memcpy(param, walk->iv, DES_BLOCK_SIZE);
+ memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+ memcpy(param.key, ctx->key, DES3_KEY_SIZE);
do {
/* only use complete blocks */
unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, param, out, in, n);
- BUG_ON((ret < 0) || (ret != n));
+ ret = crypt_s390_kmc(func, &param, out, in, n);
+ if (ret < 0 || ret != n)
+ return -EIO;
nbytes &= DES_BLOCK_SIZE - 1;
ret = blkcipher_walk_done(desc, walk, nbytes);
} while ((nbytes = walk->nbytes));
- memcpy(walk->iv, param, DES_BLOCK_SIZE);
+ memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
out:
return ret;
@@ -147,22 +144,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk);
}
static int ecb_des_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk);
}
static struct crypto_alg ecb_des_alg = {
@@ -171,10 +168,9 @@ static struct crypto_alg ecb_des_alg = {
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = DES_KEY_SIZE,
@@ -190,22 +186,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk);
}
static int cbc_des_decrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk);
}
static struct crypto_alg cbc_des_alg = {
@@ -214,10 +208,9 @@ static struct crypto_alg cbc_des_alg = {
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des_ctx),
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = DES_KEY_SIZE,
@@ -237,326 +230,317 @@ static struct crypto_alg cbc_des_alg = {
* complementation keys. Any weakness is obviated by the use of
* multiple keys.
*
- * However, if the two independent 64-bit keys are equal,
- * then the DES3 operation is simply the same as DES.
- * Implementers MUST reject keys that exhibit this property.
+ * However, if the first two or last two independent 64-bit keys are
+ * equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ * same as DES. Implementers MUST reject keys that exhibit this
+ * property.
*
*/
-static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
{
- int i, ret;
- struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
- const u8 *temp_key = key;
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
- if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED;
+ if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
+ crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
+ DES_KEY_SIZE)) &&
+ (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ *flags |= CRYPTO_TFM_RES_WEAK_KEY;
return -EINVAL;
}
- for (i = 0; i < 2; i++, temp_key += DES_KEY_SIZE) {
- ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags);
- if (ret < 0)
- return ret;
- }
- memcpy(dctx->key, key, keylen);
+ memcpy(ctx->key, key, key_len);
return 0;
}
-static void des3_128_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_TDEA_128_ENCRYPT, dctx->key, dst, (void*)src,
- DES3_128_BLOCK_SIZE);
+ crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
}
-static void des3_128_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_TDEA_128_DECRYPT, dctx->key, dst, (void*)src,
- DES3_128_BLOCK_SIZE);
+ crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
}
-static struct crypto_alg des3_128_alg = {
- .cra_name = "des3_ede128",
- .cra_driver_name = "des3_ede128-s390",
+static struct crypto_alg des3_alg = {
+ .cra_name = "des3_ede",
+ .cra_driver_name = "des3_ede-s390",
.cra_priority = CRYPT_S390_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES3_128_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx),
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(des3_128_alg.cra_list),
.cra_u = {
.cipher = {
- .cia_min_keysize = DES3_128_KEY_SIZE,
- .cia_max_keysize = DES3_128_KEY_SIZE,
- .cia_setkey = des3_128_setkey,
- .cia_encrypt = des3_128_encrypt,
- .cia_decrypt = des3_128_decrypt,
+ .cia_min_keysize = DES3_KEY_SIZE,
+ .cia_max_keysize = DES3_KEY_SIZE,
+ .cia_setkey = des3_setkey,
+ .cia_encrypt = des3_encrypt,
+ .cia_decrypt = des3_decrypt,
}
}
};
-static int ecb_des3_128_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_128_ENCRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk);
}
-static int ecb_des3_128_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_128_DECRYPT, sctx->key, &walk);
+ return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk);
}
-static struct crypto_alg ecb_des3_128_alg = {
- .cra_name = "ecb(des3_ede128)",
- .cra_driver_name = "ecb-des3_ede128-s390",
+static struct crypto_alg ecb_des3_alg = {
+ .cra_name = "ecb(des3_ede)",
+ .cra_driver_name = "ecb-des3_ede-s390",
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_128_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx),
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(
- ecb_des3_128_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = DES3_128_KEY_SIZE,
- .max_keysize = DES3_128_KEY_SIZE,
- .setkey = des3_128_setkey,
- .encrypt = ecb_des3_128_encrypt,
- .decrypt = ecb_des3_128_decrypt,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = ecb_des3_encrypt,
+ .decrypt = ecb_des3_decrypt,
}
}
};
-static int cbc_des3_128_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_128_ENCRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk);
}
-static int cbc_des3_128_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_128_DECRYPT, sctx->iv, &walk);
+ return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk);
}
-static struct crypto_alg cbc_des3_128_alg = {
- .cra_name = "cbc(des3_ede128)",
- .cra_driver_name = "cbc-des3_ede128-s390",
+static struct crypto_alg cbc_des3_alg = {
+ .cra_name = "cbc(des3_ede)",
+ .cra_driver_name = "cbc-des3_ede-s390",
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_128_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx),
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(
- cbc_des3_128_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = DES3_128_KEY_SIZE,
- .max_keysize = DES3_128_KEY_SIZE,
- .ivsize = DES3_128_BLOCK_SIZE,
- .setkey = des3_128_setkey,
- .encrypt = cbc_des3_128_encrypt,
- .decrypt = cbc_des3_128_decrypt,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = cbc_des3_encrypt,
+ .decrypt = cbc_des3_decrypt,
}
}
};
-/*
- * RFC2451:
- *
- * For DES-EDE3, there is no known need to reject weak or
- * complementation keys. Any weakness is obviated by the use of
- * multiple keys.
- *
- * However, if the first two or last two independent 64-bit keys are
- * equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
- * same as DES. Implementers MUST reject keys that exhibit this
- * property.
- *
- */
-static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
{
- int i, ret;
- struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
- const u8 *temp_key = key;
- u32 *flags = &tfm->crt_flags;
+ unsigned int i, n;
- if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
- memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
- DES_KEY_SIZE))) {
-
- *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED;
- return -EINVAL;
+ /* align to block size, max. PAGE_SIZE */
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
+ for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+ memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+ crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
}
- for (i = 0; i < 3; i++, temp_key += DES_KEY_SIZE) {
- ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags);
- if (ret < 0)
- return ret;
- }
- memcpy(dctx->key, key, keylen);
- return 0;
-}
-
-static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
-
- crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
- DES3_192_BLOCK_SIZE);
+ return n;
}
-static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
+ struct s390_des_ctx *ctx,
+ struct blkcipher_walk *walk)
{
- struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
-
- crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
- DES3_192_BLOCK_SIZE);
-}
-
-static struct crypto_alg des3_192_alg = {
- .cra_name = "des3_ede",
- .cra_driver_name = "des3_ede-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES3_192_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = DES3_192_KEY_SIZE,
- .cia_max_keysize = DES3_192_KEY_SIZE,
- .cia_setkey = des3_192_setkey,
- .cia_encrypt = des3_192_encrypt,
- .cia_decrypt = des3_192_decrypt,
+ int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+ unsigned int n, nbytes;
+ u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
+ u8 *out, *in, *ctrptr = ctrbuf;
+
+ if (!walk->nbytes)
+ return ret;
+
+ if (spin_trylock(&ctrblk_lock))
+ ctrptr = ctrblk;
+
+ memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ while (nbytes >= DES_BLOCK_SIZE) {
+ if (ctrptr == ctrblk)
+ n = __ctrblk_init(ctrptr, nbytes);
+ else
+ n = DES_BLOCK_SIZE;
+ ret = crypt_s390_kmctr(func, ctx->key, out, in,
+ n, ctrptr);
+ if (ret < 0 || ret != n) {
+ if (ctrptr == ctrblk)
+ spin_unlock(&ctrblk_lock);
+ return -EIO;
+ }
+ if (n > DES_BLOCK_SIZE)
+ memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
+ DES_BLOCK_SIZE);
+ crypto_inc(ctrptr, DES_BLOCK_SIZE);
+ out += n;
+ in += n;
+ nbytes -= n;
}
+ ret = blkcipher_walk_done(desc, walk, nbytes);
}
-};
+ if (ctrptr == ctrblk) {
+ if (nbytes)
+ memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
+ else
+ memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+ spin_unlock(&ctrblk_lock);
+ } else {
+ if (!nbytes)
+ memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+ }
+ /* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+ if (nbytes) {
+ out = walk->dst.virt.addr;
+ in = walk->src.virt.addr;
+ ret = crypt_s390_kmctr(func, ctx->key, buf, in,
+ DES_BLOCK_SIZE, ctrbuf);
+ if (ret < 0 || ret != DES_BLOCK_SIZE)
+ return -EIO;
+ memcpy(out, buf, nbytes);
+ crypto_inc(ctrbuf, DES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
+ }
+ return ret;
+}
-static int ecb_des3_192_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+/*
+ * blkcipher .encrypt hook of ctr_des_alg: initialises a walk over dst/src
+ * for nbytes and hands it to ctr_desall_crypt() together with the
+ * KMCTR_DEA_ENCRYPT function code. Returns ctr_desall_crypt()'s result.
+ */
+static int ctr_des_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk);
+ return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk);
}
-static int ecb_des3_192_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+/*
+ * blkcipher .decrypt hook of ctr_des_alg: same walk setup as the encrypt
+ * hook, but ctr_desall_crypt() is called with KMCTR_DEA_DECRYPT.
+ */
+static int ctr_des_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk);
+ return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk);
}
-static struct crypto_alg ecb_des3_192_alg = {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-s390",
+/*
+ * Algorithm descriptor for "ctr(des)" (driver name "ctr-des-s390"): a
+ * blkcipher with a DES_KEY_SIZE key and a DES_BLOCK_SIZE IV, keyed through
+ * des_setkey() and processed by ctr_des_encrypt()/ctr_des_decrypt().
+ */
+static struct crypto_alg ctr_des_alg = {
+ .cra_name = "ctr(des)",
+ .cra_driver_name = "ctr-des-s390",
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_192_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(
- ecb_des3_192_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = DES3_192_KEY_SIZE,
- .max_keysize = DES3_192_KEY_SIZE,
- .setkey = des3_192_setkey,
- .encrypt = ecb_des3_192_encrypt,
- .decrypt = ecb_des3_192_decrypt,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey,
+ .encrypt = ctr_des_encrypt,
+ .decrypt = ctr_des_decrypt,
}
}
};
-static int cbc_des3_192_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+/*
+ * blkcipher .encrypt hook of ctr_des3_alg: initialises a walk over dst/src
+ * for nbytes and hands it to ctr_desall_crypt() together with the
+ * KMCTR_TDEA_192_ENCRYPT function code.
+ */
+static int ctr_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk);
+ return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk);
}
-static int cbc_des3_192_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+/*
+ * blkcipher .decrypt hook of ctr_des3_alg: same walk setup as the encrypt
+ * hook, but ctr_desall_crypt() is called with KMCTR_TDEA_192_DECRYPT.
+ */
+static int ctr_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
{
- struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk);
+ return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk);
}
-static struct crypto_alg cbc_des3_192_alg = {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-s390",
+/*
+ * Algorithm descriptor for "ctr(des3_ede)" (driver name
+ * "ctr-des3_ede-s390"): a blkcipher with a DES3_KEY_SIZE key and a
+ * DES_BLOCK_SIZE IV, keyed through des3_setkey() and processed by
+ * ctr_des3_encrypt()/ctr_des3_decrypt().
+ */
+static struct crypto_alg ctr_des3_alg = {
+ .cra_name = "ctr(des3_ede)",
+ .cra_driver_name = "ctr-des3_ede-s390",
.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_192_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx),
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(
- cbc_des3_192_alg.cra_list),
.cra_u = {
.blkcipher = {
- .min_keysize = DES3_192_KEY_SIZE,
- .max_keysize = DES3_192_KEY_SIZE,
- .ivsize = DES3_192_BLOCK_SIZE,
- .setkey = des3_192_setkey,
- .encrypt = cbc_des3_192_encrypt,
- .decrypt = cbc_des3_192_decrypt,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = ctr_des3_encrypt,
+ .decrypt = ctr_des3_decrypt,
}
}
};
-static int des_s390_init(void)
+static int __init des_s390_init(void)
{
- int ret = 0;
+ int ret;
- if (!crypt_s390_func_available(KM_DEA_ENCRYPT) ||
- !crypt_s390_func_available(KM_TDEA_128_ENCRYPT) ||
- !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
+ if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) ||
+ !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA))
return -EOPNOTSUPP;
ret = crypto_register_alg(&des_alg);
@@ -568,41 +552,46 @@ static int des_s390_init(void)
ret = crypto_register_alg(&cbc_des_alg);
if (ret)
goto cbc_des_err;
-
- ret = crypto_register_alg(&des3_128_alg);
+ ret = crypto_register_alg(&des3_alg);
if (ret)
- goto des3_128_err;
- ret = crypto_register_alg(&ecb_des3_128_alg);
- if (ret)
- goto ecb_des3_128_err;
- ret = crypto_register_alg(&cbc_des3_128_alg);
- if (ret)
- goto cbc_des3_128_err;
-
- ret = crypto_register_alg(&des3_192_alg);
+ goto des3_err;
+ ret = crypto_register_alg(&ecb_des3_alg);
if (ret)
- goto des3_192_err;
- ret = crypto_register_alg(&ecb_des3_192_alg);
+ goto ecb_des3_err;
+ ret = crypto_register_alg(&cbc_des3_alg);
if (ret)
- goto ecb_des3_192_err;
- ret = crypto_register_alg(&cbc_des3_192_alg);
- if (ret)
- goto cbc_des3_192_err;
-
+ goto cbc_des3_err;
+
+ if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+ crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT,
+ CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ ret = crypto_register_alg(&ctr_des_alg);
+ if (ret)
+ goto ctr_des_err;
+ ret = crypto_register_alg(&ctr_des3_alg);
+ if (ret)
+ goto ctr_des3_err;
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto ctr_mem_err;
+ }
+ }
out:
return ret;
-cbc_des3_192_err:
- crypto_unregister_alg(&ecb_des3_192_alg);
-ecb_des3_192_err:
- crypto_unregister_alg(&des3_192_alg);
-des3_192_err:
- crypto_unregister_alg(&cbc_des3_128_alg);
-cbc_des3_128_err:
- crypto_unregister_alg(&ecb_des3_128_alg);
-ecb_des3_128_err:
- crypto_unregister_alg(&des3_128_alg);
-des3_128_err:
+ctr_mem_err:
+ crypto_unregister_alg(&ctr_des3_alg);
+ctr_des3_err:
+ crypto_unregister_alg(&ctr_des_alg);
+ctr_des_err:
+ crypto_unregister_alg(&cbc_des3_alg);
+cbc_des3_err:
+ crypto_unregister_alg(&ecb_des3_alg);
+ecb_des3_err:
+ crypto_unregister_alg(&des3_alg);
+des3_err:
crypto_unregister_alg(&cbc_des_alg);
cbc_des_err:
crypto_unregister_alg(&ecb_des_alg);
@@ -612,21 +601,23 @@ des_err:
goto out;
}
-static void __exit des_s390_fini(void)
+/*
+ * Module exit: unregisters every algorithm of this module. The two CTR
+ * algorithms are unregistered, and the ctrblk page is freed, only when
+ * ctrblk is non-NULL.
+ */
+static void __exit des_s390_exit(void)
{
- crypto_unregister_alg(&cbc_des3_192_alg);
- crypto_unregister_alg(&ecb_des3_192_alg);
- crypto_unregister_alg(&des3_192_alg);
- crypto_unregister_alg(&cbc_des3_128_alg);
- crypto_unregister_alg(&ecb_des3_128_alg);
- crypto_unregister_alg(&des3_128_alg);
+ if (ctrblk) {
+ crypto_unregister_alg(&ctr_des_alg);
+ crypto_unregister_alg(&ctr_des3_alg);
+ free_page((unsigned long) ctrblk);
+ }
+ crypto_unregister_alg(&cbc_des3_alg);
+ crypto_unregister_alg(&ecb_des3_alg);
+ crypto_unregister_alg(&des3_alg);
crypto_unregister_alg(&cbc_des_alg);
crypto_unregister_alg(&ecb_des_alg);
crypto_unregister_alg(&des_alg);
}
module_init(des_s390_init);
-module_exit(des_s390_fini);
+module_exit(des_s390_exit);
MODULE_ALIAS("des");
MODULE_ALIAS("des3_ede");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 00000000000..d43485d142e
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,166 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+
+#include "crypt_s390.h"
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+/*
+ * Per-transform (tfm) context. It only carries the hash subkey installed by
+ * ghash_setkey(). The running hash value must not live here: the tfm context
+ * is shared by all requests on the transform, so keeping the icv in it lets
+ * one digest continue from (or race with) another.
+ */
+struct ghash_ctx {
+	u8 key[GHASH_BLOCK_SIZE];
+};
+
+/*
+ * Per-request state. The structure itself is handed to crypt_s390_kimd() as
+ * the KIMD_GHASH parameter block, so icv and key must remain the first two
+ * members, in this order.
+ *
+ * icv:    running hash value, updated by every KIMD_GHASH call
+ * key:    private copy of the transform's hash subkey
+ * buffer: input bytes that do not yet fill a complete block
+ * bytes:  number of bytes still missing in buffer (0: buffer is empty)
+ */
+struct ghash_desc_ctx {
+	u8 icv[GHASH_BLOCK_SIZE];
+	u8 key[GHASH_BLOCK_SIZE];
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+/*
+ * Start a new digest: clear the whole request state (which zeroes the icv)
+ * and copy in the subkey of the transform. Always returns 0.
+ */
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+	memset(dctx, 0, sizeof(*dctx));
+	memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+/*
+ * Install the hash subkey. keylen must be exactly GHASH_BLOCK_SIZE;
+ * otherwise CRYPTO_TFM_RES_BAD_KEY_LEN is flagged on the transform and
+ * -EINVAL is returned. Returns 0 on success.
+ */
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+/*
+ * Feed srclen bytes from src into the digest. Returns 0 on success or -EIO
+ * when crypt_s390_kimd() does not report the requested length as processed.
+ */
+static int ghash_update(struct shash_desc *desc,
+			const u8 *src, unsigned int srclen)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int n;
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	/* first top up a partially filled buffer left by an earlier call */
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		n = min(srclen, dctx->bytes);
+		dctx->bytes -= n;
+		srclen -= n;
+
+		memcpy(pos, src, n);
+		src += n;
+
+		/* buffer is complete now, hash it */
+		if (!dctx->bytes) {
+			ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
+					      GHASH_BLOCK_SIZE);
+			if (ret != GHASH_BLOCK_SIZE)
+				return -EIO;
+		}
+	}
+
+	/* hash all complete blocks directly from the caller's data */
+	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+	if (n) {
+		ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
+		if (ret != n)
+			return -EIO;
+		src += n;
+		srclen -= n;
+	}
+
+	/* keep the tail (less than one block) for the next call */
+	if (srclen) {
+		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+		memcpy(buf, src, srclen);
+	}
+
+	return 0;
+}
+
+/*
+ * Zero-pad and hash a partially filled buffer, if there is one. Returns 0
+ * on success or -EIO when crypt_s390_kimd() does not process the full block.
+ */
+static int ghash_flush(struct ghash_desc_ctx *dctx)
+{
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		memset(pos, 0, dctx->bytes);
+
+		ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+		if (ret != GHASH_BLOCK_SIZE)
+			return -EIO;
+	}
+
+	dctx->bytes = 0;
+	return 0;
+}
+
+/*
+ * Finish the digest: flush buffered input and copy the GHASH_BLOCK_SIZE
+ * byte icv of this request to dst. dst is left untouched on error.
+ */
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	int ret;
+
+	ret = ghash_flush(dctx);
+	if (!ret)
+		memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
+	return ret;
+}
+
+/*
+ * shash descriptor for "ghash" (driver name "ghash-s390"): wires up the
+ * init/update/final/setkey handlers above and sizes the per-request and
+ * per-transform contexts from the two context structures.
+ */
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+/*
+ * Module init: register ghash_alg when the KIMD_GHASH function is reported
+ * as available for CRYPT_S390_MSA | CRYPT_S390_MSA4, else fail with
+ * -EOPNOTSUPP. Otherwise returns the result of crypto_register_shash().
+ */
+static int __init ghash_mod_init(void)
+{
+	int have_ghash = crypt_s390_func_available(KIMD_GHASH,
+					CRYPT_S390_MSA | CRYPT_S390_MSA4);
+
+	if (have_ghash)
+		return crypto_register_shash(&ghash_alg);
+
+	return -EOPNOTSUPP;
+}
+
+/* Module exit: unregister the algorithm registered by ghash_mod_init(). */
+static void __exit ghash_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 0cfefddd837..94a35a4c1b4 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -1,5 +1,5 @@
/*
- * Copyright IBM Corp. 2006,2007
+ * Copyright IBM Corp. 2006, 2007
* Author(s): Jan Glauber <jan.glauber@de.ibm.com>
* Driver for the s390 pseudo random number generator
*/
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include <asm/debug.h>
#include <asm/uaccess.h>
@@ -75,7 +76,7 @@ static void prng_seed(int nbytes)
/* Add the entropy */
while (nbytes >= 8) {
- *((__u64 *)parm_block) ^= *((__u64 *)buf+i*8);
+ *((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
prng_add_entropy();
i += 8;
nbytes -= 8;
@@ -151,6 +152,7 @@ static const struct file_operations prng_fops = {
.open = &prng_open,
.release = NULL,
.read = &prng_read,
+ .llseek = noop_llseek,
};
static struct miscdevice prng_dev = {
@@ -164,7 +166,7 @@ static int __init prng_init(void)
int ret;
/* check if the CPU has a PRNG */
- if (!crypt_s390_func_available(KMC_PRNG))
+ if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
return -EOPNOTSUPP;
if (prng_chunk_size < 8)
@@ -185,11 +187,8 @@ static int __init prng_init(void)
prng_seed(16);
ret = misc_register(&prng_dev);
- if (ret) {
- printk(KERN_WARNING
- "Could not register misc device for PRNG.\n");
+ if (ret)
goto out_buf;
- }
return 0;
out_buf:
@@ -202,8 +201,7 @@ out_free:
static void __exit prng_exit(void)
{
/* wipe me */
- memset(p->buf, 0, prng_chunk_size);
- kfree(p->buf);
+ kzfree(p->buf);
kfree(p);
misc_deregister(&prng_dev);
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index 1ceafa571ea..f4e9dc71675 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -29,7 +29,9 @@ struct s390_sha_ctx {
int func; /* KIMD function to use */
};
-void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len);
-void s390_sha_final(struct crypto_tfm *tfm, u8 *out);
+struct shash_desc;
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
+int s390_sha_final(struct shash_desc *desc, u8 *out);
#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index b3cb5a89b00..a1b3a9dc9d8 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -8,7 +8,7 @@
* implementation written by Steve Reid.
*
* s390 Version:
- * Copyright IBM Corp. 2003,2007
+ * Copyright IBM Corp. 2003, 2007
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
*
@@ -23,17 +23,17 @@
* any later version.
*
*/
+#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/crypto.h>
#include <crypto/sha.h>
#include "crypt_s390.h"
#include "sha.h"
-static void sha1_init(struct crypto_tfm *tfm)
+static int sha1_init(struct shash_desc *desc)
{
- struct s390_sha_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA1_H0;
sctx->state[1] = SHA1_H1;
@@ -42,34 +42,62 @@ static void sha1_init(struct crypto_tfm *tfm)
sctx->state[4] = SHA1_H4;
sctx->count = 0;
sctx->func = KIMD_SHA_1;
+
+ return 0;
+}
+
+/*
+ * shash .export hook: copy count, state and buffered data of the s390
+ * context into the struct sha1_state that out points to. Always returns 0.
+ */
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *saved = out;
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	memcpy(saved->buffer, sctx->buf, sizeof(saved->buffer));
+	memcpy(saved->state, sctx->state, sizeof(saved->state));
+	saved->count = sctx->count;
+
+	return 0;
+}
+
+/*
+ * shash .import hook: load count, state and buffered data from the
+ * struct sha1_state that in points to, and select KIMD_SHA_1 as the
+ * function code. Always returns 0.
+ */
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+	const struct sha1_state *saved = in;
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->func = KIMD_SHA_1;
+	sctx->count = saved->count;
+	memcpy(sctx->buf, saved->buffer, sizeof(saved->buffer));
+	memcpy(sctx->state, saved->state, sizeof(saved->state));
+
+	return 0;
}
-static struct crypto_alg alg = {
- .cra_name = "sha1",
- .cra_driver_name= "sha1-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_DIGEST,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_u = { .digest = {
- .dia_digestsize = SHA1_DIGEST_SIZE,
- .dia_init = sha1_init,
- .dia_update = s390_sha_update,
- .dia_final = s390_sha_final } }
+/*
+ * shash descriptor for "sha1" (driver name "sha1-s390"). Update and final
+ * use the shared s390_sha_update()/s390_sha_final() helpers; the exported
+ * state has the size of struct sha1_state.
+ */
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha1_export,
+ .import = sha1_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name= "sha1-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
};
+/*
+ * Module init: returns -EOPNOTSUPP when KIMD_SHA_1 is not reported as
+ * available, otherwise the result of registering the sha1 shash.
+ */
static int __init sha1_s390_init(void)
{
- if (!crypt_s390_func_available(KIMD_SHA_1))
+ if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA))
return -EOPNOTSUPP;
- return crypto_register_alg(&alg);
+ return crypto_register_shash(&alg);
}
+/* Module exit: unregister the sha1 shash. */
static void __exit sha1_s390_fini(void)
{
- crypto_unregister_alg(&alg);
+ crypto_unregister_shash(&alg);
}
module_init(sha1_s390_init);
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 19c03fb6ba7..9b853809a49 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -1,32 +1,29 @@
/*
* Cryptographic API.
*
- * s390 implementation of the SHA256 Secure Hash Algorithm.
+ * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
*
* s390 Version:
- * Copyright IBM Corp. 2005,2007
+ * Copyright IBM Corp. 2005, 2011
* Author(s): Jan Glauber (jang@de.ibm.com)
*
- * Derived from "crypto/sha256_generic.c"
- * and "arch/s390/crypto/sha1_s390.c"
- *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
+#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/crypto.h>
#include <crypto/sha.h>
#include "crypt_s390.h"
#include "sha.h"
-static void sha256_init(struct crypto_tfm *tfm)
+static int sha256_init(struct shash_desc *desc)
{
- struct s390_sha_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA256_H0;
sctx->state[1] = SHA256_H1;
@@ -38,40 +35,115 @@ static void sha256_init(struct crypto_tfm *tfm)
sctx->state[7] = SHA256_H7;
sctx->count = 0;
sctx->func = KIMD_SHA_256;
+
+ return 0;
+}
+
+/*
+ * shash .export hook (used by both sha256_alg and sha224_alg): copy count,
+ * state and buffered data of the s390 context into the struct sha256_state
+ * that out points to. Always returns 0.
+ */
+static int sha256_export(struct shash_desc *desc, void *out)
+{
+	struct sha256_state *saved = out;
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	memcpy(saved->buf, sctx->buf, sizeof(saved->buf));
+	memcpy(saved->state, sctx->state, sizeof(saved->state));
+	saved->count = sctx->count;
+
+	return 0;
+}
+
+/*
+ * shash .import hook (used by both sha256_alg and sha224_alg): load count,
+ * state and buffered data from the struct sha256_state that in points to,
+ * and select KIMD_SHA_256 as the function code. Always returns 0.
+ */
+static int sha256_import(struct shash_desc *desc, const void *in)
+{
+	const struct sha256_state *saved = in;
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->func = KIMD_SHA_256;
+	sctx->count = saved->count;
+	memcpy(sctx->buf, saved->buf, sizeof(saved->buf));
+	memcpy(sctx->state, saved->state, sizeof(saved->state));
+
+	return 0;
+}
+
+/*
+ * shash descriptor for "sha256" (driver name "sha256-s390"). Update and
+ * final use the shared s390_sha_update()/s390_sha_final() helpers; the
+ * exported state has the size of struct sha256_state.
+ */
+static struct shash_alg sha256_alg = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name= "sha256-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+/*
+ * shash .init hook for sha224: load the eight SHA224_H* initial values,
+ * reset the count and select KIMD_SHA_256 as the function code.
+ * Always returns 0.
+ */
+static int sha224_init(struct shash_desc *desc)
+{
+	static const u32 sha224_h[] = {
+		SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+		SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+	};
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	unsigned int i;
+
+	for (i = 0; i < sizeof(sha224_h) / sizeof(sha224_h[0]); i++)
+		sctx->state[i] = sha224_h[i];
+	sctx->count = 0;
+	sctx->func = KIMD_SHA_256;
+
+	return 0;
}
-static struct crypto_alg alg = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_DIGEST,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_u = { .digest = {
- .dia_digestsize = SHA256_DIGEST_SIZE,
- .dia_init = sha256_init,
- .dia_update = s390_sha_update,
- .dia_final = s390_sha_final } }
+/*
+ * shash descriptor for "sha224" (driver name "sha224-s390"). It differs
+ * from sha256_alg in its digest size, init hook, names and block size
+ * constant; export/import are the sha256 ones.
+ */
+static struct shash_alg sha224_alg = {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name= "sha224-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
};
-static int sha256_s390_init(void)
+/*
+ * Module init: returns -EOPNOTSUPP when KIMD_SHA_256 is not reported as
+ * available. Otherwise registers sha256_alg and then sha224_alg; when the
+ * second registration fails, sha256_alg is unregistered again and the
+ * error is returned.
+ */
+static int __init sha256_s390_init(void)
{
- if (!crypt_s390_func_available(KIMD_SHA_256))
- return -EOPNOTSUPP;
+ int ret;
- return crypto_register_alg(&alg);
+ if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA))
+ return -EOPNOTSUPP;
+ ret = crypto_register_shash(&sha256_alg);
+ if (ret < 0)
+ goto out;
+ ret = crypto_register_shash(&sha224_alg);
+ if (ret < 0)
+ crypto_unregister_shash(&sha256_alg);
+out:
+ return ret;
}
+/* Module exit: unregister sha224_alg, then sha256_alg. */
static void __exit sha256_s390_fini(void)
{
- crypto_unregister_alg(&alg);
+ crypto_unregister_shash(&sha224_alg);
+ crypto_unregister_shash(&sha256_alg);
}
module_init(sha256_s390_init);
module_exit(sha256_s390_fini);
MODULE_ALIAS("sha256");
+MODULE_ALIAS("sha224");
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm");
+MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 23c7861f6ae..32a81383b69 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -12,16 +12,19 @@
* any later version.
*
*/
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/crypto.h>
#include "sha.h"
#include "crypt_s390.h"
-static void sha512_init(struct crypto_tfm *tfm)
+static int sha512_init(struct shash_desc *desc)
{
- struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
*(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL;
*(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL;
@@ -33,29 +36,61 @@ static void sha512_init(struct crypto_tfm *tfm)
*(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL;
ctx->count = 0;
ctx->func = KIMD_SHA_512;
+
+ return 0;
+}
+
+/*
+ * shash .export hook (used by both sha512_alg and sha384_alg): copy state
+ * and buffered data into the struct sha512_state that out points to. The
+ * s390 context has a single count value; it goes to count[0] and count[1]
+ * is set to 0. Always returns 0.
+ */
+static int sha512_export(struct shash_desc *desc, void *out)
+{
+	struct sha512_state *saved = out;
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	saved->count[1] = 0;
+	saved->count[0] = sctx->count;
+	memcpy(saved->buf, sctx->buf, sizeof(saved->buf));
+	memcpy(saved->state, sctx->state, sizeof(saved->state));
+
+	return 0;
}
-static struct crypto_alg sha512_alg = {
- .cra_name = "sha512",
- .cra_driver_name = "sha512-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_DIGEST,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(sha512_alg.cra_list),
- .cra_u = { .digest = {
- .dia_digestsize = SHA512_DIGEST_SIZE,
- .dia_init = sha512_init,
- .dia_update = s390_sha_update,
- .dia_final = s390_sha_final } }
+/*
+ * shash .import hook (used by both sha512_alg and sha384_alg): load count,
+ * state and buffered data from the struct sha512_state that in points to,
+ * and select KIMD_SHA_512 as the function code.
+ *
+ * Returns -ERANGE, leaving the context untouched, when count[1] of the
+ * imported state is non-zero; returns 0 otherwise.
+ */
+static int sha512_import(struct shash_desc *desc, const void *in)
+{
+	const struct sha512_state *saved = in;
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	if (unlikely(saved->count[1] != 0))
+		return -ERANGE;
+
+	sctx->func = KIMD_SHA_512;
+	sctx->count = saved->count[0];
+	memcpy(sctx->buf, saved->buf, sizeof(saved->buf));
+	memcpy(sctx->state, saved->state, sizeof(saved->state));
+
+	return 0;
+}
+
+/*
+ * shash descriptor for "sha512" (driver name "sha512-s390"). Update and
+ * final use the shared s390_sha_update()/s390_sha_final() helpers; the
+ * exported state has the size of struct sha512_state.
+ */
+static struct shash_alg sha512_alg = {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = sha512_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha512_export,
+ .import = sha512_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name= "sha512-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
};
MODULE_ALIAS("sha512");
-static void sha384_init(struct crypto_tfm *tfm)
+static int sha384_init(struct shash_desc *desc)
{
- struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
*(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
*(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL;
@@ -67,22 +102,28 @@ static void sha384_init(struct crypto_tfm *tfm)
*(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL;
ctx->count = 0;
ctx->func = KIMD_SHA_512;
+
+ return 0;
}
-static struct crypto_alg sha384_alg = {
- .cra_name = "sha384",
- .cra_driver_name = "sha384-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_DIGEST,
- .cra_blocksize = SHA384_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(sha384_alg.cra_list),
- .cra_u = { .digest = {
- .dia_digestsize = SHA384_DIGEST_SIZE,
- .dia_init = sha384_init,
- .dia_update = s390_sha_update,
- .dia_final = s390_sha_final } }
+/*
+ * shash descriptor for "sha384" (driver name "sha384-s390"). It shares
+ * update/final/export/import with sha512_alg and differs in its digest
+ * size, init hook, names and block size constant.
+ * NOTE(review): unlike the other shash descriptors in this patch, this one
+ * also sets .cra_ctxsize - confirm whether that is intended.
+ */
+static struct shash_alg sha384_alg = {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha512_export,
+ .import = sha512_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name= "sha384-s390",
+ .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_sha_ctx),
+ .cra_module = THIS_MODULE,
+ }
};
MODULE_ALIAS("sha384");
@@ -91,20 +132,20 @@ static int __init init(void)
{
int ret;
- if (!crypt_s390_func_available(KIMD_SHA_512))
+ if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA))
return -EOPNOTSUPP;
- if ((ret = crypto_register_alg(&sha512_alg)) < 0)
+ if ((ret = crypto_register_shash(&sha512_alg)) < 0)
goto out;
- if ((ret = crypto_register_alg(&sha384_alg)) < 0)
- crypto_unregister_alg(&sha512_alg);
+ if ((ret = crypto_register_shash(&sha384_alg)) < 0)
+ crypto_unregister_shash(&sha512_alg);
out:
return ret;
}
+/* Module exit: unregister sha512_alg, then sha384_alg. */
static void __exit fini(void)
{
- crypto_unregister_alg(&sha512_alg);
- crypto_unregister_alg(&sha384_alg);
+ crypto_unregister_shash(&sha512_alg);
+ crypto_unregister_shash(&sha384_alg);
}
module_init(init);
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 9d6eb8c3d37..8620b0ec9c4 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -13,14 +13,15 @@
*
*/
-#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
#include "sha.h"
#include "crypt_s390.h"
-void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
- struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
- unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bsize = crypto_shash_blocksize(desc->tfm);
unsigned int index;
int ret;
@@ -35,29 +36,34 @@ void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
if (index) {
memcpy(ctx->buf + index, data, bsize - index);
ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize);
- BUG_ON(ret != bsize);
+ if (ret != bsize)
+ return -EIO;
data += bsize - index;
len -= bsize - index;
+ index = 0;
}
/* process as many blocks as possible */
if (len >= bsize) {
ret = crypt_s390_kimd(ctx->func, ctx->state, data,
len & ~(bsize - 1));
- BUG_ON(ret != (len & ~(bsize - 1)));
+ if (ret != (len & ~(bsize - 1)))
+ return -EIO;
data += ret;
len -= ret;
}
store:
if (len)
memcpy(ctx->buf + index , data, len);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(s390_sha_update);
-void s390_sha_final(struct crypto_tfm *tfm, u8 *out)
+int s390_sha_final(struct shash_desc *desc, u8 *out)
{
- struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
- unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bsize = crypto_shash_blocksize(desc->tfm);
u64 bits;
unsigned int index, end, plen;
int ret;
@@ -77,19 +83,22 @@ void s390_sha_final(struct crypto_tfm *tfm, u8 *out)
memset(ctx->buf + index, 0x00, end - index - 8);
/*
- * Append message length. Well, SHA-512 wants a 128 bit lenght value,
+ * Append message length. Well, SHA-512 wants a 128 bit length value,
* nevertheless we use u64, should be enough for now...
*/
bits = ctx->count * 8;
memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end);
- BUG_ON(ret != end);
+ if (ret != end)
+ return -EIO;
/* copy digest to out */
- memcpy(out, ctx->state, crypto_hash_digestsize(crypto_hash_cast(tfm)));
+ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
/* wipe context */
memset(ctx, 0, sizeof *ctx);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(s390_sha_final);
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index c5cdb975d59..2e56498a40d 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,910 +1,208 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.26-rc4
-# Fri May 30 09:49:33 2008
-#
-CONFIG_SCHED_MC=y
-CONFIG_MMU=y
-CONFIG_ZONE_DMA=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_HAVE_LATENCYTOP_SUPPORT=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_TIME=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_GENERIC_BUG=y
-CONFIG_NO_IOMEM=y
-CONFIG_NO_DMA=y
-CONFIG_GENERIC_LOCKBREAK=y
-CONFIG_PGSTE=y
-CONFIG_S390=y
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# General setup
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_TASKSTATS is not set
+CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-# CONFIG_AUDITSYSCALL is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=17
CONFIG_CGROUPS=y
-# CONFIG_CGROUP_DEBUG is not set
-CONFIG_CGROUP_NS=y
-# CONFIG_CGROUP_DEVICE is not set
-# CONFIG_CPUSETS is not set
-CONFIG_GROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
-# CONFIG_RT_GROUP_SCHED is not set
-CONFIG_USER_SCHED=y
-# CONFIG_CGROUP_SCHED is not set
-# CONFIG_CGROUP_CPUACCT is not set
-# CONFIG_RESOURCE_COUNTERS is not set
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
-# CONFIG_RELAY is not set
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
-CONFIG_UTS_NS=y
-CONFIG_IPC_NS=y
-# CONFIG_USER_NS is not set
-# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SYSCTL=y
-# CONFIG_EMBEDDED is not set
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_RD_XZ=y
+CONFIG_RD_LZO=y
+CONFIG_RD_LZ4=y
+CONFIG_EXPERT=y
# CONFIG_COMPAT_BRK is not set
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_TIMERFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_SLAB=y
-# CONFIG_SLUB is not set
-# CONFIG_SLOB is not set
-# CONFIG_PROFILING is not set
-# CONFIG_MARKERS is not set
-CONFIG_HAVE_OPROFILE=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
CONFIG_KPROBES=y
-CONFIG_KRETPROBES=y
-CONFIG_HAVE_KPROBES=y
-CONFIG_HAVE_KRETPROBES=y
-# CONFIG_HAVE_DMA_ATTRS is not set
-CONFIG_PROC_PAGE_MONITOR=y
-CONFIG_SLABINFO=y
-CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
-# CONFIG_MODULE_FORCE_LOAD is not set
CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
CONFIG_MODVERSIONS=y
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
-CONFIG_BLOCK=y
-# CONFIG_BLK_DEV_IO_TRACE is not set
-CONFIG_BLK_DEV_BSG=y
-CONFIG_BLOCK_COMPAT=y
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
-# CONFIG_DEFAULT_CFQ is not set
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="deadline"
-CONFIG_PREEMPT_NOTIFIERS=y
-CONFIG_CLASSIC_RCU=y
-
-#
-# Base setup
-#
-
-#
-# Processor type and features
-#
-CONFIG_TICK_ONESHOT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_64BIT=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=32
-CONFIG_HOTPLUG_CPU=y
-CONFIG_COMPAT=y
-CONFIG_SYSVIPC_COMPAT=y
-CONFIG_AUDIT_ARCH=y
-CONFIG_S390_SWITCH_AMODE=y
-CONFIG_S390_EXEC_PROTECT=y
-
-#
-# Code generation options
-#
-# CONFIG_MARCH_G5 is not set
-CONFIG_MARCH_Z900=y
-# CONFIG_MARCH_Z990 is not set
-# CONFIG_MARCH_Z9_109 is not set
-CONFIG_PACK_STACK=y
-# CONFIG_SMALL_STACK is not set
-CONFIG_CHECK_STACK=y
-CONFIG_STACK_GUARD=256
-# CONFIG_WARN_STACK is not set
-CONFIG_ARCH_POPULATES_NODE_MAP=y
-
-#
-# Kernel preemption
-#
-# CONFIG_PREEMPT_NONE is not set
-# CONFIG_PREEMPT_VOLUNTARY is not set
-CONFIG_PREEMPT=y
-# CONFIG_PREEMPT_RCU is not set
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_DEFAULT=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
-CONFIG_SELECT_MEMORY_MODEL=y
-# CONFIG_FLATMEM_MANUAL is not set
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-CONFIG_SPARSEMEM_MANUAL=y
-CONFIG_SPARSEMEM=y
-CONFIG_HAVE_MEMORY_PRESENT=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPARSEMEM_EXTREME=y
-CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
-CONFIG_SPARSEMEM_VMEMMAP=y
-CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_RESOURCES_64BIT=y
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BOUNCE=y
-CONFIG_VIRT_TO_BUS=y
-
-#
-# I/O subsystem configuration
-#
-CONFIG_MACHCHK_WARNING=y
-CONFIG_QDIO=y
-# CONFIG_QDIO_DEBUG is not set
-
-#
-# Misc
-#
-CONFIG_IPL=y
-# CONFIG_IPL_TAPE is not set
-CONFIG_IPL_VM=y
-CONFIG_BINFMT_ELF=y
-CONFIG_BINFMT_MISC=m
-CONFIG_FORCE_MAX_ZONEORDER=9
-# CONFIG_PROCESS_DEBUG is not set
-CONFIG_PFAULT=y
-# CONFIG_SHARED_KERNEL is not set
-# CONFIG_CMM is not set
-# CONFIG_PAGE_STATES is not set
-CONFIG_VIRT_TIMER=y
-CONFIG_VIRT_CPU_ACCOUNTING=y
-# CONFIG_APPLDATA_BASE is not set
+CONFIG_MARCH_Z196=y
+CONFIG_NR_CPUS=256
CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
-# CONFIG_HZ_300 is not set
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
-# CONFIG_SCHED_HRTICK is not set
-CONFIG_S390_HYPFS_FS=y
-CONFIG_KEXEC=y
-# CONFIG_ZFCPDUMP is not set
-CONFIG_S390_GUEST=y
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA=y
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
CONFIG_UNIX=y
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-# CONFIG_XFRM_MIGRATE is not set
-# CONFIG_XFRM_STATISTICS is not set
CONFIG_NET_KEY=y
-# CONFIG_NET_KEY_MIGRATE is not set
-CONFIG_IUCV=m
-CONFIG_AFIUCV=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-CONFIG_INET_TUNNEL=y
-CONFIG_INET_XFRM_MODE_TRANSPORT=y
-CONFIG_INET_XFRM_MODE_TUNNEL=y
-CONFIG_INET_XFRM_MODE_BEET=y
-CONFIG_INET_LRO=y
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
-# CONFIG_IP_VS is not set
+# CONFIG_INET_LRO is not set
CONFIG_IPV6=y
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_IPV6_ROUTER_PREF is not set
-# CONFIG_IPV6_OPTIMISTIC_DAD is not set
-# CONFIG_INET6_AH is not set
-# CONFIG_INET6_ESP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_IPV6_MIP6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-CONFIG_INET6_XFRM_MODE_TRANSPORT=y
-CONFIG_INET6_XFRM_MODE_TUNNEL=y
-CONFIG_INET6_XFRM_MODE_BEET=y
-# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
-CONFIG_IPV6_SIT=y
-CONFIG_IPV6_NDISC_NODETYPE=y
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_IPV6_MULTIPLE_TABLES is not set
-# CONFIG_IPV6_MROUTE is not set
-# CONFIG_NETWORK_SECMARK is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-CONFIG_NETFILTER_ADVANCED=y
-
-#
-# Core Netfilter Configuration
-#
-CONFIG_NETFILTER_NETLINK=m
-CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NETFILTER_NETLINK_LOG=m
-CONFIG_NF_CONNTRACK=m
-# CONFIG_NF_CT_ACCT is not set
-# CONFIG_NF_CONNTRACK_MARK is not set
-# CONFIG_NF_CONNTRACK_EVENTS is not set
-# CONFIG_NF_CT_PROTO_DCCP is not set
-# CONFIG_NF_CT_PROTO_SCTP is not set
-# CONFIG_NF_CT_PROTO_UDPLITE is not set
-# CONFIG_NF_CONNTRACK_AMANDA is not set
-# CONFIG_NF_CONNTRACK_FTP is not set
-# CONFIG_NF_CONNTRACK_H323 is not set
-# CONFIG_NF_CONNTRACK_IRC is not set
-# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set
-# CONFIG_NF_CONNTRACK_PPTP is not set
-# CONFIG_NF_CONNTRACK_SANE is not set
-# CONFIG_NF_CONNTRACK_SIP is not set
-# CONFIG_NF_CONNTRACK_TFTP is not set
-# CONFIG_NF_CT_NETLINK is not set
-# CONFIG_NETFILTER_XTABLES is not set
-
-#
-# IP: Netfilter Configuration
-#
-# CONFIG_NF_CONNTRACK_IPV4 is not set
-# CONFIG_IP_NF_QUEUE is not set
-# CONFIG_IP_NF_IPTABLES is not set
-# CONFIG_IP_NF_ARPTABLES is not set
-
-#
-# IPv6: Netfilter Configuration
-#
-# CONFIG_NF_CONNTRACK_IPV6 is not set
-# CONFIG_IP6_NF_QUEUE is not set
-# CONFIG_IP6_NF_IPTABLES is not set
-# CONFIG_IP_DCCP is not set
-CONFIG_IP_SCTP=m
-# CONFIG_SCTP_DBG_MSG is not set
-# CONFIG_SCTP_DBG_OBJCNT is not set
-# CONFIG_SCTP_HMAC_NONE is not set
-# CONFIG_SCTP_HMAC_SHA1 is not set
-CONFIG_SCTP_HMAC_MD5=y
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_VLAN_8021Q=y
CONFIG_NET_SCHED=y
-
-#
-# Queueing/Scheduling
-#
CONFIG_NET_SCH_CBQ=m
-# CONFIG_NET_SCH_HTB is not set
-# CONFIG_NET_SCH_HFSC is not set
CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RR=m
CONFIG_NET_SCH_RED=m
CONFIG_NET_SCH_SFQ=m
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
CONFIG_NET_SCH_DSMARK=m
-# CONFIG_NET_SCH_NETEM is not set
-# CONFIG_NET_SCH_INGRESS is not set
-
-#
-# Classification
-#
-CONFIG_NET_CLS=y
-# CONFIG_NET_CLS_BASIC is not set
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_ROUTE=y
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
-# CONFIG_CLS_U32_PERF is not set
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_FLOW=m
-# CONFIG_NET_EMATCH is not set
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
-# CONFIG_NET_ACT_GACT is not set
-# CONFIG_NET_ACT_MIRRED is not set
-CONFIG_NET_ACT_NAT=m
-# CONFIG_NET_ACT_PEDIT is not set
-# CONFIG_NET_ACT_SIMP is not set
-# CONFIG_NET_CLS_IND is not set
-CONFIG_NET_SCH_FIFO=y
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NET_TCPPROBE is not set
-CONFIG_CAN=m
-CONFIG_CAN_RAW=m
-CONFIG_CAN_BCM=m
-
-#
-# CAN Device Drivers
-#
-CONFIG_CAN_VCAN=m
-# CONFIG_CAN_DEBUG_DEVICES is not set
-# CONFIG_AF_RXRPC is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-# CONFIG_PCMCIA is not set
-CONFIG_CCW=y
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
+CONFIG_BPF_JIT=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
-# CONFIG_DEBUG_DEVRES is not set
-CONFIG_SYS_HYPERVISOR=y
-# CONFIG_CONNECTOR is not set
-CONFIG_BLK_DEV=y
-# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=m
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_XIP=y
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# S/390 block device drivers
-#
-CONFIG_BLK_DEV_XPRAM=m
-# CONFIG_DCSSBLK is not set
-CONFIG_DASD=y
-CONFIG_DASD_PROFILE=y
-CONFIG_DASD_ECKD=y
-CONFIG_DASD_FBA=y
-CONFIG_DASD_DIAG=y
-CONFIG_DASD_EER=y
-CONFIG_VIRTIO_BLK=m
-CONFIG_MISC_DEVICES=y
-# CONFIG_EEPROM_93CX6 is not set
-# CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_HAVE_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
+CONFIG_VIRTIO_BLK=y
CONFIG_SCSI=y
-# CONFIG_SCSI_DMA is not set
-# CONFIG_SCSI_TGT is not set
-CONFIG_SCSI_NETLINK=y
-CONFIG_SCSI_PROC_FS=y
-
-#
-# SCSI support type (disk, tape, CD-ROM)
-#
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
-# CONFIG_CHR_DEV_OSST is not set
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
-# CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_SCSI_WAIT_SCAN=m
-
-#
-# SCSI Transports
-#
-# CONFIG_SCSI_SPI_ATTRS is not set
-CONFIG_SCSI_FC_ATTRS=y
-# CONFIG_SCSI_ISCSI_ATTRS is not set
-# CONFIG_SCSI_SAS_ATTRS is not set
-# CONFIG_SCSI_SAS_LIBSAS is not set
-# CONFIG_SCSI_SRP_ATTRS is not set
-CONFIG_SCSI_LOWLEVEL=y
-# CONFIG_ISCSI_TCP is not set
-# CONFIG_SCSI_DEBUG is not set
CONFIG_ZFCP=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-# CONFIG_MD_RAID10 is not set
-# CONFIG_MD_RAID456 is not set
-CONFIG_MD_MULTIPATH=m
-# CONFIG_MD_FAULTY is not set
-CONFIG_BLK_DEV_DM=y
-# CONFIG_DM_DEBUG is not set
-CONFIG_DM_CRYPT=y
-CONFIG_DM_SNAPSHOT=y
-CONFIG_DM_MIRROR=y
-CONFIG_DM_ZERO=y
-CONFIG_DM_MULTIPATH=y
-# CONFIG_DM_MULTIPATH_EMC is not set
-# CONFIG_DM_MULTIPATH_RDAC is not set
-# CONFIG_DM_MULTIPATH_HP is not set
-# CONFIG_DM_DELAY is not set
-# CONFIG_DM_UEVENT is not set
+CONFIG_SCSI_VIRTIO=y
CONFIG_NETDEVICES=y
-# CONFIG_NETDEVICES_MULTIQUEUE is not set
-# CONFIG_IFB is not set
-CONFIG_DUMMY=m
CONFIG_BONDING=m
-# CONFIG_MACVLAN is not set
+CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_NET_ETHERNET=y
-# CONFIG_MII is not set
-# CONFIG_IBM_NEW_EMAC_ZMII is not set
-# CONFIG_IBM_NEW_EMAC_RGMII is not set
-# CONFIG_IBM_NEW_EMAC_TAH is not set
-# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
-CONFIG_NETDEV_1000=y
-# CONFIG_E1000E_ENABLED is not set
-CONFIG_NETDEV_10000=y
-# CONFIG_TR is not set
-# CONFIG_WAN is not set
-
-#
-# S/390 network device drivers
-#
-CONFIG_LCS=m
-CONFIG_CTCM=m
-# CONFIG_NETIUCV is not set
-# CONFIG_SMSGIUCV is not set
-# CONFIG_CLAW is not set
-CONFIG_QETH=y
-CONFIG_QETH_L2=y
-CONFIG_QETH_L3=y
-CONFIG_QETH_IPV6=y
-CONFIG_CCWGROUP=y
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-CONFIG_VIRTIO_NET=m
-
-#
-# Character devices
-#
-CONFIG_DEVKMEM=y
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-CONFIG_HW_RANDOM=m
-# CONFIG_R3964 is not set
+CONFIG_VIRTIO_NET=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
CONFIG_RAW_DRIVER=m
-CONFIG_MAX_RAW_DEVS=256
-# CONFIG_HANGCHECK_TIMER is not set
-
-#
-# S/390 character device drivers
-#
-CONFIG_TN3270=y
-CONFIG_TN3270_TTY=y
-CONFIG_TN3270_FS=m
-CONFIG_TN3270_CONSOLE=y
-CONFIG_TN3215=y
-CONFIG_TN3215_CONSOLE=y
-CONFIG_CCW_CONSOLE=y
-CONFIG_SCLP_TTY=y
-CONFIG_SCLP_CONSOLE=y
-CONFIG_SCLP_VT220_TTY=y
-CONFIG_SCLP_VT220_CONSOLE=y
-CONFIG_SCLP_CPI=m
-CONFIG_S390_TAPE=m
-
-#
-# S/390 tape interface support
-#
-CONFIG_S390_TAPE_BLOCK=y
-
-#
-# S/390 tape hardware support
-#
-CONFIG_S390_TAPE_34XX=m
-# CONFIG_S390_TAPE_3590 is not set
-# CONFIG_VMLOGRDR is not set
-# CONFIG_VMCP is not set
-# CONFIG_MONREADER is not set
-CONFIG_MONWRITER=m
-CONFIG_S390_VMUR=m
-# CONFIG_POWER_SUPPLY is not set
-# CONFIG_THERMAL is not set
-# CONFIG_WATCHDOG is not set
-
-#
-# Sonics Silicon Backplane
-#
-# CONFIG_MEMSTICK is not set
-# CONFIG_NEW_LEDS is not set
-CONFIG_ACCESSIBILITY=y
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
-# CONFIG_EXT4DEV_FS is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-# CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
-# CONFIG_OCFS2_FS is not set
-CONFIG_DNOTIFY=y
-CONFIG_INOTIFY=y
-CONFIG_INOTIFY_USER=y
-# CONFIG_QUOTA is not set
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-CONFIG_GENERIC_ACL=y
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_FANOTIFY=y
+CONFIG_FUSE_FS=y
CONFIG_PROC_KCORE=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
-# CONFIG_HUGETLBFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_CONFIGFS_FS=m
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-# CONFIG_NFS_V3_ACL is not set
-# CONFIG_NFS_V4 is not set
-CONFIG_NFSD=y
-CONFIG_NFSD_V3=y
-# CONFIG_NFSD_V3_ACL is not set
-# CONFIG_NFSD_V4 is not set
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_EXPORTFS=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-# CONFIG_SUNRPC_BIND34 is not set
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-CONFIG_IBM_PARTITION=y
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-# CONFIG_SYSV68_PARTITION is not set
-# CONFIG_NLS is not set
-CONFIG_DLM=m
-# CONFIG_DLM_DEBUG is not set
-
-#
-# Kernel hacking
-#
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-# CONFIG_PRINTK_TIME is not set
-CONFIG_ENABLE_WARN_DEPRECATED=y
-CONFIG_ENABLE_MUST_CHECK=y
-CONFIG_FRAME_WARN=2048
+CONFIG_HUGETLBFS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_MAGIC_SYSRQ=y
-# CONFIG_UNUSED_SYMBOLS is not set
-CONFIG_DEBUG_FS=y
-# CONFIG_HEADERS_CHECK is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_SCHED_DEBUG is not set
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
-# CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_DEBUG_SLAB is not set
-CONFIG_DEBUG_PREEMPT=y
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_LOCK_ALLOC is not set
-# CONFIG_PROVE_LOCKING is not set
-# CONFIG_LOCK_STAT is not set
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_VM is not set
-# CONFIG_DEBUG_WRITECOUNT is not set
-# CONFIG_DEBUG_LIST is not set
-# CONFIG_DEBUG_SG is not set
-# CONFIG_FRAME_POINTER is not set
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_KPROBES_SANITY_TEST is not set
-# CONFIG_BACKTRACE_SELF_TEST is not set
-# CONFIG_LKDTM is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_LATENCYTOP is not set
-CONFIG_SAMPLES=y
-# CONFIG_SAMPLE_KOBJECT is not set
-# CONFIG_SAMPLE_KPROBES is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
-CONFIG_CRYPTO=y
-
-#
-# Crypto core or helper
-#
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_AEAD=m
-CONFIG_CRYPTO_BLKCIPHER=y
-CONFIG_CRYPTO_HASH=m
-CONFIG_CRYPTO_MANAGER=y
-CONFIG_CRYPTO_GF128MUL=m
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_CRYPTD is not set
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RCU_TRACE=y
+CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KPROBES_SANITY_TEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Authenticated Encryption with Associated Data
-#
+CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_SEQIV=m
-
-#
-# Block modes
-#
CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_CTR=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_ECB=m
-# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_XTS is not set
-
-#
-# Hash modes
-#
-CONFIG_CRYPTO_HMAC=m
-# CONFIG_CRYPTO_XCBC is not set
-
-#
-# Digest
-#
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=m
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-CONFIG_CRYPTO_SHA1=m
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_WP512 is not set
-
-#
-# Ciphers
-#
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_BLOWFISH is not set
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_CMAC=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_CRCT10DIF=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_DES is not set
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_FCRYPT=m
-# CONFIG_CRYPTO_KHAZAD is not set
+CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-
-#
-# Compression
-#
-# CONFIG_CRYPTO_DEFLATE is not set
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_ZLIB=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRYPTO_HW=y
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
CONFIG_ZCRYPT=m
-# CONFIG_ZCRYPT_MONOLITHIC is not set
-# CONFIG_CRYPTO_SHA1_S390 is not set
-# CONFIG_CRYPTO_SHA256_S390 is not set
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA512_S390=m
-# CONFIG_CRYPTO_DES_S390 is not set
-# CONFIG_CRYPTO_AES_S390 is not set
-CONFIG_S390_PRNG=m
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=m
-# CONFIG_GENERIC_FIND_FIRST_BIT is not set
-# CONFIG_GENERIC_FIND_NEXT_BIT is not set
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-# CONFIG_CRC_ITU_T is not set
-CONFIG_CRC32=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
CONFIG_CRC7=m
-CONFIG_LIBCRC32C=m
-CONFIG_LZO_COMPRESS=m
-CONFIG_LZO_DECOMPRESS=m
-CONFIG_PLIST=y
-CONFIG_HAVE_KVM=y
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM=m
-CONFIG_VIRTIO=y
-CONFIG_VIRTIO_RING=y
-CONFIG_VIRTIO_BALLOON=m
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_CMM=m
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index b08d2abf617..06f8d95a16c 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
-s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o
+s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index aea572009d6..b34b5ab90a3 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -1,8 +1,7 @@
/*
- * arch/s390/hypfs/hypfs.h
* Hypervisor filesystem for Linux on s390.
*
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
@@ -11,29 +10,66 @@
#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <asm/hypfs.h>
#define REG_FILE_MODE 0440
#define UPDATE_FILE_MODE 0220
#define DIR_MODE 0550
-extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
- const char *name);
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
-extern struct dentry *hypfs_create_u64(struct super_block *sb,
- struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
__u64 value);
-extern struct dentry *hypfs_create_str(struct super_block *sb,
- struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
char *string);
/* LPAR Hypervisor */
extern int hypfs_diag_init(void);
extern void hypfs_diag_exit(void);
-extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_diag_create_files(struct dentry *root);
/* VM Hypervisor */
extern int hypfs_vm_init(void);
-extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root);
+extern void hypfs_vm_exit(void);
+extern int hypfs_vm_create_files(struct dentry *root);
+
+/* Set Partition-Resource Parameter */
+int hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
+
+/* debugfs interface */
+struct hypfs_dbfs_file;
+
+struct hypfs_dbfs_data { /* refcounted snapshot served by a hypfs debugfs file */
+ void *buf; /* start of the bytes copied to user space on read */
+ void *buf_free_ptr; /* pointer handed to the file's data_free() hook */
+ size_t size; /* number of valid bytes at buf */
+ struct hypfs_dbfs_file *dbfs_file; /* owning file; supplies data_free() */
+ struct kref kref; /* lifetime of this descriptor and its payload */
+};
+
+struct hypfs_dbfs_file { /* description of one debugfs file plus its cached data */
+ const char *name; /* file name passed to debugfs_create_file() */
+ int (*data_create)(void **data, void **data_free_ptr, /* fills buffer, free pointer and size; non-zero is returned by read */
+ size_t *size);
+ void (*data_free)(const void *buf_free_ptr); /* releases what data_create() produced */
+ long (*unlocked_ioctl) (struct file *, unsigned int, /* optional; ioctl yields -ENOTTY when NULL */
+ unsigned long);
+
+ /* Private data for hypfs_dbfs.c */
+ struct hypfs_dbfs_data *data; /* currently cached data, or NULL */
+ struct delayed_work data_free_work; /* drops the cached data again */
+ struct mutex lock; /* guards data; also held around unlocked_ioctl */
+ struct dentry *dentry; /* result of debugfs_create_file() */
+};
+
+extern int hypfs_dbfs_init(void);
+extern void hypfs_dbfs_exit(void);
+extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
+extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
#endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
new file mode 100644
index 00000000000..2badf2bf9cd
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -0,0 +1,130 @@
+/*
+ * Hypervisor filesystem for Linux on s390 - debugfs interface
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include "hypfs.h"
+
+static struct dentry *dbfs_dir;
+
+static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f)
+{ /* Allocate a data descriptor for f with an initialized kref; NULL if kmalloc fails */
+ struct hypfs_dbfs_data *data;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL); /* buf, buf_free_ptr and size are not assigned here */
+ if (!data)
+ return NULL;
+ kref_init(&data->kref);
+ data->dbfs_file = f; /* back-pointer used by hypfs_dbfs_data_free() */
+ return data;
+}
+
+static void hypfs_dbfs_data_free(struct kref *kref)
+{ /* Release callback given to kref_put(): frees the payload, then the descriptor */
+ struct hypfs_dbfs_data *data;
+
+ data = container_of(kref, struct hypfs_dbfs_data, kref);
+ data->dbfs_file->data_free(data->buf_free_ptr); /* payload is freed by the owning file's hook */
+ kfree(data);
+}
+
+static void data_free_delayed(struct work_struct *work)
+{ /* Delayed-work handler: detach the cached data from the file and drop that reference */
+ struct hypfs_dbfs_data *data;
+ struct hypfs_dbfs_file *df;
+
+ df = container_of(work, struct hypfs_dbfs_file, data_free_work.work);
+ mutex_lock(&df->lock);
+ data = df->data;
+ df->data = NULL; /* the next dbfs_read() will create fresh data */
+ mutex_unlock(&df->lock);
+ kref_put(&data->kref, hypfs_dbfs_data_free); /* done after unlocking; a reader may still hold its own reference */
+}
+
+static ssize_t dbfs_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{ /* read handler: serve the file's cached data, creating it first when none is cached */
+ struct hypfs_dbfs_data *data;
+ struct hypfs_dbfs_file *df;
+ ssize_t rc;
+
+ if (*ppos != 0)
+ return 0; /* only a read at offset 0 returns data */
+
+ df = file_inode(file)->i_private;
+ mutex_lock(&df->lock);
+ if (!df->data) { /* nothing cached: build the data while holding the lock */
+ data = hypfs_dbfs_data_alloc(df);
+ if (!data) {
+ mutex_unlock(&df->lock);
+ return -ENOMEM;
+ }
+ rc = df->data_create(&data->buf, &data->buf_free_ptr,
+ &data->size);
+ if (rc) { /* creation failed: pass the hook's return value to the caller */
+ mutex_unlock(&df->lock);
+ kfree(data); /* only the descriptor is freed; data_free() is not called */
+ return rc;
+ }
+ df->data = data;
+ schedule_delayed_work(&df->data_free_work, HZ); /* data_free_delayed() drops the cache after HZ jiffies */
+ }
+ data = df->data;
+ kref_get(&data->kref); /* own reference for the copy done without the lock */
+ mutex_unlock(&df->lock);
+
+ rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size);
+ kref_put(&data->kref, hypfs_dbfs_data_free);
+ return rc;
+}
+
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{ /* ioctl handler: forward to the file's optional unlocked_ioctl hook under df->lock */
+ struct hypfs_dbfs_file *df;
+ long rc;
+
+ df = file->f_path.dentry->d_inode->i_private;
+ mutex_lock(&df->lock);
+ if (df->unlocked_ioctl)
+ rc = df->unlocked_ioctl(file, cmd, arg);
+ else
+ rc = -ENOTTY; /* no hook set for this file */
+ mutex_unlock(&df->lock);
+ return rc;
+}
+
+static const struct file_operations dbfs_ops = { /* passed to debugfs_create_file() by hypfs_dbfs_create_file() */
+ .read = dbfs_read,
+ .llseek = no_llseek,
+ .unlocked_ioctl = dbfs_ioctl,
+};
+
+int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
+{ /* Create debugfs file df->name in dbfs_dir; returns 0 or the PTR_ERR() of the dentry */
+ df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
+ &dbfs_ops);
+ if (IS_ERR(df->dentry))
+ return PTR_ERR(df->dentry);
+ mutex_init(&df->lock); /* NOTE(review): runs after the file was created - confirm nothing can read earlier */
+ INIT_DELAYED_WORK(&df->data_free_work, data_free_delayed);
+ return 0;
+}
+
+void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
+{ /* Remove the debugfs entry stored in df->dentry */
+ debugfs_remove(df->dentry); /* NOTE(review): data_free_work is not cancelled here - confirm none can be pending */
+}
+
+int hypfs_dbfs_init(void)
+{ /* Create the top-level debugfs directory that hypfs_dbfs_create_file() places files in */
+ dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+ return PTR_ERR_OR_ZERO(dbfs_dir); /* 0, or the error encoded in the returned pointer */
+}
+
+void hypfs_dbfs_exit(void)
+{ /* Remove the directory created by hypfs_dbfs_init() */
+ debugfs_remove(dbfs_dir);
+}
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index b9a1ce1f28e..5eeffeefae0 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -1,16 +1,20 @@
/*
- * arch/s390/hypfs/hypfs_diag.c
* Hypervisor filesystem for Linux on s390. Diag 204 and 224
* implementation.
*
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006, 2008
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/types.h>
#include <linux/errno.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
@@ -18,6 +22,8 @@
#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */
#define TMP_SIZE 64 /* size of temporary buffers */
+#define DBFS_D204_HDR_VERSION 0
+
/* diag 204 subcodes */
enum diag204_sc {
SUBC_STIB4 = 4,
@@ -43,6 +49,8 @@ static void *diag204_buf; /* 4K aligned buffer for diag204 data */
static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */
static int diag204_buf_pages; /* number of pages for diag204 data */
+static struct dentry *dbfs_d204_file;
+
/*
* DIAG 204 data structures and member access functions.
*
@@ -159,7 +167,7 @@ static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
LPAR_NAME_LEN);
EBCASC(name, LPAR_NAME_LEN);
name[LPAR_NAME_LEN] = 0;
- strstrip(name);
+ strim(name);
}
struct cpu_info {
@@ -360,18 +368,21 @@ static void diag204_free_buffer(void)
} else {
free_pages((unsigned long) diag204_buf, 0);
}
- diag204_buf_pages = 0;
diag204_buf = NULL;
}
+static void *page_align_ptr(void *ptr)
+{
+ return (void *) PAGE_ALIGN((unsigned long) ptr);
+}
+
static void *diag204_alloc_vbuf(int pages)
{
/* The buffer has to be page aligned! */
diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1));
if (!diag204_buf_vmalloc)
return ERR_PTR(-ENOMEM);
- diag204_buf = (void*)((unsigned long)diag204_buf_vmalloc
- & ~0xfffUL) + 0x1000;
+ diag204_buf = page_align_ptr(diag204_buf_vmalloc);
diag204_buf_pages = pages;
return diag204_buf;
}
@@ -433,7 +444,7 @@ static int diag204_probe(void)
}
if (diag204((unsigned long)SUBC_STIB6 |
(unsigned long)INFO_EXT, pages, buf) >= 0) {
- diag204_store_sc = SUBC_STIB7;
+ diag204_store_sc = SUBC_STIB6;
diag204_info_type = INFO_EXT;
goto out;
}
@@ -464,17 +475,26 @@ fail_alloc:
return rc;
}
+static int diag204_do_store(void *buf, int pages)
+{
+ int rc;
+
+ rc = diag204((unsigned long) diag204_store_sc |
+ (unsigned long) diag204_info_type, pages, buf);
+ return rc < 0 ? -ENOSYS : 0;
+}
+
static void *diag204_store(void)
{
void *buf;
- int pages;
+ int pages, rc;
buf = diag204_get_buffer(diag204_info_type, &pages);
if (IS_ERR(buf))
goto out;
- if (diag204((unsigned long)diag204_store_sc |
- (unsigned long)diag204_info_type, pages, buf) < 0)
- return ERR_PTR(-ENOSYS);
+ rc = diag204_do_store(buf, pages);
+ if (rc)
+ return ERR_PTR(rc);
out:
return buf;
}
@@ -483,7 +503,7 @@ out:
static int diag224(void *ptr)
{
- int rc = -ENOTSUPP;
+ int rc = -EOPNOTSUPP;
asm volatile(
" diag %1,%2,0x224\n"
@@ -502,7 +522,7 @@ static int diag224_get_name_table(void)
return -ENOMEM;
if (diag224(diag224_cpu_names)) {
kfree(diag224_cpu_names);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
return 0;
@@ -518,30 +538,84 @@ static int diag224_idx2name(int index, char *name)
memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN),
CPU_NAME_LEN);
name[CPU_NAME_LEN] = 0;
- strstrip(name);
+ strim(name);
return 0;
}
+struct dbfs_d204_hdr {
+ u64 len; /* Length of d204 buffer without header */
+ u16 version; /* Version of header */
+ u8 sc; /* Used subcode */
+ char reserved[53];
+} __attribute__ ((packed));
+
+struct dbfs_d204 {
+ struct dbfs_d204_hdr hdr; /* 64 byte header */
+ char buf[]; /* d204 buffer */
+} __attribute__ ((packed));
+
+static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
+{
+ struct dbfs_d204 *d204;
+ int rc, buf_size;
+ void *base;
+
+ buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr);
+ base = vzalloc(buf_size);
+ if (!base)
+ return -ENOMEM;
+ d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
+ rc = diag204_do_store(d204->buf, diag204_buf_pages);
+ if (rc) {
+ vfree(base);
+ return rc;
+ }
+ d204->hdr.version = DBFS_D204_HDR_VERSION;
+ d204->hdr.len = PAGE_SIZE * diag204_buf_pages;
+ d204->hdr.sc = diag204_store_sc;
+ *data = d204;
+ *data_free_ptr = base;
+ *size = d204->hdr.len + sizeof(struct dbfs_d204_hdr);
+ return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_d204 = {
+ .name = "diag_204",
+ .data_create = dbfs_d204_create,
+ .data_free = vfree,
+};
+
__init int hypfs_diag_init(void)
{
int rc;
if (diag204_probe()) {
- printk(KERN_ERR "hypfs: diag 204 not working.");
+ pr_err("The hardware system does not support hypfs\n");
return -ENODATA;
}
- rc = diag224_get_name_table();
- if (rc) {
- diag204_free_buffer();
- printk(KERN_ERR "hypfs: could not get name table.\n");
+ if (diag204_info_type == INFO_EXT) {
+ rc = hypfs_dbfs_create_file(&dbfs_file_d204);
+ if (rc)
+ return rc;
}
- return rc;
+ if (MACHINE_IS_LPAR) {
+ rc = diag224_get_name_table();
+ if (rc) {
+ pr_err("The hardware system does not provide all "
+ "functions required by hypfs\n");
+ debugfs_remove(dbfs_d204_file);
+ return rc;
+ }
+ }
+ return 0;
}
void hypfs_diag_exit(void)
{
+ debugfs_remove(dbfs_d204_file);
diag224_delete_name_table();
diag204_free_buffer();
+ hypfs_dbfs_remove_file(&dbfs_file_d204);
}
/*
@@ -549,8 +623,7 @@ void hypfs_diag_exit(void)
* *******************************************
*/
-static int hypfs_create_cpu_files(struct super_block *sb,
- struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
@@ -558,32 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb,
snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
cpu_info));
- cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
- rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
cpu_info__acc_time(diag204_info_type, cpu_info) -
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
- rc = hypfs_create_u64(sb, cpu_dir, "cputime",
+ rc = hypfs_create_u64(cpu_dir, "cputime",
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
if (diag204_info_type == INFO_EXT) {
- rc = hypfs_create_u64(sb, cpu_dir, "onlinetime",
+ rc = hypfs_create_u64(cpu_dir, "onlinetime",
cpu_info__online_time(diag204_info_type,
cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
}
diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
- rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
- if (IS_ERR(rc))
- return PTR_ERR(rc);
- return 0;
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_RET(rc);
}
-static void *hypfs_create_lpar_files(struct super_block *sb,
- struct dentry *systems_dir, void *part_hdr)
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
{
struct dentry *cpus_dir;
struct dentry *lpar_dir;
@@ -593,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
lpar_name[LPAR_NAME_LEN] = 0;
- lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name);
+ lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
if (IS_ERR(lpar_dir))
return lpar_dir;
- cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus");
+ cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
if (IS_ERR(cpus_dir))
return cpus_dir;
cpu_info = part_hdr + part_hdr__size(diag204_info_type);
for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
int rc;
- rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info);
+ rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
if (rc)
return ERR_PTR(rc);
cpu_info += cpu_info__size(diag204_info_type);
@@ -610,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
return cpu_info;
}
-static int hypfs_create_phys_cpu_files(struct super_block *sb,
- struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
{
struct dentry *cpu_dir;
char buffer[TMP_SIZE];
@@ -619,34 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb,
snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
cpu_info));
- cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
if (IS_ERR(cpu_dir))
return PTR_ERR(cpu_dir);
- rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
phys_cpu__mgm_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
- rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
- if (IS_ERR(rc))
- return PTR_ERR(rc);
- return 0;
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_RET(rc);
}
-static void *hypfs_create_phys_files(struct super_block *sb,
- struct dentry *parent_dir, void *phys_hdr)
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
{
int i;
void *cpu_info;
struct dentry *cpus_dir;
- cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus");
+ cpus_dir = hypfs_mkdir(parent_dir, "cpus");
if (IS_ERR(cpus_dir))
return cpus_dir;
cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
int rc;
- rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info);
+ rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
if (rc)
return ERR_PTR(rc);
cpu_info += phys_cpu__size(diag204_info_type);
@@ -654,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb,
return cpu_info;
}
-int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_diag_create_files(struct dentry *root)
{
struct dentry *systems_dir, *hyp_dir;
void *time_hdr, *part_hdr;
@@ -665,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
if (IS_ERR(buffer))
return PTR_ERR(buffer);
- systems_dir = hypfs_mkdir(sb, root, "systems");
+ systems_dir = hypfs_mkdir(root, "systems");
if (IS_ERR(systems_dir)) {
rc = PTR_ERR(systems_dir);
goto err_out;
@@ -673,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
time_hdr = (struct x_info_blk_hdr *)buffer;
part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
- part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr);
+ part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
if (IS_ERR(part_hdr)) {
rc = PTR_ERR(part_hdr);
goto err_out;
}
}
if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
- ptr = hypfs_create_phys_files(sb, root, part_hdr);
+ ptr = hypfs_create_phys_files(root, part_hdr);
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
goto err_out;
}
}
- hyp_dir = hypfs_mkdir(sb, root, "hyp");
+ hyp_dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(hyp_dir)) {
rc = PTR_ERR(hyp_dir);
goto err_out;
}
- ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor");
+ ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
goto err_out;
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 00000000000..f043c3c7e73
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,141 @@
+/*
+ * Hypervisor filesystem for Linux on s390.
+ * Set Partition-Resource Parameter interface.
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/compat.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS 0
+#define DIAG304_QUERY_PRP 1
+#define DIAG304_SET_CAPPING 2
+
+#define DIAG304_CMD_MAX 2
+
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+ register unsigned long _data asm("2") = (unsigned long) data;
+ register unsigned long _rc asm("3");
+ register unsigned long _cmd asm("4") = cmd;
+
+ asm volatile("diag %1,%2,0x304\n"
+ : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory");
+
+ return _rc;
+}
+
+static void hypfs_sprp_free(const void *data)
+{
+ free_page((unsigned long) data);
+}
+
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+ unsigned long rc;
+ void *data;
+
+ data = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP);
+ if (rc != 1) {
+ *data_ptr = *free_ptr = NULL;
+ *size = 0;
+ free_page((unsigned long) data);
+ return -EIO;
+ }
+ *data_ptr = *free_ptr = data;
+ *size = PAGE_SIZE;
+ return 0;
+}
+
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+ struct hypfs_diag304 diag304;
+ unsigned long cmd;
+ void __user *udata;
+ void *data;
+ int rc;
+
+ if (copy_from_user(&diag304, user_area, sizeof(diag304)))
+ return -EFAULT;
+ if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
+ return -EINVAL;
+
+ data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!data)
+ return -ENOMEM;
+
+ udata = (void __user *)(unsigned long) diag304.data;
+ if (diag304.args[1] == DIAG304_SET_WEIGHTS ||
+ diag304.args[1] == DIAG304_SET_CAPPING)
+ if (copy_from_user(data, udata, PAGE_SIZE)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ cmd = *(unsigned long *) &diag304.args[0];
+ diag304.rc = hypfs_sprp_diag304(data, cmd);
+
+ if (diag304.args[1] == DIAG304_QUERY_PRP)
+ if (copy_to_user(udata, data, PAGE_SIZE)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0;
+out:
+ free_page((unsigned long) data);
+ return rc;
+}
+
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (is_compat_task())
+ argp = compat_ptr(arg);
+ else
+ argp = (void __user *) arg;
+ switch (cmd) {
+ case HYPFS_DIAG304:
+ return __hypfs_sprp_ioctl(argp);
+ default: /* unknown ioctl number */
+ return -ENOTTY;
+ }
+ return 0;
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+ .name = "diag_304",
+ .data_create = hypfs_sprp_create,
+ .data_free = hypfs_sprp_free,
+ .unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+int hypfs_sprp_init(void)
+{
+ if (!sclp_has_sprp())
+ return 0;
+ return hypfs_dbfs_create_file(&hypfs_sprp_file);
+}
+
+void hypfs_sprp_exit(void)
+{
+ if (!sclp_has_sprp())
+ return;
+ hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index d01fc8f799f..32040ace00e 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -1,7 +1,7 @@
/*
* Hypervisor filesystem for Linux on s390. z/VM implementation.
*
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
@@ -10,9 +10,11 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <asm/ebcdic.h>
+#include <asm/timex.h>
#include "hypfs.h"
#define NAME_LEN 8
+#define DBFS_D2FC_HDR_VERSION 0
static char local_guest[] = " ";
static char all_guests[] = "* ";
@@ -30,7 +32,7 @@ struct diag2fc_data {
__u32 pcpus;
__u32 lcpus;
__u32 vcpus;
- __u32 cpu_min;
+ __u32 ocpus;
__u32 cpu_max;
__u32 cpu_shares;
__u32 cpu_use_samp;
@@ -76,42 +78,44 @@ static int diag2fc(int size, char* query, void *addr)
return -residual_cnt;
}
-static struct diag2fc_data *diag2fc_store(char *query, int *count)
+/*
+ * Allocate buffer for "query" and store diag 2fc at "offset"
+ */
+static void *diag2fc_store(char *query, unsigned int *count, int offset)
{
+ void *data;
int size;
- struct diag2fc_data *data;
do {
size = diag2fc(0, query, NULL);
if (size < 0)
return ERR_PTR(-EACCES);
- data = vmalloc(size);
+ data = vmalloc(size + offset);
if (!data)
return ERR_PTR(-ENOMEM);
- if (diag2fc(size, query, data) == 0)
+ if (diag2fc(size, query, data + offset) == 0)
break;
vfree(data);
} while (1);
- *count = (size / sizeof(*data));
+ *count = (size / sizeof(struct diag2fc_data));
return data;
}
-static void diag2fc_free(void *data)
+static void diag2fc_free(const void *data)
{
vfree(data);
}
-#define ATTRIBUTE(sb, dir, name, member) \
+#define ATTRIBUTE(dir, name, member) \
do { \
void *rc; \
- rc = hypfs_create_u64(sb, dir, name, member); \
+ rc = hypfs_create_u64(dir, name, member); \
if (IS_ERR(rc)) \
return PTR_ERR(rc); \
} while(0)
-static int hpyfs_vm_create_guest(struct super_block *sb,
- struct dentry *systems_dir,
+static int hpyfs_vm_create_guest(struct dentry *systems_dir,
struct diag2fc_data *data)
{
char guest_name[NAME_LEN + 1] = {};
@@ -124,89 +128,95 @@ static int hpyfs_vm_create_guest(struct super_block *sb,
/* guest dir */
memcpy(guest_name, data->guest_name, NAME_LEN);
EBCASC(guest_name, NAME_LEN);
- strstrip(guest_name);
- guest_dir = hypfs_mkdir(sb, systems_dir, guest_name);
+ strim(guest_name);
+ guest_dir = hypfs_mkdir(systems_dir, guest_name);
if (IS_ERR(guest_dir))
return PTR_ERR(guest_dir);
- ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time);
+ ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
/* logical cpu information */
- cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus");
+ cpus_dir = hypfs_mkdir(guest_dir, "cpus");
if (IS_ERR(cpus_dir))
return PTR_ERR(cpus_dir);
- ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu);
- ATTRIBUTE(sb, cpus_dir, "capped", capped_value);
- ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag);
- ATTRIBUTE(sb, cpus_dir, "count", data->vcpus);
- ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min);
- ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max);
- ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares);
+ ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+ ATTRIBUTE(cpus_dir, "capped", capped_value);
+ ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+ ATTRIBUTE(cpus_dir, "count", data->vcpus);
+ /*
+ * Note: The "weight_min" attribute got the wrong name.
+ * The value represents the number of non-stopped (operating)
+ * CPUS.
+ */
+ ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+ ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+ ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
/* memory information */
- mem_dir = hypfs_mkdir(sb, guest_dir, "mem");
+ mem_dir = hypfs_mkdir(guest_dir, "mem");
if (IS_ERR(mem_dir))
return PTR_ERR(mem_dir);
- ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb);
- ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb);
- ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb);
- ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb);
+ ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+ ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+ ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+ ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
/* samples */
- samples_dir = hypfs_mkdir(sb, guest_dir, "samples");
+ samples_dir = hypfs_mkdir(guest_dir, "samples");
if (IS_ERR(samples_dir))
return PTR_ERR(samples_dir);
- ATTRIBUTE(sb, samples_dir, "cpu_using", data->cpu_use_samp);
- ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp);
- ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp);
- ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp);
- ATTRIBUTE(sb, samples_dir, "other", data->other_samp);
- ATTRIBUTE(sb, samples_dir, "total", data->total_samp);
+ ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+ ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+ ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+ ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+ ATTRIBUTE(samples_dir, "other", data->other_samp);
+ ATTRIBUTE(samples_dir, "total", data->total_samp);
return 0;
}
-int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_vm_create_files(struct dentry *root)
{
struct dentry *dir, *file;
struct diag2fc_data *data;
- int rc, i, count = 0;
+ unsigned int count = 0;
+ int rc, i;
- data = diag2fc_store(guest_query, &count);
+ data = diag2fc_store(guest_query, &count, 0);
if (IS_ERR(data))
return PTR_ERR(data);
/* Hpervisor Info */
- dir = hypfs_mkdir(sb, root, "hyp");
+ dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor");
+ file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
if (IS_ERR(file)) {
rc = PTR_ERR(file);
goto failed;
}
/* physical cpus */
- dir = hypfs_mkdir(sb, root, "cpus");
+ dir = hypfs_mkdir(root, "cpus");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
- file = hypfs_create_u64(sb, dir, "count", data->lcpus);
+ file = hypfs_create_u64(dir, "count", data->lcpus);
if (IS_ERR(file)) {
rc = PTR_ERR(file);
goto failed;
}
/* guests */
- dir = hypfs_mkdir(sb, root, "systems");
+ dir = hypfs_mkdir(root, "systems");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
goto failed;
}
for (i = 0; i < count; i++) {
- rc = hpyfs_vm_create_guest(sb, dir, &(data[i]));
+ rc = hpyfs_vm_create_guest(dir, &(data[i]));
if (rc)
goto failed;
}
@@ -218,14 +228,60 @@ failed:
return rc;
}
+struct dbfs_d2fc_hdr {
+ u64 len; /* Length of d2fc buffer without header */
+ u16 version; /* Version of header */
+ char tod_ext[16]; /* TOD clock for d2fc */
+ u64 count; /* Number of VM guests in d2fc buffer */
+ char reserved[30];
+} __attribute__ ((packed));
+
+struct dbfs_d2fc {
+ struct dbfs_d2fc_hdr hdr; /* 64 byte header */
+ char buf[]; /* d2fc buffer */
+} __attribute__ ((packed));
+
+static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
+{
+ struct dbfs_d2fc *d2fc;
+ unsigned int count;
+
+ d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
+ if (IS_ERR(d2fc))
+ return PTR_ERR(d2fc);
+ get_tod_clock_ext(d2fc->hdr.tod_ext);
+ d2fc->hdr.len = count * sizeof(struct diag2fc_data);
+ d2fc->hdr.version = DBFS_D2FC_HDR_VERSION;
+ d2fc->hdr.count = count;
+ memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved));
+ *data = d2fc;
+ *data_free_ptr = d2fc;
+ *size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr);
+ return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_2fc = {
+ .name = "diag_2fc",
+ .data_create = dbfs_diag2fc_create,
+ .data_free = diag2fc_free,
+};
+
int hypfs_vm_init(void)
{
+ if (!MACHINE_IS_VM)
+ return 0;
if (diag2fc(0, all_guests, NULL) > 0)
guest_query = all_guests;
else if (diag2fc(0, local_guest, NULL) > 0)
guest_query = local_guest;
else
return -EACCES;
+ return hypfs_dbfs_create_file(&dbfs_file_2fc);
+}
- return 0;
+void hypfs_vm_exit(void)
+{
+ if (!MACHINE_IS_VM)
+ return;
+ hypfs_dbfs_remove_file(&dbfs_file_2fc);
}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 4b010ff814c..c952b981e4f 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -1,36 +1,38 @@
/*
- * arch/s390/hypfs/inode.c
* Hypervisor filesystem for Linux on s390.
*
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006, 2008
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
*/
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/vfs.h>
+#include <linux/slab.h>
#include <linux/pagemap.h>
-#include <linux/gfp.h>
#include <linux/time.h>
#include <linux/parser.h>
#include <linux/sysfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/aio.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */
#define TMP_SIZE 64 /* size of temporary buffers */
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
- struct dentry *dir);
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
struct hypfs_sb_info {
- uid_t uid; /* uid used for files and dirs */
- gid_t gid; /* gid used for files and dirs */
+ kuid_t uid; /* uid used for files and dirs */
+ kgid_t gid; /* gid used for files and dirs */
struct dentry *update_file; /* file to trigger update */
time_t last_update; /* last update time in secs since 1970 */
struct mutex lock; /* lock to protect update process */
@@ -38,7 +40,7 @@ struct hypfs_sb_info {
static const struct file_operations hypfs_file_ops;
static struct file_system_type hypfs_type;
-static struct super_operations hypfs_s_ops;
+static const struct super_operations hypfs_s_ops;
/* start of list of all dentries, which have to be deleted on update */
static struct dentry *hypfs_last_dentry;
@@ -70,8 +72,6 @@ static void hypfs_remove(struct dentry *dentry)
struct dentry *parent;
parent = dentry->d_parent;
- if (!parent || !parent->d_inode)
- return;
mutex_lock(&parent->d_inode->i_mutex);
if (hypfs_positive(dentry)) {
if (S_ISDIR(dentry->d_inode->i_mode))
@@ -94,34 +94,32 @@ static void hypfs_delete_tree(struct dentry *root)
}
}
-static struct inode *hypfs_make_inode(struct super_block *sb, int mode)
+static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
{
struct inode *ret = new_inode(sb);
if (ret) {
struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
+ ret->i_ino = get_next_ino();
ret->i_mode = mode;
ret->i_uid = hypfs_info->uid;
ret->i_gid = hypfs_info->gid;
- ret->i_blocks = 0;
ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
- if (mode & S_IFDIR)
- ret->i_nlink = 2;
- else
- ret->i_nlink = 1;
+ if (S_ISDIR(mode))
+ set_nlink(ret, 2);
}
return ret;
}
-static void hypfs_drop_inode(struct inode *inode)
+static void hypfs_evict_inode(struct inode *inode)
{
+ clear_inode(inode);
kfree(inode->i_private);
- generic_delete_inode(inode);
}
static int hypfs_open(struct inode *inode, struct file *filp)
{
- char *data = filp->f_path.dentry->d_inode->i_private;
+ char *data = file_inode(filp)->i_private;
struct hypfs_sb_info *fs_info;
if (filp->f_mode & FMODE_WRITE) {
@@ -143,51 +141,40 @@ static int hypfs_open(struct inode *inode, struct file *filp)
}
mutex_unlock(&fs_info->lock);
}
- return 0;
+ return nonseekable_open(inode, filp);
}
static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t offset)
{
char *data;
- size_t len;
+ ssize_t ret;
struct file *filp = iocb->ki_filp;
/* XXX: temporary */
char __user *buf = iov[0].iov_base;
size_t count = iov[0].iov_len;
- if (nr_segs != 1) {
- count = -EINVAL;
- goto out;
- }
+ if (nr_segs != 1)
+ return -EINVAL;
data = filp->private_data;
- len = strlen(data);
- if (offset > len) {
- count = 0;
- goto out;
- }
- if (count > len - offset)
- count = len - offset;
- if (copy_to_user(buf, data + offset, count)) {
- count = -EFAULT;
- goto out;
- }
- iocb->ki_pos += count;
+ ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data));
+ if (ret <= 0)
+ return ret;
+
+ iocb->ki_pos += ret;
file_accessed(filp);
-out:
- return count;
+
+ return ret;
}
static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t offset)
{
int rc;
- struct super_block *sb;
- struct hypfs_sb_info *fs_info;
+ struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
+ struct hypfs_sb_info *fs_info = sb->s_fs_info;
size_t count = iov_length(iov, nr_segs);
- sb = iocb->ki_filp->f_path.dentry->d_inode->i_sb;
- fs_info = sb->s_fs_info;
/*
* Currently we only allow one update per second for two reasons:
* 1. diag 204 is VERY expensive
@@ -205,11 +192,11 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
}
hypfs_delete_tree(sb->s_root);
if (MACHINE_IS_VM)
- rc = hypfs_vm_create_files(sb, sb->s_root);
+ rc = hypfs_vm_create_files(sb->s_root);
else
- rc = hypfs_diag_create_files(sb, sb->s_root);
+ rc = hypfs_diag_create_files(sb->s_root);
if (rc) {
- printk(KERN_ERR "hypfs: Update failed\n");
+ pr_err("Updating the hypfs tree failed\n");
hypfs_delete_tree(sb->s_root);
goto out;
}
@@ -228,7 +215,7 @@ static int hypfs_release(struct inode *inode, struct file *filp)
enum { opt_uid, opt_gid, opt_err };
-static match_table_t hypfs_tokens = {
+static const match_table_t hypfs_tokens = {
{opt_uid, "uid=%u"},
{opt_gid, "gid=%u"},
{opt_err, NULL}
@@ -238,6 +225,8 @@ static int hypfs_parse_options(char *options, struct super_block *sb)
{
char *str;
substring_t args[MAX_OPT_ARGS];
+ kuid_t uid;
+ kgid_t gid;
if (!options)
return 0;
@@ -252,29 +241,34 @@ static int hypfs_parse_options(char *options, struct super_block *sb)
case opt_uid:
if (match_int(&args[0], &option))
return -EINVAL;
- hypfs_info->uid = option;
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
+ return -EINVAL;
+ hypfs_info->uid = uid;
break;
case opt_gid:
if (match_int(&args[0], &option))
return -EINVAL;
- hypfs_info->gid = option;
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
+ return -EINVAL;
+ hypfs_info->gid = gid;
break;
case opt_err:
default:
- printk(KERN_ERR "hypfs: Unrecognized mount option "
- "\"%s\" or missing value\n", str);
+ pr_err("%s is not a valid mount option\n", str);
return -EINVAL;
}
}
return 0;
}
-static int hypfs_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int hypfs_show_options(struct seq_file *s, struct dentry *root)
{
- struct hypfs_sb_info *hypfs_info = mnt->mnt_sb->s_fs_info;
+ struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info;
- seq_printf(s, ",uid=%u", hypfs_info->uid);
- seq_printf(s, ",gid=%u", hypfs_info->gid);
+ seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid));
+ seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid));
return 0;
}
@@ -289,107 +283,84 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
mutex_init(&sbi->lock);
- sbi->uid = current->uid;
- sbi->gid = current->gid;
+ sbi->uid = current_uid();
+ sbi->gid = current_gid();
sb->s_fs_info = sbi;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = HYPFS_MAGIC;
sb->s_op = &hypfs_s_ops;
- if (hypfs_parse_options(data, sb)) {
- rc = -EINVAL;
- goto err_alloc;
- }
+ if (hypfs_parse_options(data, sb))
+ return -EINVAL;
root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
- if (!root_inode) {
- rc = -ENOMEM;
- goto err_alloc;
- }
+ if (!root_inode)
+ return -ENOMEM;
root_inode->i_op = &simple_dir_inode_operations;
root_inode->i_fop = &simple_dir_operations;
- root_dentry = d_alloc_root(root_inode);
- if (!root_dentry) {
- iput(root_inode);
- rc = -ENOMEM;
- goto err_alloc;
- }
+ sb->s_root = root_dentry = d_make_root(root_inode);
+ if (!root_dentry)
+ return -ENOMEM;
if (MACHINE_IS_VM)
- rc = hypfs_vm_create_files(sb, root_dentry);
+ rc = hypfs_vm_create_files(root_dentry);
else
- rc = hypfs_diag_create_files(sb, root_dentry);
+ rc = hypfs_diag_create_files(root_dentry);
if (rc)
- goto err_tree;
- sbi->update_file = hypfs_create_update_file(sb, root_dentry);
- if (IS_ERR(sbi->update_file)) {
- rc = PTR_ERR(sbi->update_file);
- goto err_tree;
- }
+ return rc;
+ sbi->update_file = hypfs_create_update_file(root_dentry);
+ if (IS_ERR(sbi->update_file))
+ return PTR_ERR(sbi->update_file);
hypfs_update_update(sb);
- sb->s_root = root_dentry;
- printk(KERN_INFO "hypfs: Hypervisor filesystem mounted\n");
+ pr_info("Hypervisor filesystem mounted\n");
return 0;
-
-err_tree:
- hypfs_delete_tree(root_dentry);
- d_genocide(root_dentry);
- dput(root_dentry);
-err_alloc:
- kfree(sbi);
- return rc;
}
-static int hypfs_get_super(struct file_system_type *fst, int flags,
- const char *devname, void *data, struct vfsmount *mnt)
+static struct dentry *hypfs_mount(struct file_system_type *fst, int flags,
+ const char *devname, void *data)
{
- return get_sb_single(fst, flags, data, hypfs_fill_super, mnt);
+ return mount_single(fst, flags, data, hypfs_fill_super);
}
static void hypfs_kill_super(struct super_block *sb)
{
struct hypfs_sb_info *sb_info = sb->s_fs_info;
- if (sb->s_root) {
+ if (sb->s_root)
hypfs_delete_tree(sb->s_root);
+ if (sb_info->update_file)
hypfs_remove(sb_info->update_file);
- kfree(sb->s_fs_info);
- sb->s_fs_info = NULL;
- }
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
kill_litter_super(sb);
}
-static struct dentry *hypfs_create_file(struct super_block *sb,
- struct dentry *parent, const char *name,
- char *data, mode_t mode)
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
+ char *data, umode_t mode)
{
struct dentry *dentry;
struct inode *inode;
- struct qstr qname;
- qname.name = name;
- qname.len = strlen(name);
- qname.hash = full_name_hash(name, qname.len);
mutex_lock(&parent->d_inode->i_mutex);
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
goto fail;
}
- inode = hypfs_make_inode(sb, mode);
+ inode = hypfs_make_inode(parent->d_sb, mode);
if (!inode) {
dput(dentry);
dentry = ERR_PTR(-ENOMEM);
goto fail;
}
- if (mode & S_IFREG) {
+ if (S_ISREG(mode)) {
inode->i_fop = &hypfs_file_ops;
if (data)
inode->i_size = strlen(data);
else
inode->i_size = 0;
- } else if (mode & S_IFDIR) {
+ } else if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
- parent->d_inode->i_nlink++;
+ inc_nlink(parent->d_inode);
} else
BUG();
inode->i_private = data;
@@ -400,24 +371,22 @@ fail:
return dentry;
}
-struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
- const char *name)
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
{
struct dentry *dentry;
- dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE);
+ dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
if (IS_ERR(dentry))
return dentry;
hypfs_add_dentry(dentry);
return dentry;
}
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
- struct dentry *dir)
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
{
struct dentry *dentry;
- dentry = hypfs_create_file(sb, dir, "update", NULL,
+ dentry = hypfs_create_file(dir, "update", NULL,
S_IFREG | UPDATE_FILE_MODE);
/*
* We do not put the update file on the 'delete' list with
@@ -427,19 +396,19 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb,
return dentry;
}
-struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_u64(struct dentry *dir,
const char *name, __u64 value)
{
char *buffer;
char tmp[TMP_SIZE];
struct dentry *dentry;
- snprintf(tmp, TMP_SIZE, "%lld\n", (unsigned long long int)value);
+ snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
buffer = kstrdup(tmp, GFP_KERNEL);
if (!buffer)
return ERR_PTR(-ENOMEM);
dentry =
- hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
return ERR_PTR(-ENOMEM);
@@ -448,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
return dentry;
}
-struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_str(struct dentry *dir,
const char *name, char *string)
{
char *buffer;
@@ -459,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
return ERR_PTR(-ENOMEM);
sprintf(buffer, "%s\n", string);
dentry =
- hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
if (IS_ERR(dentry)) {
kfree(buffer);
return ERR_PTR(-ENOMEM);
@@ -475,18 +444,20 @@ static const struct file_operations hypfs_file_ops = {
.write = do_sync_write,
.aio_read = hypfs_aio_read,
.aio_write = hypfs_aio_write,
+ .llseek = no_llseek,
};
static struct file_system_type hypfs_type = {
.owner = THIS_MODULE,
.name = "s390_hypfs",
- .get_sb = hypfs_get_super,
+ .mount = hypfs_mount,
.kill_sb = hypfs_kill_super
};
+MODULE_ALIAS_FS("s390_hypfs");
-static struct super_operations hypfs_s_ops = {
+static const struct super_operations hypfs_s_ops = {
.statfs = simple_statfs,
- .drop_inode = hypfs_drop_inode,
+ .evict_inode = hypfs_evict_inode,
.show_options = hypfs_show_options,
};
@@ -496,20 +467,25 @@ static int __init hypfs_init(void)
{
int rc;
- if (MACHINE_IS_VM) {
- if (hypfs_vm_init())
- /* no diag 2fc, just exit */
- return -ENODATA;
- } else {
- if (hypfs_diag_init()) {
- rc = -ENODATA;
- goto fail_diag;
- }
+ rc = hypfs_dbfs_init();
+ if (rc)
+ return rc;
+ if (hypfs_diag_init()) {
+ rc = -ENODATA;
+ goto fail_dbfs_exit;
+ }
+ if (hypfs_vm_init()) {
+ rc = -ENODATA;
+ goto fail_hypfs_diag_exit;
+ }
+ if (hypfs_sprp_init()) {
+ rc = -ENODATA;
+ goto fail_hypfs_vm_exit;
}
s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
if (!s390_kobj) {
- rc = -ENOMEM;;
- goto fail_sysfs;
+ rc = -ENOMEM;
+ goto fail_hypfs_sprp_exit;
}
rc = register_filesystem(&hypfs_type);
if (rc)
@@ -518,20 +494,26 @@ static int __init hypfs_init(void)
fail_filesystem:
kobject_put(s390_kobj);
-fail_sysfs:
- if (!MACHINE_IS_VM)
- hypfs_diag_exit();
-fail_diag:
- printk(KERN_ERR "hypfs: Initialization failed with rc = %i.\n", rc);
+fail_hypfs_sprp_exit:
+ hypfs_sprp_exit();
+fail_hypfs_vm_exit:
+ hypfs_vm_exit();
+fail_hypfs_diag_exit:
+ hypfs_diag_exit();
+fail_dbfs_exit:
+ hypfs_dbfs_exit();
+ pr_err("Initialization of hypfs failed with rc=%i\n", rc);
return rc;
}
static void __exit hypfs_exit(void)
{
- if (!MACHINE_IS_VM)
- hypfs_diag_exit();
unregister_filesystem(&hypfs_type);
kobject_put(s390_kobj);
+ hypfs_sprp_exit();
+ hypfs_vm_exit();
+ hypfs_diag_exit();
+ hypfs_dbfs_exit();
}
module_init(hypfs_init)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
new file mode 100644
index 00000000000..57892a8a905
--- /dev/null
+++ b/arch/s390/include/asm/Kbuild
@@ -0,0 +1,7 @@
+
+
+generic-y += clkdev.h
+generic-y += hash.h
+generic-y += mcs_spinlock.h
+generic-y += preempt.h
+generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
new file mode 100644
index 00000000000..bd93ff6661b
--- /dev/null
+++ b/arch/s390/include/asm/airq.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright IBM Corp. 2002, 2007
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>
+ * Cornelia Huck <cornelia.huck@de.ibm.com>
+ * Arnd Bergmann <arndb@de.ibm.com>
+ * Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_AIRQ_H
+#define _ASM_S390_AIRQ_H
+
+#include <linux/bit_spinlock.h>
+
+struct airq_struct {
+ struct hlist_node list; /* Handler queueing. */
+ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
+ u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
+ u8 lsi_mask; /* Local-Summary-Indicator mask */
+ u8 isc; /* Interrupt-subclass */
+ u8 flags;
+};
+
+#define AIRQ_PTR_ALLOCATED 0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
+
+/* Adapter interrupt bit vector */
+struct airq_iv {
+ unsigned long *vector; /* Adapter interrupt bit vector */
+ unsigned long *avail; /* Allocation bit mask for the bit vector */
+ unsigned long *bitlock; /* Lock bit mask for the bit vector */
+ unsigned long *ptr; /* Pointer associated with each bit */
+ unsigned int *data; /* 32 bit value associated with each bit */
+ unsigned long bits; /* Number of bits in the vector */
+ unsigned long end; /* Number of highest allocated bit + 1 */
+ spinlock_t lock; /* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
+void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+ unsigned long end);
+
+static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv)
+{
+ return airq_iv_alloc(iv, 1);
+}
+
+static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit)
+{
+ airq_iv_free(iv, bit, 1);
+}
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+ return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+ unsigned int data)
+{
+ iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+ unsigned long ptr)
+{
+ iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->ptr[bit];
+}
+
+#endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
new file mode 100644
index 00000000000..32a70598715
--- /dev/null
+++ b/arch/s390/include/asm/appldata.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ */
+
+#ifndef _ASM_S390_APPLDATA_H
+#define _ASM_S390_APPLDATA_H
+
+#include <asm/io.h>
+
+#ifndef CONFIG_64BIT
+
+#define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */
+#define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */
+#define APPLDATA_GEN_EVENT_REC 0x02
+#define APPLDATA_START_CONFIG_REC 0x03
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+ u16 diag; /* The DIAGNOSE code X'00DC' */
+ u8 function; /* The function code for the DIAGNOSE */
+ u8 parlist_length; /* Length of the parameter list */
+ u32 product_id_addr; /* Address of the 16-byte product ID */
+ u16 reserved;
+ u16 buffer_length; /* Length of the application data buffer */
+ u32 buffer_addr; /* Address of the application data buffer */
+} __attribute__ ((packed));
+
+#else /* CONFIG_64BIT */
+
+#define APPLDATA_START_INTERVAL_REC 0x80
+#define APPLDATA_STOP_REC 0x81
+#define APPLDATA_GEN_EVENT_REC 0x82
+#define APPLDATA_START_CONFIG_REC 0x83
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+ u16 diag;
+ u8 function;
+ u8 parlist_length;
+ u32 unused01;
+ u16 reserved;
+ u16 buffer_length;
+ u32 unused02;
+ u64 product_id_addr;
+ u64 buffer_addr;
+} __attribute__ ((packed));
+
+#endif /* CONFIG_64BIT */
+
+struct appldata_product_id {
+ char prod_nr[7]; /* product number */
+ u16 prod_fn; /* product function */
+ u8 record_nr; /* record number */
+ u16 version_nr; /* version */
+ u16 release_nr; /* release */
+ u16 mod_lvl; /* modification level */
+} __attribute__ ((packed));
+
+static inline int appldata_asm(struct appldata_product_id *id,
+ unsigned short fn, void *buffer,
+ unsigned short length)
+{
+ struct appldata_parameter_list parm_list;
+ int ry;
+
+ if (!MACHINE_IS_VM)
+ return -EOPNOTSUPP;
+ parm_list.diag = 0xdc;
+ parm_list.function = fn;
+ parm_list.parlist_length = sizeof(parm_list);
+ parm_list.buffer_length = length;
+ parm_list.product_id_addr = (unsigned long) id;
+ parm_list.buffer_addr = virt_to_phys(buffer);
+ asm volatile(
+ " diag %1,%0,0xdc"
+ : "=d" (ry)
+ : "d" (&parm_list), "m" (parm_list), "m" (*id)
+ : "cc");
+ return ry;
+}
+
+#endif /* _ASM_S390_APPLDATA_H */
diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h
new file mode 100644
index 00000000000..d370ee36a18
--- /dev/null
+++ b/arch/s390/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
new file mode 100644
index 00000000000..fa934fe080c
--- /dev/null
+++ b/arch/s390/include/asm/atomic.h
@@ -0,0 +1,415 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Denis Joseph Barrow,
+ * Arnd Bergmann <arndb@de.ibm.com>,
+ *
+ * Atomic operations that C can't guarantee us.
+ * Useful for resource counting etc.
+ * s390 uses 'Compare And Swap' for atomicity in SMP environment.
+ *
+ */
+
+#ifndef __ARCH_S390_ATOMIC__
+#define __ARCH_S390_ATOMIC__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+
+#define ATOMIC_INIT(i) { (i) }
+
+#define __ATOMIC_NO_BARRIER "\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC_OR "lao"
+#define __ATOMIC_AND "lan"
+#define __ATOMIC_ADD "laa"
+#define __ATOMIC_BARRIER "bcr 14,0\n"
+
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
+ int old_val; \
+ \
+ typecheck(atomic_t *, ptr); \
+ asm volatile( \
+ __barrier \
+ op_string " %0,%2,%1\n" \
+ __barrier \
+ : "=d" (old_val), "+Q" ((ptr)->counter) \
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC_OR "or"
+#define __ATOMIC_AND "nr"
+#define __ATOMIC_ADD "ar"
+#define __ATOMIC_BARRIER "\n"
+
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
+ int old_val, new_val; \
+ \
+ typecheck(atomic_t *, ptr); \
+ asm volatile( \
+ " l %0,%2\n" \
+ "0: lr %1,%0\n" \
+ op_string " %1,%3\n" \
+ " cs %0,%1,%2\n" \
+ " jl 0b" \
+ : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+static inline int atomic_read(const atomic_t *v)
+{
+ int c;
+
+ asm volatile(
+ " l %0,%1\n"
+ : "=d" (c) : "Q" (v->counter));
+ return c;
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+ asm volatile(
+ " st %1,%0\n"
+ : "=Q" (v->counter) : "d" (i));
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ asm volatile(
+ "asi %0,%1\n"
+ : "+Q" (v->counter)
+ : "i" (i)
+ : "cc", "memory");
+ return;
+ }
+#endif
+ __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER);
+}
+
+#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
+#define atomic_inc(_v) atomic_add(1, _v)
+#define atomic_inc_return(_v) atomic_add_return(1, _v)
+#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
+#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
+#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
+#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
+#define atomic_dec(_v) atomic_sub(1, _v)
+#define atomic_dec_return(_v) atomic_sub_return(1, _v)
+#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+ __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+ __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ asm volatile(
+ " cs %0,%2,%1"
+ : "+d" (old), "+Q" (v->counter)
+ : "d" (new)
+ : "cc", "memory");
+ return old;
+}
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int c, old;
+ c = atomic_read(v);
+ for (;;) {
+ if (unlikely(c == u))
+ break;
+ old = atomic_cmpxchg(v, c, c + a);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c;
+}
+
+
+#undef __ATOMIC_LOOP
+
+#define ATOMIC64_INIT(i) { (i) }
+
+#ifdef CONFIG_64BIT
+
+#define __ATOMIC64_NO_BARRIER "\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC64_OR "laog"
+#define __ATOMIC64_AND "lang"
+#define __ATOMIC64_ADD "laag"
+#define __ATOMIC64_BARRIER "bcr 14,0\n"
+
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
+ long long old_val; \
+ \
+ typecheck(atomic64_t *, ptr); \
+ asm volatile( \
+ __barrier \
+ op_string " %0,%2,%1\n" \
+ __barrier \
+ : "=d" (old_val), "+Q" ((ptr)->counter) \
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC64_OR "ogr"
+#define __ATOMIC64_AND "ngr"
+#define __ATOMIC64_ADD "agr"
+#define __ATOMIC64_BARRIER "\n"
+
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
+({ \
+ long long old_val, new_val; \
+ \
+ typecheck(atomic64_t *, ptr); \
+ asm volatile( \
+ " lg %0,%2\n" \
+ "0: lgr %1,%0\n" \
+ op_string " %1,%3\n" \
+ " csg %0,%1,%2\n" \
+ " jl 0b" \
+ : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+static inline long long atomic64_read(const atomic64_t *v)
+{
+ long long c;
+
+ asm volatile(
+ " lg %0,%1\n"
+ : "=d" (c) : "Q" (v->counter));
+ return c;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long i)
+{
+ asm volatile(
+ " stg %1,%0\n"
+ : "=Q" (v->counter) : "d" (i));
+}
+
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
+}
+
+static inline void atomic64_add(long long i, atomic64_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ asm volatile(
+ "agsi %0,%1\n"
+ : "+Q" (v->counter)
+ : "i" (i)
+ : "cc", "memory");
+ return;
+ }
+#endif
+ __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
+}
+
+static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
+{
+ __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
+}
+
+static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
+{
+ __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
+}
+
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline long long atomic64_cmpxchg(atomic64_t *v,
+ long long old, long long new)
+{
+ asm volatile(
+ " csg %0,%2,%1"
+ : "+d" (old), "+Q" (v->counter)
+ : "d" (new)
+ : "cc", "memory");
+ return old;
+}
+
+#undef __ATOMIC64_LOOP
+
+#else /* CONFIG_64BIT */
+
+typedef struct {
+ long long counter;
+} atomic64_t;
+
+static inline long long atomic64_read(const atomic64_t *v)
+{
+ register_pair rp;
+
+ asm volatile(
+ " lm %0,%N0,%1"
+ : "=&d" (rp) : "Q" (v->counter) );
+ return rp.pair;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long i)
+{
+ register_pair rp = {.pair = i};
+
+ asm volatile(
+ " stm %1,%N1,%0"
+ : "=Q" (v->counter) : "d" (rp) );
+}
+
+static inline long long atomic64_xchg(atomic64_t *v, long long new)
+{
+ register_pair rp_new = {.pair = new};
+ register_pair rp_old;
+
+ asm volatile(
+ " lm %0,%N0,%1\n"
+ "0: cds %0,%2,%1\n"
+ " jl 0b\n"
+ : "=&d" (rp_old), "+Q" (v->counter)
+ : "d" (rp_new)
+ : "cc");
+ return rp_old.pair;
+}
+
+static inline long long atomic64_cmpxchg(atomic64_t *v,
+ long long old, long long new)
+{
+ register_pair rp_old = {.pair = old};
+ register_pair rp_new = {.pair = new};
+
+ asm volatile(
+ " cds %0,%2,%1"
+ : "+&d" (rp_old), "+Q" (v->counter)
+ : "d" (rp_new)
+ : "cc");
+ return rp_old.pair;
+}
+
+
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+ long long old, new;
+
+ do {
+ old = atomic64_read(v);
+ new = old + i;
+ } while (atomic64_cmpxchg(v, old, new) != old);
+ return new;
+}
+
+static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v)
+{
+ long long old, new;
+
+ do {
+ old = atomic64_read(v);
+ new = old | mask;
+ } while (atomic64_cmpxchg(v, old, new) != old);
+}
+
+static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v)
+{
+ long long old, new;
+
+ do {
+ old = atomic64_read(v);
+ new = old & ~mask; /* clear the bits set in mask, like the 64-bit variant */
+ } while (atomic64_cmpxchg(v, old, new) != old);
+}
+
+static inline void atomic64_add(long long i, atomic64_t *v)
+{
+ atomic64_add_return(i, v);
+}
+
+#endif /* CONFIG_64BIT */
+
+static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
+{
+ long long c, old;
+
+ c = atomic64_read(v);
+ for (;;) {
+ if (unlikely(c == u))
+ break;
+ old = atomic64_cmpxchg(v, c, c + i);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c != u;
+}
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ long long c, old, dec;
+
+ c = atomic64_read(v);
+ for (;;) {
+ dec = c - 1;
+ if (unlikely(dec < 0))
+ break;
+ old = atomic64_cmpxchg((v), c, dec);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return dec;
+}
+
+#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
+#define atomic64_inc(_v) atomic64_add(1, _v)
+#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
+#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
+#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v)
+#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v)
+#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
+#define atomic64_dec(_v) atomic64_sub(1, _v)
+#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
+#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+#endif /* __ARCH_S390_ATOMIC__ */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
new file mode 100644
index 00000000000..19ff956b752
--- /dev/null
+++ b/arch/s390/include/asm/barrier.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0)
+#else
+#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0)
+#endif
+
+#define rmb() mb()
+#define wmb() mb()
+#define read_barrier_depends() do { } while(0)
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends() read_barrier_depends()
+
+#define smp_mb__before_atomic() smp_mb()
+#define smp_mb__after_atomic() smp_mb()
+
+#define set_mb(var, value) do { var = value; mb(); } while (0)
+
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
+#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
new file mode 100644
index 00000000000..52054247767
--- /dev/null
+++ b/arch/s390/include/asm/bitops.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright IBM Corp. 1999,2013
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ * The description below was taken in large parts from the powerpc
+ * bitops header file:
+ * Within a word, bits are numbered LSB first. Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for an
+ * s390x system the bits end up numbered:
+ * |63..............0|127............64|191...........128|255...........192|
+ * and on s390:
+ * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
+ * number field needs to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ *
+ * We also have special functions which work with an MSB0 encoding:
+ * on an s390x system the bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ *
+ * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit
+ * number field needs to be reversed compared to the LSB0 encoded bit
+ * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
+ *
+ */
+
+#ifndef _S390_BITOPS_H
+#define _S390_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/typecheck.h>
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+
+#define __BITOPS_NO_BARRIER "\n"
+
+#ifndef CONFIG_64BIT
+
+#define __BITOPS_OR "or"
+#define __BITOPS_AND "nr"
+#define __BITOPS_XOR "xr"
+#define __BITOPS_BARRIER "\n"
+
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
+({ \
+ unsigned long __old, __new; \
+ \
+ typecheck(unsigned long *, (__addr)); \
+ asm volatile( \
+ " l %0,%2\n" \
+ "0: lr %1,%0\n" \
+ __op_string " %1,%3\n" \
+ " cs %0,%1,%2\n" \
+ " jl 0b" \
+ : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
+ : "d" (__val) \
+ : "cc", "memory"); \
+ __old; \
+})
+
+#else /* CONFIG_64BIT */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __BITOPS_OR "laog"
+#define __BITOPS_AND "lang"
+#define __BITOPS_XOR "laxg"
+#define __BITOPS_BARRIER "bcr 14,0\n"
+
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
+({ \
+ unsigned long __old; \
+ \
+ typecheck(unsigned long *, (__addr)); \
+ asm volatile( \
+ __barrier \
+ __op_string " %0,%2,%1\n" \
+ __barrier \
+ : "=d" (__old), "+Q" (*(__addr)) \
+ : "d" (__val) \
+ : "cc", "memory"); \
+ __old; \
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __BITOPS_OR "ogr"
+#define __BITOPS_AND "ngr"
+#define __BITOPS_XOR "xgr"
+#define __BITOPS_BARRIER "\n"
+
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
+({ \
+ unsigned long __old, __new; \
+ \
+ typecheck(unsigned long *, (__addr)); \
+ asm volatile( \
+ " lg %0,%2\n" \
+ "0: lgr %1,%0\n" \
+ __op_string " %1,%3\n" \
+ " csg %0,%1,%2\n" \
+ " jl 0b" \
+ : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
+ : "d" (__val) \
+ : "cc", "memory"); \
+ __old; \
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#endif /* CONFIG_64BIT */
+
+#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+static inline unsigned long *
+__bitops_word(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long addr;
+
+ addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
+ return (unsigned long *)addr;
+}
+
+static inline unsigned char *
+__bitops_byte(unsigned long nr, volatile unsigned long *ptr)
+{
+ return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
+}
+
+static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "oi %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (1 << (nr & 7))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER);
+}
+
+static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "ni %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (~(1 << (nr & 7)))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+ __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER);
+}
+
+static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "xi %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (1 << (nr & 7))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER);
+}
+
+static inline int
+test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER);
+ return (old & mask) != 0;
+}
+
+static inline int
+test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER);
+ return (old & ~mask) != 0;
+}
+
+static inline int
+test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER);
+ return (old & mask) != 0;
+}
+
+static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+
+ *addr |= 1 << (nr & 7);
+}
+
+static inline void
+__clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+
+ *addr &= ~(1 << (nr & 7));
+}
+
+static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+
+ *addr ^= 1 << (nr & 7);
+}
+
+static inline int
+__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+ unsigned char ch;
+
+ ch = *addr;
+ *addr |= 1 << (nr & 7);
+ return (ch >> (nr & 7)) & 1;
+}
+
+static inline int
+__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+ unsigned char ch;
+
+ ch = *addr;
+ *addr &= ~(1 << (nr & 7));
+ return (ch >> (nr & 7)) & 1;
+}
+
+static inline int
+__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+ unsigned char ch;
+
+ ch = *addr;
+ *addr ^= 1 << (nr & 7);
+ return (ch >> (nr & 7)) & 1;
+}
+
+static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
+{
+ const volatile unsigned char *addr;
+
+ addr = ((const volatile unsigned char *)ptr);
+ addr += (nr ^ (BITS_PER_LONG - 8)) >> 3;
+ return (*addr >> (nr & 7)) & 1;
+}
+
+/*
+ * Functions which use MSB0 bit numbering.
+ * On an s390x system the bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ */
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+ unsigned long offset);
+
+static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline int test_bit_inv(unsigned long nr,
+ const volatile unsigned long *ptr)
+{
+ return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+
+/**
+ * __flogr - find leftmost one
+ * @word: The word to search
+ *
+ * Returns the bit number of the most significant bit set,
+ * where the most significant bit has bit number 0.
+ * If no bit is set this function returns 64.
+ */
+static inline unsigned char __flogr(unsigned long word)
+{
+ if (__builtin_constant_p(word)) {
+ unsigned long bit = 0;
+
+ if (!word)
+ return 64;
+ if (!(word & 0xffffffff00000000UL)) {
+ word <<= 32;
+ bit += 32;
+ }
+ if (!(word & 0xffff000000000000UL)) {
+ word <<= 16;
+ bit += 16;
+ }
+ if (!(word & 0xff00000000000000UL)) {
+ word <<= 8;
+ bit += 8;
+ }
+ if (!(word & 0xf000000000000000UL)) {
+ word <<= 4;
+ bit += 4;
+ }
+ if (!(word & 0xc000000000000000UL)) {
+ word <<= 2;
+ bit += 2;
+ }
+ if (!(word & 0x8000000000000000UL)) {
+ word <<= 1;
+ bit += 1;
+ }
+ return bit;
+ } else {
+ register unsigned long bit asm("4") = word;
+ register unsigned long out asm("5");
+
+ asm volatile(
+ " flogr %[bit],%[bit]\n"
+ : [bit] "+d" (bit), [out] "=d" (out) : : "cc");
+ return bit;
+ }
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+ return __flogr(-word & word) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * ffs - find first bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as the libc and
+ * compiler builtin ffs routines (man ffs).
+ */
+static inline int ffs(int word)
+{
+ unsigned long mask = 2 * BITS_PER_LONG - 1;
+ unsigned int val = (unsigned int)word;
+
+ return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask;
+}
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+ return __flogr(word) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @word: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+static inline int fls64(unsigned long word)
+{
+ unsigned long mask = 2 * BITS_PER_LONG - 1;
+
+ return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask;
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static inline int fls(int word)
+{
+ return fls64((unsigned int)word);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/fls64.h>
+
+#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* _S390_BITOPS_H */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
new file mode 100644
index 00000000000..bf90d1fd97a
--- /dev/null
+++ b/arch/s390/include/asm/bug.h
@@ -0,0 +1,71 @@
+#ifndef _ASM_S390_BUG_H
+#define _ASM_S390_BUG_H
+
+#include <linux/kernel.h>
+
+#ifdef CONFIG_BUG
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+/*
+ * __EMIT_BUG(x) - emit a trap site plus its __bug_table record (verbose form).
+ *
+ * The asm places "j 0b+2" at label 0 and label 1 directly after it. In the
+ * __bug_table section it then records, relative to the start of the record
+ * (label 3), the offset of label 1 and the offset of the __FILE__ string
+ * (label 2, stored in .rodata.str), followed by two .short values: __LINE__
+ * and the flags @x. ".org 3b+%2" advances to sizeof(struct bug_entry) past
+ * the start of the record.
+ *
+ * NOTE(review): "j 0b+2" targets an address inside the jump itself,
+ * presumably to raise a program check that the trap handler resolves through
+ * __bug_table - confirm against the s390 trap code.
+ */
+#define __EMIT_BUG(x) do { \
+ asm volatile( \
+ "0: j 0b+2\n" \
+ "1:\n" \
+ ".section .rodata.str,\"aMS\",@progbits,1\n" \
+ "2: .asciz \""__FILE__"\"\n" \
+ ".previous\n" \
+ ".section __bug_table,\"a\"\n" \
+ "3: .long 1b-3b,2b-3b\n" \
+ " .short %0,%1\n" \
+ " .org 3b+%2\n" \
+ ".previous\n" \
+ : : "i" (__LINE__), \
+ "i" (x), \
+ "i" (sizeof(struct bug_entry))); \
+} while (0)
+
+#else /* CONFIG_DEBUG_BUGVERBOSE */
+
+/*
+ * __EMIT_BUG(x) - emit a trap site plus its __bug_table record (terse form).
+ *
+ * Without CONFIG_DEBUG_BUGVERBOSE the record holds only the offset of
+ * label 1 (the address right after "j 0b+2") relative to the start of the
+ * record (label 2) and the flags @x as a .short; no file name or line number
+ * is stored. ".org 2b+%1" advances to sizeof(struct bug_entry) past the
+ * start of the record.
+ */
+#define __EMIT_BUG(x) do { \
+ asm volatile( \
+ "0: j 0b+2\n" \
+ "1:\n" \
+ ".section __bug_table,\"a\"\n" \
+ "2: .long 1b-2b\n" \
+ " .short %0\n" \
+ " .org 2b+%1\n" \
+ ".previous\n" \
+ : : "i" (x), \
+ "i" (sizeof(struct bug_entry))); \
+} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+/*
+ * BUG() - emit a __bug_table entry with flags 0 and mark everything after it
+ * as unreachable for the compiler.
+ */
+#define BUG() do { \
+ __EMIT_BUG(0); \
+ unreachable(); \
+} while (0)
+
+/*
+ * __WARN_TAINT(taint) - emit a __bug_table entry flagged with
+ * BUGFLAG_TAINT(taint). Unlike BUG(), no unreachable() follows.
+ */
+#define __WARN_TAINT(taint) do { \
+ __EMIT_BUG(BUGFLAG_TAINT(taint)); \
+} while (0)
+
+/*
+ * WARN_ON(x) - evaluate @x once and call __WARN() when it is true.
+ *
+ * The statement expression yields the truth value of @x (0 or 1), wrapped in
+ * unlikely(). Both arms of the __builtin_constant_p() test call __WARN();
+ * they differ only in the unlikely() hint on the non-constant arm.
+ */
+#define WARN_ON(x) ({ \
+ int __ret_warn_on = !!(x); \
+ if (__builtin_constant_p(__ret_warn_on)) { \
+ if (__ret_warn_on) \
+ __WARN(); \
+ } else { \
+ if (unlikely(__ret_warn_on)) \
+ __WARN(); \
+ } \
+ unlikely(__ret_warn_on); \
+})
+
+#define HAVE_ARCH_BUG
+#define HAVE_ARCH_WARN_ON
+#endif /* CONFIG_BUG */
+
+#include <asm-generic/bug.h>
+
+#endif /* _ASM_S390_BUG_H */
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
new file mode 100644
index 00000000000..0f5bd894f4d
--- /dev/null
+++ b/arch/s390/include/asm/bugs.h
@@ -0,0 +1,20 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/bugs.h"
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ * void check_bugs(void);
+ */
+
+/* check_bugs - intentionally empty; this header performs no checks. */
+static inline void check_bugs(void)
+{
+ /* s390 has no bugs ... */
+}
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
new file mode 100644
index 00000000000..4d7ccac5fd1
--- /dev/null
+++ b/arch/s390/include/asm/cache.h
@@ -0,0 +1,18 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ *
+ * Derived from "include/asm-i386/cache.h"
+ * Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef __ARCH_S390_CACHE_H
+#define __ARCH_S390_CACHE_H
+
+#define L1_CACHE_BYTES 256
+#define L1_CACHE_SHIFT 8
+#define NET_SKB_PAD 32
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
new file mode 100644
index 00000000000..3e20383d092
--- /dev/null
+++ b/arch/s390/include/asm/cacheflush.h
@@ -0,0 +1,16 @@
+#ifndef _S390_CACHEFLUSH_H
+#define _S390_CACHEFLUSH_H
+
+/* Caches aren't brain-dead on the s390. */
+#include <asm-generic/cacheflush.h>
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable);
+#endif
+
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+
+#endif /* _S390_CACHEFLUSH_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
new file mode 100644
index 00000000000..b80e456d642
--- /dev/null
+++ b/arch/s390/include/asm/ccwdev.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright IBM Corp. 2002, 2009
+ *
+ * Author(s): Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * Interface for CCW device drivers
+ */
+#ifndef _S390_CCWDEV_H_
+#define _S390_CCWDEV_H_
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <asm/fcx.h>
+#include <asm/irq.h>
+#include <asm/schid.h>
+
+/* structs from asm/cio.h */
+struct irb;
+struct ccw1;
+struct ccw_dev_id;
+
+/* simplified initializers for struct ccw_device_id:
+ * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one
+ * entry in your MODULE_DEVICE_TABLE and set the match_flags correctly.
+ *
+ * CCW_DEVICE(cu, cum): match on control unit type @cu; the control unit
+ * model @cum is matched as well only when it is non-zero. Every use of an
+ * argument is parenthesized so that any expression can be passed. */
+#define CCW_DEVICE(cu, cum) \
+	.cu_type=(cu), .cu_model=(cum), \
+	.match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE \
+		      | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0))
+
+/* CCW_DEVICE_DEVTYPE(cu, cum, dev, devm): match on control unit type @cu
+ * and device type @dev; the models @cum and @devm are matched as well only
+ * when they are non-zero. */
+#define CCW_DEVICE_DEVTYPE(cu, cum, dev, devm) \
+	.cu_type = (cu), .cu_model = (cum), \
+	.dev_type = (dev), .dev_model = (devm), \
+	.match_flags = (CCW_DEVICE_ID_MATCH_CU_TYPE \
+			| CCW_DEVICE_ID_MATCH_DEVICE_TYPE \
+			| ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0) \
+			| ((devm) ? CCW_DEVICE_ID_MATCH_DEVICE_MODEL : 0))
+
+/*
+ * Walk @array and return the first entry whose selected fields all agree
+ * with @match, or NULL when no entry does.
+ *
+ * Only the fields an entry names in its match_flags are compared. The walk
+ * stops at the first entry whose match_flags are zero, so @array must be
+ * terminated by such an entry.
+ */
+static inline const struct ccw_device_id *
+ccw_device_id_match(const struct ccw_device_id *array,
+		    const struct ccw_device_id *match)
+{
+	const struct ccw_device_id *entry;
+
+	for (entry = array; entry->match_flags != 0; entry++) {
+		/* Skip the entry as soon as one requested field differs. */
+		if (((entry->match_flags & CCW_DEVICE_ID_MATCH_CU_TYPE) &&
+		     entry->cu_type != match->cu_type) ||
+		    ((entry->match_flags & CCW_DEVICE_ID_MATCH_CU_MODEL) &&
+		     entry->cu_model != match->cu_model) ||
+		    ((entry->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_TYPE) &&
+		     entry->dev_type != match->dev_type) ||
+		    ((entry->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_MODEL) &&
+		     entry->dev_model != match->dev_model))
+			continue;
+
+		return entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * struct ccw_device - channel attached device
+ * @ccwlock: pointer to device lock
+ * @id: id of this device
+ * @drv: ccw driver for this device
+ * @dev: embedded device structure
+ * @online: online status of device
+ * @handler: interrupt handler
+ *
+ * @handler is a member of the device rather than the driver since a driver
+ * can have different interrupt handlers for different ccw devices
+ * (multi-subchannel drivers).
+ */
+struct ccw_device {
+ spinlock_t *ccwlock;
+/* private: */
+ struct ccw_device_private *private; /* cio private information */
+/* public: */
+ struct ccw_device_id id;
+ struct ccw_driver *drv;
+ struct device dev;
+ int online;
+ void (*handler) (struct ccw_device *, unsigned long, struct irb *);
+};
+
+/*
+ * Possible events used by the path_event notifier.
+ */
+#define PE_NONE 0x0
+#define PE_PATH_GONE 0x1 /* A path is no longer available. */
+#define PE_PATH_AVAILABLE 0x2 /* A path has become available and
+ was successfully verified. */
+#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had
+ to be established again. */
+
+/*
+ * Possible CIO actions triggered by the unit check handler.
+ */
+enum uc_todo {
+ UC_TODO_RETRY,
+ UC_TODO_RETRY_ON_NEW_PATH,
+ UC_TODO_STOP
+};
+
+/**
+ * struct ccw_driver - device driver for channel attached devices
+ * @ids: ids supported by this driver
+ * @probe: function called on probe
+ * @remove: function called on remove
+ * @set_online: called when setting device online
+ * @set_offline: called when setting device offline
+ * @notify: notify driver of device state changes
+ * @path_event: notify driver of channel path events
+ * @shutdown: called at device shutdown
+ * @prepare: prepare for pm state transition
+ * @complete: undo work done in @prepare
+ * @freeze: callback for freezing during hibernation snapshotting
+ * @thaw: undo work done in @freeze
+ * @restore: callback for restoring after hibernation
+ * @uc_handler: callback for unit check handler
+ * @driver: embedded device driver structure
+ * @int_class: interruption class to use for accounting interrupts
+ */
+struct ccw_driver {
+ struct ccw_device_id *ids;
+ int (*probe) (struct ccw_device *);
+ void (*remove) (struct ccw_device *);
+ int (*set_online) (struct ccw_device *);
+ int (*set_offline) (struct ccw_device *);
+ int (*notify) (struct ccw_device *, int);
+ void (*path_event) (struct ccw_device *, int *);
+ void (*shutdown) (struct ccw_device *);
+ int (*prepare) (struct ccw_device *);
+ void (*complete) (struct ccw_device *);
+ int (*freeze)(struct ccw_device *);
+ int (*thaw) (struct ccw_device *);
+ int (*restore)(struct ccw_device *);
+ enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
+ struct device_driver driver;
+ enum interruption_class int_class;
+};
+
+extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
+ const char *bus_id);
+
+/* device drivers call these during module load and unload.
+ * When a driver is registered, its probe method is called
+ * when new devices for its type pop up */
+extern int ccw_driver_register (struct ccw_driver *driver);
+extern void ccw_driver_unregister (struct ccw_driver *driver);
+
+struct ccw1;
+
+extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long);
+extern int ccw_device_set_options(struct ccw_device *, unsigned long);
+extern void ccw_device_clear_options(struct ccw_device *, unsigned long);
+int ccw_device_is_pathgroup(struct ccw_device *cdev);
+int ccw_device_is_multipath(struct ccw_device *cdev);
+
+/* Allow for i/o completion notification after primary interrupt status. */
+#define CCWDEV_EARLY_NOTIFICATION 0x0001
+/* Report all interrupt conditions. */
+#define CCWDEV_REPORT_ALL 0x0002
+/* Try to perform path grouping. */
+#define CCWDEV_DO_PATHGROUP 0x0004
+/* Allow forced onlining of boxed devices. */
+#define CCWDEV_ALLOW_FORCE 0x0008
+/* Try to use multipath mode. */
+#define CCWDEV_DO_MULTIPATH 0x0010
+
+extern int ccw_device_start(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, unsigned long);
+extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, unsigned long, int);
+extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, __u8, unsigned long);
+extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, __u8,
+ unsigned long, int);
+
+
+extern int ccw_device_resume(struct ccw_device *);
+extern int ccw_device_halt(struct ccw_device *, unsigned long);
+extern int ccw_device_clear(struct ccw_device *, unsigned long);
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+ unsigned long intparm, u8 lpm, u8 key);
+int ccw_device_tm_start_key(struct ccw_device *, struct tcw *,
+ unsigned long, u8, u8);
+int ccw_device_tm_start_timeout_key(struct ccw_device *, struct tcw *,
+ unsigned long, u8, u8, int);
+int ccw_device_tm_start(struct ccw_device *, struct tcw *,
+ unsigned long, u8);
+int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *,
+ unsigned long, u8, int);
+int ccw_device_tm_intrg(struct ccw_device *cdev);
+
+int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask);
+
+extern int ccw_device_set_online(struct ccw_device *cdev);
+extern int ccw_device_set_offline(struct ccw_device *cdev);
+
+
+extern struct ciw *ccw_device_get_ciw(struct ccw_device *, __u32 cmd);
+extern __u8 ccw_device_get_path_mask(struct ccw_device *);
+extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
+
+#define get_ccwdev_lock(x) (x)->ccwlock
+
+#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
+#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+
+extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
+extern void ccw_device_destroy_console(struct ccw_device *);
+extern int ccw_device_enable_console(struct ccw_device *);
+extern void ccw_device_wait_idle(struct ccw_device *);
+extern int ccw_device_force_console(struct ccw_device *);
+
+int ccw_device_siosl(struct ccw_device *);
+
+extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
+
+struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int);
+#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
new file mode 100644
index 00000000000..057ce0ca637
--- /dev/null
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -0,0 +1,73 @@
+#ifndef S390_CCWGROUP_H
+#define S390_CCWGROUP_H
+
+struct ccw_device;
+struct ccw_driver;
+
+/**
+ * struct ccwgroup_device - ccw group device
+ * @state: online/offline state
+ * @count: number of attached slave devices
+ * @dev: embedded device structure
+ * @cdev: variable number of slave devices, allocated as needed
+ * @ungroup_work: work to be done when a ccwgroup notifier has action
+ * type %BUS_NOTIFY_UNBIND_DRIVER
+ */
+struct ccwgroup_device {
+ enum {
+ CCWGROUP_OFFLINE,
+ CCWGROUP_ONLINE,
+ } state;
+/* private: */
+ atomic_t onoff;
+ struct mutex reg_mutex;
+/* public: */
+ unsigned int count;
+ struct device dev;
+ struct work_struct ungroup_work;
+ struct ccw_device *cdev[0];
+};
+
+/**
+ * struct ccwgroup_driver - driver for ccw group devices
+ * @setup: function called during device creation to setup the device
+ * @remove: function called on remove
+ * @set_online: function called when device is set online
+ * @set_offline: function called when device is set offline
+ * @shutdown: function called when device is shut down
+ * @prepare: prepare for pm state transition
+ * @complete: undo work done in @prepare
+ * @freeze: callback for freezing during hibernation snapshotting
+ * @thaw: undo work done in @freeze
+ * @restore: callback for restoring after hibernation
+ * @driver: embedded driver structure
+ */
+struct ccwgroup_driver {
+ int (*setup) (struct ccwgroup_device *);
+ void (*remove) (struct ccwgroup_device *);
+ int (*set_online) (struct ccwgroup_device *);
+ int (*set_offline) (struct ccwgroup_device *);
+ void (*shutdown)(struct ccwgroup_device *);
+ int (*prepare) (struct ccwgroup_device *);
+ void (*complete) (struct ccwgroup_device *);
+ int (*freeze)(struct ccwgroup_device *);
+ int (*thaw) (struct ccwgroup_device *);
+ int (*restore)(struct ccwgroup_device *);
+
+ struct device_driver driver;
+};
+
+extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver);
+extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
+int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
+ int num_devices, const char *buf);
+
+extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
+extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
+
+extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev);
+extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev);
+
+#define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev)
+#define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver)
+#endif
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
new file mode 100644
index 00000000000..74036485635
--- /dev/null
+++ b/arch/s390/include/asm/checksum.h
@@ -0,0 +1,140 @@
+/*
+ * S390 fast network checksum routines
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Ulrich Hild (first version)
+ * Martin Schwidefsky (heavily optimized CKSM version)
+ * D.J. Barrow (third attempt)
+ */
+
+#ifndef _S390_CHECKSUM_H
+#define _S390_CHECKSUM_H
+
+#include <asm/uaccess.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ *
+ * buff and len are pinned to registers 2 and 3. Only reg2 is named in the
+ * asm template; reg3 is listed as an operand but never referenced, so the
+ * instruction presumably takes it implicitly as the partner of register 2
+ * (NOTE(review): confirm against the CKSM definition).
+ */
+static inline __wsum
+csum_partial(const void *buff, int len, __wsum sum)
+{
+ register unsigned long reg2 asm("2") = (unsigned long) buff;
+ register unsigned long reg3 asm("3") = (unsigned long) len;
+
+ asm volatile(
+ "0: cksm %0,%1\n" /* do checksum on longs */
+ " jo 0b\n" /* repeat cksm for as long as jo branches */
+ : "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory");
+ return sum;
+}
+
+/*
+ * Copy @len bytes from user space (@src) to @dst and checksum the result.
+ *
+ * If copy_from_user() reports a failure, *@err_ptr is set to -EFAULT; the
+ * checksum of @dst is computed and returned in either case, with @sum
+ * added in.
+ *
+ * Aligning src and dst on a 32-bit (or, better, 64-bit) boundary matters
+ * even more here than for csum_partial().
+ */
+static inline __wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+			    int len, __wsum sum,
+			    int *err_ptr)
+{
+	if (unlikely(copy_from_user(dst, src, len) != 0))
+		*err_ptr = -EFAULT;
+
+	return csum_partial(dst, len, sum);
+}
+
+
+/*
+ * Copy @len bytes from @src to @dst with memcpy() and return the checksum
+ * of the copy with @sum added in.
+ */
+static inline __wsum
+csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
+{
+	/* memcpy() returns dst, so the copy can feed the checksum directly. */
+	return csum_partial(memcpy(dst, src, len), len, sum);
+}
+
+/*
+ * Fold a 32-bit partial checksum into 16 bits and complement it; no pseudo
+ * header is added.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	u32 csum = (__force u32) sum;
+	/* Swap the halfwords; the two shifted parts share no bits. */
+	u32 swapped = (csum << 16) | (csum >> 16);
+
+	/* Upper half of the sum: high + low plus the carry out of the lower half. */
+	return (__force __sum16) ~((csum + swapped) >> 16);
+}
+
+/*
+ * Variant of ip_compute_csum() for IP headers, which always checksum on
+ * 4 octet boundaries: @ihl * 4 bytes at @iph are summed and folded.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	__wsum partial = csum_partial(iph, ihl*4, 0);
+
+	return csum_fold(partial);
+}
+
+/*
+ * Add the TCP/UDP pseudo-header fields (@saddr, @daddr and @len + @proto)
+ * to @sum. Each addition feeds its carry back into the low bit.
+ * Returns the unfolded 32-bit checksum.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+		   unsigned short len, unsigned short proto,
+		   __wsum sum)
+{
+	__u32 acc = (__force __u32)sum;
+	__u32 src = (__force __u32)saddr;
+	__u32 dst = (__force __u32)daddr;
+	__u32 meta = len + proto;
+
+	/* After a wrapping add, "acc < addend" holds exactly when a carry was lost. */
+	acc += src;
+	acc += (acc < src);
+
+	acc += dst;
+	acc += (acc < dst);
+
+	acc += meta;
+	acc += (acc < meta);
+
+	return (__force __wsum)acc;
+}
+
+/*
+ * Checksum of the TCP/UDP pseudo-header: csum_tcpudp_nofold() followed by
+ * csum_fold(), i.e. a 16-bit result that is already complemented.
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+		  unsigned short len, unsigned short proto,
+		  __wsum sum)
+{
+	__wsum unfolded = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);
+
+	return csum_fold(unfolded);
+}
+
+/*
+ * Checksum @len bytes at @buff and fold the result. Used for miscellaneous
+ * IP-like checksums, mainly in icmp.c.
+ */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+	__wsum partial = csum_partial(buff, len, 0);
+
+	return csum_fold(partial);
+}
+
+#endif /* _S390_CHECKSUM_H */
+
+
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
new file mode 100644
index 00000000000..7298eec9854
--- /dev/null
+++ b/arch/s390/include/asm/chpid.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright IBM Corp. 2007, 2012
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+#ifndef _ASM_S390_CHPID_H
+#define _ASM_S390_CHPID_H
+
+#include <uapi/asm/chpid.h>
+#include <asm/cio.h>
+
+struct channel_path_desc {
+ u8 flags;
+ u8 lsn;
+ u8 desc;
+ u8 chpid;
+ u8 swla;
+ u8 zeroes;
+ u8 chla;
+ u8 chpp;
+} __packed;
+
+/* Zero the whole structure @chpid points to. */
+static inline void chp_id_init(struct chp_id *chpid)
+{
+	memset(chpid, 0, sizeof(*chpid));
+}
+
+/* Return 1 when @a and @b carry the same id and the same cssid, else 0. */
+static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b)
+{
+	if (a->id != b->id)
+		return 0;
+
+	return a->cssid == b->cssid;
+}
+
+/*
+ * Advance @chpid: bump id, or once id is no longer below __MAX_CHPID wrap
+ * it to 0 and bump cssid instead.
+ */
+static inline void chp_id_next(struct chp_id *chpid)
+{
+	if (chpid->id >= __MAX_CHPID) {
+		chpid->id = 0;
+		chpid->cssid++;
+		return;
+	}
+
+	chpid->id++;
+}
+
+/* Return 1 as long as @chpid's cssid does not exceed __MAX_CSSID, else 0. */
+static inline int chp_id_is_valid(struct chp_id *chpid)
+{
+	return !(chpid->cssid > __MAX_CSSID);
+}
+
+
+/*
+ * chp_id_for_each(c) - loop header: start from chp_id_init(c) and step with
+ * chp_id_next(c) for as long as chp_id_is_valid(c) holds.
+ */
+#define chp_id_for_each(c) \
+ for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c))
+#endif /* _ASM_S390_CHPID_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
new file mode 100644
index 00000000000..09633920776
--- /dev/null
+++ b/arch/s390/include/asm/cio.h
@@ -0,0 +1,315 @@
+/*
+ * Common interface for I/O on S/390
+ */
+#ifndef _ASM_S390_CIO_H_
+#define _ASM_S390_CIO_H_
+
+#include <linux/spinlock.h>
+#include <asm/types.h>
+
+#define LPM_ANYPATH 0xff
+#define __MAX_CSSID 0
+#define __MAX_SUBCHANNEL 65535
+#define __MAX_SSID 3
+
+#include <asm/scsw.h>
+
+/**
+ * struct ccw1 - channel command word
+ * @cmd_code: command code
+ * @flags: flags, like IDA addressing, etc.
+ * @count: byte count
+ * @cda: data address
+ *
+ * The ccw is the basic structure to build channel programs that perform
+ * operations with the device or the control unit. Only Format-1 channel
+ * command words are supported.
+ */
+struct ccw1 {
+ __u8 cmd_code;
+ __u8 flags;
+ __u16 count;
+ __u32 cda;
+} __attribute__ ((packed,aligned(8)));
+
+#define CCW_FLAG_DC 0x80
+#define CCW_FLAG_CC 0x40
+#define CCW_FLAG_SLI 0x20
+#define CCW_FLAG_SKIP 0x10
+#define CCW_FLAG_PCI 0x08
+#define CCW_FLAG_IDA 0x04
+#define CCW_FLAG_SUSPEND 0x02
+
+#define CCW_CMD_READ_IPL 0x02
+#define CCW_CMD_NOOP 0x03
+#define CCW_CMD_BASIC_SENSE 0x04
+#define CCW_CMD_TIC 0x08
+#define CCW_CMD_STLCK 0x14
+#define CCW_CMD_SENSE_PGID 0x34
+#define CCW_CMD_SUSPEND_RECONN 0x5B
+#define CCW_CMD_RDC 0x64
+#define CCW_CMD_RELEASE 0x94
+#define CCW_CMD_SET_PGID 0xAF
+#define CCW_CMD_SENSE_ID 0xE4
+#define CCW_CMD_DCTL 0xF3
+
+#define SENSE_MAX_COUNT 0x20
+
+/**
+ * struct erw - extended report word
+ * @res0: reserved
+ * @auth: authorization check
+ * @pvrf: path-verification-required flag
+ * @cpt: channel-path timeout
+ * @fsavf: failing storage address validity flag
+ * @cons: concurrent sense
+ * @scavf: secondary ccw address validity flag
+ * @fsaf: failing storage address format
+ * @scnt: sense count, if @cons == %1
+ * @res16: reserved
+ */
+struct erw {
+ __u32 res0 : 3;
+ __u32 auth : 1;
+ __u32 pvrf : 1;
+ __u32 cpt : 1;
+ __u32 fsavf : 1;
+ __u32 cons : 1;
+ __u32 scavf : 1;
+ __u32 fsaf : 1;
+ __u32 scnt : 6;
+ __u32 res16 : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct erw_eadm - EADM Subchannel extended report word
+ * @b: aob error
+ * @r: arsb error
+ */
+struct erw_eadm {
+ __u32 : 16;
+ __u32 b : 1;
+ __u32 r : 1;
+ __u32 : 14;
+} __packed;
+
+/**
+ * struct sublog - subchannel logout area
+ * @res0: reserved
+ * @esf: extended status flags
+ * @lpum: last path used mask
+ * @arep: ancillary report
+ * @fvf: field-validity flags
+ * @sacc: storage access code
+ * @termc: termination code
+ * @devsc: device-status check
+ * @serr: secondary error
+ * @ioerr: i/o-error alert
+ * @seqc: sequence code
+ */
+struct sublog {
+ __u32 res0 : 1;
+ __u32 esf : 7;
+ __u32 lpum : 8;
+ __u32 arep : 1;
+ __u32 fvf : 5;
+ __u32 sacc : 2;
+ __u32 termc : 2;
+ __u32 devsc : 1;
+ __u32 serr : 1;
+ __u32 ioerr : 1;
+ __u32 seqc : 3;
+} __attribute__ ((packed));
+
+/**
+ * struct esw0 - Format 0 Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ * @faddr: failing storage address
+ * @saddr: secondary ccw address
+ */
+struct esw0 {
+ struct sublog sublog;
+ struct erw erw;
+ __u32 faddr[2];
+ __u32 saddr;
+} __attribute__ ((packed));
+
+/**
+ * struct esw1 - Format 1 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @zero16: reserved zeros
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw1 {
+ __u8 zero0;
+ __u8 lpum;
+ __u16 zero16;
+ struct erw erw;
+ __u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw2 - Format 2 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @dcti: device-connect-time interval
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw2 {
+ __u8 zero0;
+ __u8 lpum;
+ __u16 dcti;
+ struct erw erw;
+ __u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw3 - Format 3 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @res: reserved
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw3 {
+ __u8 zero0;
+ __u8 lpum;
+ __u16 res;
+ struct erw erw;
+ __u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw_eadm - EADM Subchannel Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ */
+struct esw_eadm {
+ __u32 sublog;
+ struct erw_eadm erw;
+ __u32 : 32;
+ __u32 : 32;
+ __u32 : 32;
+} __packed;
+
+/**
+ * struct irb - interruption response block
+ * @scsw: subchannel status word
+ * @esw: extended status word
+ * @ecw: extended control word
+ *
+ * The irb that is handed to the device driver when an interrupt occurs. For
+ * solicited interrupts, the common I/O layer already performs checks whether
+ * a field is valid; a field not being valid is always passed as %0.
+ * If a unit check occurred, @ecw may contain sense data; this is retrieved
+ * by the common I/O layer itself if the device doesn't support concurrent
+ * sense (so that the device driver never needs to perform basic sense itself).
+ * For unsolicited interrupts, the irb is passed as-is (except for sense data,
+ * if applicable).
+ */
+struct irb {
+ union scsw scsw;
+ union {
+ struct esw0 esw0;
+ struct esw1 esw1;
+ struct esw2 esw2;
+ struct esw3 esw3;
+ struct esw_eadm eadm;
+ } esw;
+ __u8 ecw[32];
+} __attribute__ ((packed,aligned(4)));
+
+/**
+ * struct ciw - command information word (CIW) layout
+ * @et: entry type
+ * @reserved: reserved bits
+ * @ct: command type
+ * @cmd: command code
+ * @count: command count
+ */
+struct ciw {
+ __u32 et : 2;
+ __u32 reserved : 2;
+ __u32 ct : 4;
+ __u32 cmd : 8;
+ __u32 count : 16;
+} __attribute__ ((packed));
+
+#define CIW_TYPE_RCD 0x0 /* read configuration data */
+#define CIW_TYPE_SII 0x1 /* set interface identifier */
+#define CIW_TYPE_RNI 0x2 /* read node identifier */
+
+/*
+ * Flags used as input parameters for do_IO()
+ */
+#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */
+#define DOIO_DENY_PREFETCH 0x0002 /* don't allow for CCW prefetch */
+#define DOIO_SUPPRESS_INTER 0x0004 /* suppress intermediate inter. */
+ /* ... for suspended CCWs */
+/* Device or subchannel gone. */
+#define CIO_GONE 0x0001
+/* No path to device. */
+#define CIO_NO_PATH 0x0002
+/* Device has appeared. */
+#define CIO_OPER 0x0004
+/* Sick revalidation of device. */
+#define CIO_REVALIDATE 0x0008
+/* Device did not respond in time. */
+#define CIO_BOXED 0x0010
+
+/**
+ * struct ccw_dev_id - unique identifier for ccw devices
+ * @ssid: subchannel set id
+ * @devno: device number
+ *
+ * This structure is not directly based on any hardware structure. The
+ * hardware identifies a device by its device number and its subchannel,
+ * which is in turn identified by its id. In order to get a unique identifier
+ * for ccw devices across subchannel sets, @struct ccw_dev_id has been
+ * introduced.
+ */
+struct ccw_dev_id {
+ u8 ssid;
+ u16 devno;
+};
+
+/**
+ * ccw_dev_id_is_equal() - compare two ccw_dev_ids
+ * @dev_id1: a ccw_dev_id
+ * @dev_id2: another ccw_dev_id
+ * Returns:
+ *  %1 if ssid and devno are the same in both structures,
+ *  %0 if not.
+ * Context:
+ *  any
+ */
+static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
+				      struct ccw_dev_id *dev_id2)
+{
+	return dev_id1->ssid == dev_id2->ssid &&
+	       dev_id1->devno == dev_id2->devno;
+}
+
+void channel_subsystem_reinit(void);
+extern void css_schedule_reprobe(void);
+
+extern void reipl_ccw_dev(struct ccw_dev_id *id);
+
+struct cio_iplinfo {
+ u16 devno;
+ int is_qdio;
+};
+
+extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo);
+
+/* Function from drivers/s390/cio/chsc.c */
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl);
+int chsc_sstpi(void *page, void *result, size_t size);
+
+#endif
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
new file mode 100644
index 00000000000..a0e71a501f7
--- /dev/null
+++ b/arch/s390/include/asm/clp.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_S390_CLP_H
+#define _ASM_S390_CLP_H
+
+/* CLP common request & response block size */
+#define CLP_BLK_SIZE PAGE_SIZE
+
+struct clp_req_hdr {
+ u16 len;
+ u16 cmd;
+} __packed;
+
+struct clp_rsp_hdr {
+ u16 len;
+ u16 rsp;
+} __packed;
+
+/* CLP Response Codes */
+#define CLP_RC_OK 0x0010 /* Command request successfully */
+#define CLP_RC_CMD 0x0020 /* Command code not recognized */
+#define CLP_RC_PERM 0x0030 /* Command not authorized */
+#define CLP_RC_FMT 0x0040 /* Invalid command request format */
+#define CLP_RC_LEN 0x0050 /* Invalid command request length */
+#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */
+#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */
+#define CLP_RC_NODATA 0x0080 /* No data available */
+#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */
+
+#endif
diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h
new file mode 100644
index 00000000000..806eac12e3b
--- /dev/null
+++ b/arch/s390/include/asm/cmb.h
@@ -0,0 +1,12 @@
+#ifndef S390_CMB_H
+#define S390_CMB_H
+
+#include <uapi/asm/cmb.h>
+
+struct ccw_device;
+extern int enable_cmf(struct ccw_device *cdev);
+extern int disable_cmf(struct ccw_device *cdev);
+extern u64 cmf_read(struct ccw_device *cdev, int index);
+extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
+
+#endif /* S390_CMB_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
new file mode 100644
index 00000000000..4236408070e
--- /dev/null
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -0,0 +1,304 @@
+/*
+ * Copyright IBM Corp. 1999, 2011
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+
+extern void __xchg_called_with_bad_pointer(void);
+/* Atomically store x in the size-byte object at ptr; returns the value it replaced. */
+static inline unsigned long __xchg(unsigned long x, void *ptr, int size)
+{
+ unsigned long addr, old;
+ int shift;
+
+ switch (size) {
+ case 1:
+ addr = (unsigned long) ptr;
+ shift = (3 ^ (addr & 3)) << 3; /* byte offset 0..3 -> bit shift 24..0 */
+ addr ^= addr & 3; /* align down to the containing 4-byte word */
+ asm volatile(
+ " l %0,%4\n"
+ "0: lr 0,%0\n"
+ " nr 0,%3\n"
+ " or 0,%2\n"
+ " cs %0,0,%4\n"
+ " jl 0b\n"
+ : "=&d" (old), "=Q" (*(int *) addr)
+ : "d" ((x & 0xff) << shift), "d" (~(0xff << shift)), /* %2: new byte in place, %3: keep-mask */
+ "Q" (*(int *) addr) : "memory", "cc", "0"); /* "0": register 0 is clobbered */
+ return old >> shift; /* bits above the byte are cut off by the cast in xchg() */
+ case 2:
+ addr = (unsigned long) ptr;
+ shift = (2 ^ (addr & 2)) << 3; /* halfword offset 0 or 2 -> bit shift 16 or 0 */
+ addr ^= addr & 2; /* clear address bit 1 to reach the containing word */
+ asm volatile(
+ " l %0,%4\n"
+ "0: lr 0,%0\n"
+ " nr 0,%3\n"
+ " or 0,%2\n"
+ " cs %0,0,%4\n"
+ " jl 0b\n"
+ : "=&d" (old), "=Q" (*(int *) addr)
+ : "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)), /* %2: new halfword in place, %3: keep-mask */
+ "Q" (*(int *) addr) : "memory", "cc", "0"); /* "0": register 0 is clobbered */
+ return old >> shift; /* bits above the halfword are cut off by the cast in xchg() */
+ case 4:
+ asm volatile(
+ " l %0,%3\n"
+ "0: cs %0,%2,%3\n"
+ " jl 0b\n" /* loop back to the cs until it succeeds */
+ : "=&d" (old), "=Q" (*(int *) ptr)
+ : "d" (x), "Q" (*(int *) ptr)
+ : "memory", "cc");
+ return old;
+#ifdef CONFIG_64BIT
+ case 8:
+ asm volatile(
+ " lg %0,%3\n"
+ "0: csg %0,%2,%3\n"
+ " jl 0b\n" /* loop back to the csg until it succeeds */
+ : "=&d" (old), "=m" (*(long *) ptr)
+ : "d" (x), "Q" (*(long *) ptr)
+ : "memory", "cc");
+ return old;
+#endif /* CONFIG_64BIT */
+ }
+ __xchg_called_with_bad_pointer(); /* reached only for a size without a case above */
+ return x;
+}
+
+#define xchg(ptr, x) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\
+ __ret; \
+})
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG
+
+extern void __cmpxchg_called_with_bad_pointer(void);
+/* If the size-byte object at ptr equals old, store new; returns the value read from it. */
+static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long addr, prev, tmp;
+ int shift;
+
+ switch (size) {
+ case 1:
+ addr = (unsigned long) ptr;
+ shift = (3 ^ (addr & 3)) << 3; /* byte offset 0..3 -> bit shift 24..0 */
+ addr ^= addr & 3; /* align down to the containing 4-byte word */
+ asm volatile(
+ " l %0,%2\n"
+ "0: nr %0,%5\n"
+ " lr %1,%0\n"
+ " or %0,%3\n"
+ " or %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jnl 1f\n"
+ " xr %1,%0\n"
+ " nr %1,%5\n"
+ " jnz 0b\n" /* retry only if bytes outside the target differ */
+ "1:"
+ : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
+ : "d" ((old & 0xff) << shift), /* %3: expected byte in place */
+ "d" ((new & 0xff) << shift), /* %4: replacement byte in place */
+ "d" (~(0xff << shift)) /* %5: mask keeping the other bytes */
+ : "memory", "cc");
+ return prev >> shift; /* bits above the byte are cut off by the cast in cmpxchg() */
+ case 2:
+ addr = (unsigned long) ptr;
+ shift = (2 ^ (addr & 2)) << 3; /* halfword offset 0 or 2 -> bit shift 16 or 0 */
+ addr ^= addr & 2; /* clear address bit 1 to reach the containing word */
+ asm volatile(
+ " l %0,%2\n"
+ "0: nr %0,%5\n"
+ " lr %1,%0\n"
+ " or %0,%3\n"
+ " or %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jnl 1f\n"
+ " xr %1,%0\n"
+ " nr %1,%5\n"
+ " jnz 0b\n" /* retry only if bits outside the target differ */
+ "1:"
+ : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr)
+ : "d" ((old & 0xffff) << shift), /* %3: expected halfword in place */
+ "d" ((new & 0xffff) << shift), /* %4: replacement halfword in place */
+ "d" (~(0xffff << shift)) /* %5: mask keeping the other halfword */
+ : "memory", "cc");
+ return prev >> shift; /* bits above the halfword are cut off by the cast in cmpxchg() */
+ case 4:
+ asm volatile(
+ " cs %0,%3,%1\n" /* single attempt, no retry loop */
+ : "=&d" (prev), "=Q" (*(int *) ptr)
+ : "0" (old), "d" (new), "Q" (*(int *) ptr) /* "0": old starts out in the register of prev */
+ : "memory", "cc");
+ return prev;
+#ifdef CONFIG_64BIT
+ case 8:
+ asm volatile(
+ " csg %0,%3,%1\n" /* single attempt, no retry loop */
+ : "=&d" (prev), "=Q" (*(long *) ptr)
+ : "0" (old), "d" (new), "Q" (*(long *) ptr) /* "0": old starts out in the register of prev */
+ : "memory", "cc");
+ return prev;
+#endif /* CONFIG_64BIT */
+ }
+ __cmpxchg_called_with_bad_pointer(); /* reached only for a size without a case above */
+ return old;
+}
+
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \
+ sizeof(*(ptr))); \
+ __ret; \
+})
+
+#ifdef CONFIG_64BIT
+#define cmpxchg64(ptr, o, n) \
+({ \
+ cmpxchg((ptr), (o), (n)); \
+})
+#else /* CONFIG_64BIT */
+static inline unsigned long long __cmpxchg64(void *ptr,
+ unsigned long long old,
+ unsigned long long new)
+{
+ register_pair rp_old = {.pair = old};
+ register_pair rp_new = {.pair = new};
+ unsigned long long *ullptr = ptr;
+
+ asm volatile(
+ " cds %0,%2,%1"
+ : "+d" (rp_old), "+Q" (*ullptr)
+ : "d" (rp_new)
+ : "memory", "cc");
+ return rp_old.pair;
+}
+
+#define cmpxchg64(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg64((ptr), \
+ (unsigned long long)(o), \
+ (unsigned long long)(n)); \
+ __ret; \
+})
+#endif /* CONFIG_64BIT */
+
+#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
+({ \
+ register __typeof__(*(p1)) __old1 asm("2") = (o1); \
+ register __typeof__(*(p2)) __old2 asm("3") = (o2); \
+ register __typeof__(*(p1)) __new1 asm("4") = (n1); \
+ register __typeof__(*(p2)) __new2 asm("5") = (n2); \
+ int cc; \
+ asm volatile( \
+ insn " %[old],%[new],%[ptr]\n" \
+ " ipm %[cc]\n" \
+ " srl %[cc],28" \
+ : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \
+ : [new] "d" (__new1), "d" (__new2), \
+ [ptr] "Q" (*(p1)), "Q" (*(p2)) \
+ : "memory", "cc"); \
+ !cc; \
+})
+
+#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds")
+
+#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg")
+
+extern void __cmpxchg_double_called_with_bad_pointer(void);
+
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ int __ret; \
+ switch (sizeof(*(p1))) { \
+ case 4: \
+ __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \
+ break; \
+ case 8: \
+ __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \
+ break; \
+ default: \
+ __cmpxchg_double_called_with_bad_pointer(); \
+ } \
+ __ret; \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ __typeof__(p1) __p1 = (p1); \
+ __typeof__(p2) __p2 = (p2); \
+ int __ret; \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
+ if (sizeof(long) == 4) \
+ __ret = __cmpxchg_double_4(__p1, __p2, o1, o2, n1, n2); \
+ else \
+ __ret = __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \
+ __ret; \
+})
+
+#define system_has_cmpxchg_double() 1
+
+#include <asm-generic/cmpxchg-local.h>
+/* Sizes handled by __cmpxchg() are passed to it; every other size goes to the generic helper. */
+static inline unsigned long __cmpxchg_local(void *ptr,
+ unsigned long old,
+ unsigned long new, int size)
+{
+ switch (size) {
+ case 1:
+ case 2:
+ case 4:
+#ifdef CONFIG_64BIT
+ case 8: /* 8-byte operands only with CONFIG_64BIT */
+#endif
+ return __cmpxchg(ptr, old, new, size);
+ default:
+ return __cmpxchg_local_generic(ptr, old, new, size);
+ }
+
+ return old; /* not reached: both switch arms return */
+}
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+#define cmpxchg_local(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg_local((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
+
+#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n))
+
+#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
new file mode 100644
index 00000000000..d350ed9d0fb
--- /dev/null
+++ b/arch/s390/include/asm/compat.h
@@ -0,0 +1,359 @@
+#ifndef _ASM_S390X_COMPAT_H
+#define _ASM_S390X_COMPAT_H
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+
+#define __SC_DELOUSE(t,v) ({ \
+ BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
+ (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+})
+
+#define PSW32_MASK_PER 0x40000000UL
+#define PSW32_MASK_DAT 0x04000000UL
+#define PSW32_MASK_IO 0x02000000UL
+#define PSW32_MASK_EXT 0x01000000UL
+#define PSW32_MASK_KEY 0x00F00000UL
+#define PSW32_MASK_BASE 0x00080000UL /* Always one */
+#define PSW32_MASK_MCHECK 0x00040000UL
+#define PSW32_MASK_WAIT 0x00020000UL
+#define PSW32_MASK_PSTATE 0x00010000UL
+#define PSW32_MASK_ASC 0x0000C000UL
+#define PSW32_MASK_CC 0x00003000UL
+#define PSW32_MASK_PM 0x00000f00UL
+#define PSW32_MASK_RI 0x00000080UL
+
+#define PSW32_MASK_USER 0x0000FF00UL
+
+#define PSW32_ADDR_AMODE 0x80000000UL
+#define PSW32_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY 0x00000000UL
+#define PSW32_ASC_ACCREG 0x00004000UL
+#define PSW32_ASC_SECONDARY 0x00008000UL
+#define PSW32_ASC_HOME 0x0000C000UL
+
+#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
+ PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
+ PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+ PSW32_ASC_PRIMARY)
+
+#define COMPAT_USER_HZ 100
+#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
+
+typedef u32 compat_size_t;
+typedef s32 compat_ssize_t;
+typedef s32 compat_time_t;
+typedef s32 compat_clock_t;
+typedef s32 compat_pid_t;
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
+typedef u32 __compat_uid32_t;
+typedef u32 __compat_gid32_t;
+typedef u16 compat_mode_t;
+typedef u32 compat_ino_t;
+typedef u16 compat_dev_t;
+typedef s32 compat_off_t;
+typedef s64 compat_loff_t;
+typedef u16 compat_nlink_t;
+typedef u16 compat_ipc_pid_t;
+typedef s32 compat_daddr_t;
+typedef u32 compat_caddr_t;
+typedef __kernel_fsid_t compat_fsid_t;
+typedef s32 compat_key_t;
+typedef s32 compat_timer_t;
+
+typedef s32 compat_int_t;
+typedef s32 compat_long_t;
+typedef s64 compat_s64;
+typedef u32 compat_uint_t;
+typedef u32 compat_ulong_t;
+typedef u64 compat_u64;
+typedef u32 compat_uptr_t;
+
+typedef struct {
+ u32 mask;
+ u32 addr;
+} __aligned(8) psw_compat_t;
+
+typedef struct {
+ psw_compat_t psw;
+ u32 gprs[NUM_GPRS];
+ u32 acrs[NUM_ACRS];
+ u32 orig_gpr2;
+} s390_compat_regs;
+
+typedef struct {
+ u32 gprs_high[NUM_GPRS];
+} s390_compat_regs_high;
+
+struct compat_timespec {
+ compat_time_t tv_sec;
+ s32 tv_nsec;
+};
+
+struct compat_timeval {
+ compat_time_t tv_sec;
+ s32 tv_usec;
+};
+
+struct compat_stat {
+ compat_dev_t st_dev;
+ u16 __pad1;
+ compat_ino_t st_ino;
+ compat_mode_t st_mode;
+ compat_nlink_t st_nlink;
+ __compat_uid_t st_uid;
+ __compat_gid_t st_gid;
+ compat_dev_t st_rdev;
+ u16 __pad2;
+ u32 st_size;
+ u32 st_blksize;
+ u32 st_blocks;
+ u32 st_atime;
+ u32 st_atime_nsec;
+ u32 st_mtime;
+ u32 st_mtime_nsec;
+ u32 st_ctime;
+ u32 st_ctime_nsec;
+ u32 __unused4;
+ u32 __unused5;
+};
+
+struct compat_flock {
+ short l_type;
+ short l_whence;
+ compat_off_t l_start;
+ compat_off_t l_len;
+ compat_pid_t l_pid;
+};
+
+#define F_GETLK64 12
+#define F_SETLK64 13
+#define F_SETLKW64 14
+
+struct compat_flock64 {
+ short l_type;
+ short l_whence;
+ compat_loff_t l_start;
+ compat_loff_t l_len;
+ compat_pid_t l_pid;
+};
+
+struct compat_statfs {
+ u32 f_type;
+ u32 f_bsize;
+ u32 f_blocks;
+ u32 f_bfree;
+ u32 f_bavail;
+ u32 f_files;
+ u32 f_ffree;
+ compat_fsid_t f_fsid;
+ u32 f_namelen;
+ u32 f_frsize;
+ u32 f_flags;
+ u32 f_spare[4];
+};
+
+struct compat_statfs64 {
+ u32 f_type;
+ u32 f_bsize;
+ u64 f_blocks;
+ u64 f_bfree;
+ u64 f_bavail;
+ u64 f_files;
+ u64 f_ffree;
+ compat_fsid_t f_fsid;
+ u32 f_namelen;
+ u32 f_frsize;
+ u32 f_flags;
+ u32 f_spare[4];
+};
+
+#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff
+#define COMPAT_RLIM_INFINITY 0xffffffff
+
+typedef u32 compat_old_sigset_t; /* at least 32 bits */
+
+#define _COMPAT_NSIG 64
+#define _COMPAT_NSIG_BPW 32
+
+typedef u32 compat_sigset_word;
+
+typedef union compat_sigval {
+ compat_int_t sival_int;
+ compat_uptr_t sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[128/sizeof(int) - 3];
+
+ /* kill() */
+ struct {
+ pid_t _pid; /* sender's pid */
+ uid_t _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ compat_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ pid_t _pid; /* sender's pid */
+ uid_t _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ pid_t _pid; /* which child */
+ uid_t _uid; /* sender's uid */
+ int _status;/* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ __u32 _addr; /* faulting insn/memory ref. - pointer */
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} compat_siginfo_t;
+
+/*
+ * How these fields are to be accessed.
+ */
+#define si_pid _sifields._kill._pid
+#define si_uid _sifields._kill._uid
+#define si_status _sifields._sigchld._status
+#define si_utime _sifields._sigchld._utime
+#define si_stime _sifields._sigchld._stime
+#define si_value _sifields._rt._sigval
+#define si_int _sifields._rt._sigval.sival_int
+#define si_ptr _sifields._rt._sigval.sival_ptr
+#define si_addr _sifields._sigfault._addr
+#define si_band _sifields._sigpoll._band
+#define si_fd _sifields._sigpoll._fd
+#define si_tid _sifields._timer._tid
+#define si_overrun _sifields._timer._overrun
+
+#define COMPAT_OFF_T_MAX 0x7fffffff
+#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+ return (void __user *)(unsigned long)(uptr & 0x7fffffffUL); /* the 0x80000000 bit is masked off */
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+ return (u32)(unsigned long)uptr;
+}
+
+#ifdef CONFIG_COMPAT
+
+static inline int is_compat_task(void)
+{
+ return is_32bit_task();
+}
+/* Returns a user pointer len bytes below the current task's KSTK_ESP() value. */
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+ unsigned long stack;
+
+ stack = KSTK_ESP(current);
+ if (is_compat_task())
+ stack &= 0x7fffffffUL; /* compat tasks: keep only the low 31 bits */
+ return (void __user *) (stack - len);
+}
+
+#endif
+
+struct compat_ipc64_perm {
+ compat_key_t key;
+ __compat_uid32_t uid;
+ __compat_gid32_t gid;
+ __compat_uid32_t cuid;
+ __compat_gid32_t cgid;
+ compat_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned short __pad2;
+ unsigned int __unused1;
+ unsigned int __unused2;
+};
+
+struct compat_semid64_ds {
+ struct compat_ipc64_perm sem_perm;
+ compat_time_t sem_otime;
+ compat_ulong_t __pad1;
+ compat_time_t sem_ctime;
+ compat_ulong_t __pad2;
+ compat_ulong_t sem_nsems;
+ compat_ulong_t __unused1;
+ compat_ulong_t __unused2;
+};
+
+struct compat_msqid64_ds {
+ struct compat_ipc64_perm msg_perm;
+ compat_time_t msg_stime;
+ compat_ulong_t __pad1;
+ compat_time_t msg_rtime;
+ compat_ulong_t __pad2;
+ compat_time_t msg_ctime;
+ compat_ulong_t __pad3;
+ compat_ulong_t msg_cbytes;
+ compat_ulong_t msg_qnum;
+ compat_ulong_t msg_qbytes;
+ compat_pid_t msg_lspid;
+ compat_pid_t msg_lrpid;
+ compat_ulong_t __unused1;
+ compat_ulong_t __unused2;
+};
+
+struct compat_shmid64_ds {
+ struct compat_ipc64_perm shm_perm;
+ compat_size_t shm_segsz;
+ compat_time_t shm_atime;
+ compat_ulong_t __pad1;
+ compat_time_t shm_dtime;
+ compat_ulong_t __pad2;
+ compat_time_t shm_ctime;
+ compat_ulong_t __pad3;
+ compat_pid_t shm_cpid;
+ compat_pid_t shm_lpid;
+ compat_ulong_t shm_nattch;
+ compat_ulong_t __unused1;
+ compat_ulong_t __unused2;
+};
+#endif /* _ASM_S390X_COMPAT_H */
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
new file mode 100644
index 00000000000..3dfadb5d648
--- /dev/null
+++ b/arch/s390/include/asm/cpcmd.h
@@ -0,0 +1,32 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#ifndef _ASM_S390_CPCMD_H
+#define _ASM_S390_CPCMD_H
+
+/*
+ * the lowlevel function for cpcmd
+ * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
+ */
+extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+/*
+ * cpcmd is the in-kernel interface for issuing CP commands
+ *
+ * cmd: null-terminated command string, max 240 characters
+ * response: response buffer for VM's textual response
+ * rlen: size of the response buffer, cpcmd will not exceed this size
+ * but will cap the output, if it is too large. Everything that
+ * did not fit into the buffer will be silently dropped
+ * response_code: return pointer for VM's error code
+ * return value: the size of the response. The caller can check if the buffer
+ * was large enough by comparing the return value and rlen
+ * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ */
+extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+#endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
new file mode 100644
index 00000000000..f5a8e2fcde0
--- /dev/null
+++ b/arch/s390/include/asm/cpu.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Hartmut Penner <hp@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CPU_H
+#define _ASM_S390_CPU_H
+
+#define MAX_CPU_ADDRESS 255
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct cpuid
+{
+ unsigned int version : 8;
+ unsigned int ident : 24;
+ unsigned int machine : 16;
+ unsigned int unused : 16;
+} __attribute__ ((packed, aligned(8)));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
new file mode 100644
index 00000000000..cb700d54bd8
--- /dev/null
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -0,0 +1,283 @@
+/*
+ * CPU-measurement facilities
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#ifndef _ASM_S390_CPU_MF_H
+#define _ASM_S390_CPU_MF_H
+
+#include <linux/errno.h>
+#include <asm/facility.h>
+
+#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */
+#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */
+#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */
+#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */
+#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */
+#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */
+#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */
+#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */
+#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program
+ buffer full */
+
+#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
+ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
+ CPU_MF_INT_SF_LSDA)
+#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
+
+/* CPU measurement facility support */
+static inline int cpum_cf_avail(void)
+{
+ return MACHINE_HAS_LPP && test_facility(67);
+}
+
+static inline int cpum_sf_avail(void)
+{
+ return MACHINE_HAS_LPP && test_facility(68);
+}
+
+
+struct cpumf_ctr_info {
+ u16 cfvn;
+ u16 auth_ctl;
+ u16 enable_ctl;
+ u16 act_ctl;
+ u16 max_cpu;
+ u16 csvn;
+ u16 max_cg;
+ u16 reserved1;
+ u32 reserved2[12];
+} __packed;
+
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block { /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: basic-sampling authorization */
+ unsigned int ad:1; /* 15: diag-sampling authorization */
+ unsigned int b16_21:6; /* 16-21: zeros */
+ unsigned int es:1; /* 22: basic-sampling enable control */
+ unsigned int ed:1; /* 23: diag-sampling enable control */
+ unsigned int b24_29:6; /* 24-29: zeros */
+ unsigned int cs:1; /* 30: basic-sampling activation control */
+ unsigned int cd:1; /* 31: diag-sampling activation control */
+ unsigned int bsdes:16; /* 4-5: size of basic sampling entry */
+ unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long long b2_53:52;/* 2-53: zeros */
+ unsigned int es:1; /* 54: basic-sampling enable control */
+ unsigned int ed:1; /* 55: diag-sampling enable control */
+ unsigned int b56_61:6; /* 56-61: - zeros */
+ unsigned int cs:1; /* 62: basic-sampling activation control */
+ unsigned int cd:1; /* 63: diag-sampling activation control */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+} __packed;
+
+struct hws_basic_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int:16;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
+} __packed;
+
+struct hws_diag_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:14; /* 16-19 and 20-30 reserved */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ u8 data[]; /* Machine-dependent sample data */
+} __packed;
+
+struct hws_combined_entry {
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+} __packed;
+
+struct hws_trailer_entry {
+ union {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned long long:61; /* 3 - 63: Reserved */
+ };
+ unsigned long long flags; /* 0 - 63: All indicators */
+ };
+ unsigned long long overflow; /* 64 - sample Overflow count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+ unsigned long long progusage1; /* 48 - reserved for programming use */
+ unsigned long long progusage2; /* */
+} __packed;
+
+/* Query counter information; rc stays -EINVAL unless the lhi after the instruction runs */
+static inline int qctri(struct cpumf_ctr_info *info)
+{
+ int rc = -EINVAL;
+
+ asm volatile (
+ "0: .insn s,0xb28e0000,%1\n"
+ "1: lhi %0,0\n"
+ "2:\n"
+ EX_TABLE(1b, 2b) /* presumably: a fault reported at 1b resumes at 2b - confirm against EX_TABLE */
+ : "+d" (rc), "=Q" (*info));
+ return rc;
+}
+
+/* Load CPU-counter-set controls; returns the condition code left by the instruction */
+static inline int lcctl(u64 ctl)
+{
+ int cc;
+
+ asm volatile (
+ " .insn s,0xb2840000,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n" /* ipm + srl 28 leave the condition code in %0 */
+ : "=d" (cc) : "m" (ctl) : "cc");
+ return cc;
+}
+
+/* Extract CPU counter; returns the condition code, *val is written only when it is 0 */
+static inline int ecctr(u64 ctr, u64 *val)
+{
+ register u64 content asm("4") = 0; /* pinned to register 4 */
+ int cc;
+
+ asm volatile (
+ " .insn rre,0xb2e40000,%0,%2\n"
+ " ipm %1\n"
+ " srl %1,28\n" /* ipm + srl 28 leave the condition code in %1 */
+ : "=d" (content), "=d" (cc) : "d" (ctr) : "cc");
+ if (!cc)
+ *val = content;
+ return cc;
+}
+
+/* Query sampling information; returns 0 if the instruction completed, else -EINVAL */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+ int cc;
+ cc = 1; /* preset: only the lhi after the instruction clears it */
+
+ asm volatile(
+ "0: .insn s,0xb2860000,0(%1)\n"
+ "1: lhi %0,0\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (cc), "+a" (info) /* "+d": the preset must survive when the lhi is skipped */
+ : "m" (*info)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls; returns 0 if cc ends up 0, else -EINVAL */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+ int cc;
+
+ cc = 1; /* preset; the "+d" constraint below keeps it live across the asm */
+ asm volatile(
+ "0: .insn s,0xb2870000,0(%1)\n"
+ "1: ipm %0\n"
+ " srl %0,28\n" /* ipm + srl 28 leave the condition code in %0 */
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (cc), "+a" (req)
+ : "m" (*req)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed; /* freq must be non-zero; the division truncates before scaling */
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate; /* rate must be non-zero; multiply first, then truncating divide */
+}
+
+#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return TOD timestamp contained in a trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* TOD in STCKE format */
+ if (te->t)
+ return *((unsigned long long *) &te->timestamp[1]); /* 8 bytes starting at byte 1 */
+
+ /* TOD in STCK format */
+ return *((unsigned long long *) &te->timestamp[0]); /* first 8 bytes */
+}
+
+/* Return pointer to trailer entry of a sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+ unsigned long trailer;
+
+ /* the trailer entry is the last thing in the PAGE_SIZE span starting at v */
+ trailer = v + PAGE_SIZE;
+ trailer -= sizeof(struct hws_trailer_entry);
+
+ return (unsigned long *) trailer;
+}
+
+/* Return if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1ul ? 1 : 0; /* the lowest bit of the entry is the link flag */
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return (unsigned long *) (*s & ~0x1ul); /* entry value with the lowest bit cleared */
+}
+#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
new file mode 100644
index 00000000000..f65bd363451
--- /dev/null
+++ b/arch/s390/include/asm/cputime.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright IBM Corp. 2004
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_CPUTIME_H
+#define _S390_CPUTIME_H
+
+#include <linux/types.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <asm/div64.h>
+
+
+/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
+
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
+
+static inline unsigned long __div(unsigned long long n, unsigned long base)
+{
+#ifndef CONFIG_64BIT
+ register_pair rp;
+
+ rp.pair = n >> 1; /* dividend and divisor are both halved before the dr */
+ asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
+ return rp.subreg.odd; /* the quotient ends up in the odd half of the pair */
+#else /* CONFIG_64BIT */
+ return n / base;
+#endif /* CONFIG_64BIT */
+}
+
+#define cputime_one_jiffy jiffies_to_cputime(1)
+
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
+{
+ return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
+}
+
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+{
+ return (__force cputime_t)(jif * (4096000000ULL / HZ));
+}
+
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+ unsigned long long jif = (__force unsigned long long) cputime;
+ do_div(jif, 4096000000ULL / HZ);
+ return jif;
+}
+
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+ return (__force cputime64_t)(jif * (4096000000ULL / HZ));
+}
+
+/*
+ * Convert cputime to microseconds and back.
+ */
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+{
+ return (__force unsigned long long) cputime >> 12;
+}
+
+static inline cputime_t usecs_to_cputime(const unsigned int m)
+{
+ return (__force cputime_t)(m * 4096ULL);
+}
+
+#define usecs_to_cputime64(m) usecs_to_cputime(m)
+
+/*
+ * Convert cputime to seconds and back.
+ */
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
+{
+ return __div((__force unsigned long long) cputime, 2048000000) >> 1;
+}
+
+static inline cputime_t secs_to_cputime(const unsigned int s)
+{
+ return (__force cputime_t)(s * 4096000000ULL);
+}
+
+/*
+ * Convert cputime to timespec and back.
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
+{
+ unsigned long long ret = value->tv_sec * 4096000000ULL; /* 4096 units per microsecond */
+ return (__force cputime_t)(ret + value->tv_nsec * 4096LL / 1000); /* LL: tv_nsec * 4096 does not fit in 32 bits */
+}
+
+static inline void cputime_to_timespec(const cputime_t cputime,
+ struct timespec *value)
+{
+ unsigned long long __cputime = (__force unsigned long long) cputime;
+#ifndef CONFIG_64BIT
+ register_pair rp;
+
+ rp.pair = __cputime >> 1;
+ asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+ value->tv_nsec = rp.subreg.even * 1000 / 4096;
+ value->tv_sec = rp.subreg.odd;
+#else
+ value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
+ value->tv_sec = __cputime / 4096000000ULL;
+#endif
+}
+
+/*
+ * Convert cputime to timeval and back.
+ * cputime counts 2**-12 microsecond units, so microseconds are scaled
+ * by 4096 in both directions.
+ */
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
+{
+ unsigned long long ret = value->tv_sec * 4096000000ULL;
+ return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
+}
+
+static inline void cputime_to_timeval(const cputime_t cputime,
+ struct timeval *value)
+{
+ unsigned long long __cputime = (__force unsigned long long) cputime;
+#ifndef CONFIG_64BIT
+ register_pair rp;
+
+ rp.pair = __cputime >> 1;
+ asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+ value->tv_usec = rp.subreg.even / 4096;
+ value->tv_sec = rp.subreg.odd;
+#else
+ value->tv_usec = (__cputime % 4096000000ULL) / 4096;
+ value->tv_sec = __cputime / 4096000000ULL;
+#endif
+}
+
+/*
+ * Convert cputime to clock and back.
+ */
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
+{
+ unsigned long long clock = (__force unsigned long long) cputime;
+ do_div(clock, 4096000000ULL / USER_HZ);
+ return clock;
+}
+
+static inline cputime_t clock_t_to_cputime(unsigned long x)
+{
+ return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
+}
+
+/*
+ * Convert cputime64 to clock.
+ */
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
+{
+ unsigned long long clock = (__force unsigned long long) cputime;
+ do_div(clock, 4096000000ULL / USER_HZ);
+ return clock;
+}
+
+struct s390_idle_data {
+ int nohz_delay;
+ unsigned int sequence;
+ unsigned long long idle_count;
+ unsigned long long idle_time;
+ unsigned long long clock_idle_enter;
+ unsigned long long clock_idle_exit;
+ unsigned long long timer_idle_enter;
+ unsigned long long timer_idle_exit;
+};
+
+DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
+
+cputime64_t s390_get_idle_time(int cpu);
+
+#define arch_idle_time(cpu) s390_get_idle_time(cpu)
+
+static inline int s390_nohz_delay(int cpu)
+{
+ return __get_cpu_var(s390_idle).nohz_delay != 0;
+}
+
+#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
+
+#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
new file mode 100644
index 00000000000..7c31d3e25cd
--- /dev/null
+++ b/arch/s390/include/asm/crw.h
@@ -0,0 +1,69 @@
+/*
+ * Data definitions for channel report processing
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CRW_H
+#define _ASM_S390_CRW_H
+
+#include <linux/types.h>
+
+/*
+ * Channel Report Word
+ */
+struct crw { /* field widths add up to 32 bits: 1+1+1+1+4+1+1+6+16 */
+ __u32 res1 : 1; /* reserved zero */
+ __u32 slct : 1; /* solicited */
+ __u32 oflw : 1; /* overflow */
+ __u32 chn : 1; /* chained */
+ __u32 rsc : 4; /* reporting source code */
+ __u32 anc : 1; /* ancillary report */
+ __u32 res2 : 1; /* reserved zero */
+ __u32 erc : 6; /* error-recovery code */
+ __u32 rsid : 16; /* reporting-source ID */
+} __attribute__ ((packed));
+
+typedef void (*crw_handler_t)(struct crw *, struct crw *, int);
+
+extern int crw_register_handler(int rsc, crw_handler_t handler);
+extern void crw_unregister_handler(int rsc);
+extern void crw_handle_channel_report(void);
+void crw_wait_for_channel_report(void);
+
+#define NR_RSCS 16
+
+#define CRW_RSC_MONITOR 0x2 /* monitoring facility */
+#define CRW_RSC_SCH 0x3 /* subchannel */
+#define CRW_RSC_CPATH 0x4 /* channel path */
+#define CRW_RSC_CONFIG 0x9 /* configuration-alert facility */
+#define CRW_RSC_CSS 0xB /* channel subsystem */
+
+#define CRW_ERC_EVENT 0x00 /* event information pending */
+#define CRW_ERC_AVAIL 0x01 /* available */
+#define CRW_ERC_INIT 0x02 /* initialized */
+#define CRW_ERC_TERROR 0x03 /* temporary error */
+#define CRW_ERC_IPARM 0x04 /* installed parm initialized */
+#define CRW_ERC_TERM 0x05 /* terminal */
+#define CRW_ERC_PERRN 0x06 /* perm. error, fac. not init */
+#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD 0x08 /* installed parameters modified */
+
+static inline int stcrw(struct crw *pcrw) /* run STCRW with *pcrw as its storage operand; returns the resulting condition code */
+{
+ int ccode;
+
+ asm volatile(
+ " stcrw 0(%2)\n" /* operand address is the pcrw pointer; *pcrw is declared as a memory output */
+ " ipm %0\n" /* copy the condition code (and program mask) into ccode */
+ " srl %0,28\n" /* shift right by 28 so that only the condition code is left */
+ : "=d" (ccode), "=m" (*pcrw)
+ : "a" (pcrw)
+ : "cc" );
+ return ccode;
+}
+
+#endif /* _ASM_S390_CRW_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
new file mode 100644
index 00000000000..09d1dd46bd5
--- /dev/null
+++ b/arch/s390/include/asm/css_chars.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_CSS_CHARS_H
+#define _ASM_CSS_CHARS_H
+
+#include <linux/types.h>
+
+struct css_general_char { /* flag bits; the unnamed fields are padding that places each flag at the bit number in its comment (counting declared bits from 0) */
+ u64 : 12;
+ u32 dynio : 1; /* bit 12 */
+ u32 : 4;
+ u32 eadm : 1; /* bit 17 */
+ u32 : 23;
+ u32 aif : 1; /* bit 41 */
+ u32 : 3;
+ u32 mcss : 1; /* bit 45 */
+ u32 fcs : 1; /* bit 46 */
+ u32 : 1;
+ u32 ext_mb : 1; /* bit 48 */
+ u32 : 7;
+ u32 aif_tdd : 1; /* bit 56 */
+ u32 : 1;
+ u32 qebsm : 1; /* bit 58 */
+ u32 : 8;
+ u32 aif_osa : 1; /* bit 67 */
+ u32 : 12;
+ u32 eadm_rf : 1; /* bit 80 */
+ u32 : 1;
+ u32 cib : 1; /* bit 82 */
+ u32 : 5;
+ u32 fcx : 1; /* bit 88 */
+ u32 : 19;
+ u32 alt_ssi : 1; /* bit 108 */
+ u32:1;
+ u32 narf:1; /* bit 110 */
+} __packed;
+
+extern struct css_general_char css_general_characteristics;
+
+#endif
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
new file mode 100644
index 00000000000..31ab9f346d7
--- /dev/null
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_CTL_REG_H
+#define __ASM_CTL_REG_H
+
+#include <linux/bug.h>
+
+#ifdef CONFIG_64BIT
+# define __CTL_LOAD "lctlg"
+# define __CTL_STORE "stctg"
+#else
+# define __CTL_LOAD "lctl"
+# define __CTL_STORE "stctl"
+#endif
+
+#define __ctl_load(array, low, high) { /* issue __CTL_LOAD (lctl/lctlg) for registers low..high with array as the memory operand */ \
+ typedef struct { char _[sizeof(array)]; } addrtype; /* struct as large as array, so the "Q" operand covers all of it */ \
+ \
+ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long)); /* array must hold exactly one long per register in the range */ \
+ asm volatile( \
+ __CTL_LOAD " %1,%2,%0\n" \
+ : : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\
+}
+
+#define __ctl_store(array, low, high) { /* issue __CTL_STORE (stctl/stctg) for registers low..high with array as the output operand */ \
+ typedef struct { char _[sizeof(array)]; } addrtype; /* struct as large as array, so the "=Q" operand covers all of it */ \
+ \
+ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long)); /* array must hold exactly one long per register in the range */ \
+ asm volatile( \
+ __CTL_STORE " %1,%2,%0\n" \
+ : "=Q" (*(addrtype *)(&array)) \
+ : "i" (low), "i" (high)); \
+}
+
+static inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+ unsigned long value;
+
+ /* Read-modify-write of one control register: store, OR in the bit, load. */
+ __ctl_store(value, cr, cr);
+ value = value | (1UL << bit);
+ __ctl_load(value, cr, cr);
+}
+
+static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+ unsigned long value;
+
+ /* Read-modify-write of one control register: store, mask out the bit, load. */
+ __ctl_store(value, cr, cr);
+ value = value & ~(1UL << bit);
+ __ctl_load(value, cr, cr);
+}
+
+void smp_ctl_set_bit(int cr, int bit);
+void smp_ctl_clear_bit(int cr, int bit);
+
+union ctlreg0 { /* val and the bit-field struct overlay the same unsigned long */
+ unsigned long val;
+ struct {
+#ifdef CONFIG_64BIT
+ unsigned long : 32; /* extra padding so the named bits sit in the second 32 bits of the 64-bit value */
+#endif
+ unsigned long : 3;
+ unsigned long lap : 1; /* Low-address-protection control */
+ unsigned long : 4;
+ unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+ unsigned long : 23; /* 3+1+4+1+23 = 32 bits in this group */
+ };
+};
+
+#ifdef CONFIG_SMP
+# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
+#else
+# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
+#endif
+
+#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
new file mode 100644
index 00000000000..b80941f30df
--- /dev/null
+++ b/arch/s390/include/asm/current.h
@@ -0,0 +1,18 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/current.h"
+ */
+
+#ifndef _S390_CURRENT_H
+#define _S390_CURRENT_H
+
+#include <asm/lowcore.h>
+
+struct task_struct;
+
+#define current ((struct task_struct *const)S390_lowcore.current_task) /* task pointer read from the current_task field of S390_lowcore */
+
+#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
new file mode 100644
index 00000000000..530c15eb01e
--- /dev/null
+++ b/arch/s390/include/asm/debug.h
@@ -0,0 +1,237 @@
+/*
+ * S/390 debug facility
+ *
+ * Copyright IBM Corp. 1999, 2000
+ */
+#ifndef DEBUG_H
+#define DEBUG_H
+
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <uapi/asm/debug.h>
+
+#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */
+#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */
+#define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */
+#define DEBUG_MAX_VIEWS 10 /* max number of views in debugfs */
+#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */
+#define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */
+
+#define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in debugfs */
+
+#define DEBUG_DATA(entry) ((char *)((entry) + 1)) /* data is stored behind */
+ /* the entry information; argument and result are parenthesized */
+ /* so the macro is safe inside larger expressions */
+
+typedef struct __debug_entry debug_entry_t;
+
+struct debug_view;
+
+typedef struct debug_info { /* handle type returned by debug_register() and passed to the debug_* calls */
+ struct debug_info* next;
+ struct debug_info* prev;
+ atomic_t ref_count;
+ spinlock_t lock;
+ int level; /* the inline event/exception helpers drop anything with a level above this */
+ int nr_areas;
+ int pages_per_area; /* 0 makes the inline event/exception helpers return NULL */
+ int buf_size;
+ int entry_size;
+ debug_entry_t*** areas;
+ int active_area;
+ int *active_pages;
+ int *active_entries;
+ struct dentry* debugfs_root_entry;
+ struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; /* sized like views[] */
+ struct debug_view* views[DEBUG_MAX_VIEWS];
+ char name[DEBUG_MAX_NAME_LEN];
+ umode_t mode;
+} debug_info_t;
+
+typedef int (debug_header_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ int area,
+ debug_entry_t* entry,
+ char* out_buf);
+
+typedef int (debug_format_proc_t) (debug_info_t* id,
+ struct debug_view* view, char* out_buf,
+ const char* in_buf);
+typedef int (debug_prolog_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ char* out_buf);
+typedef int (debug_input_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ struct file* file,
+ const char __user *user_buf,
+ size_t in_buf_size, loff_t* offset);
+
+int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view,
+ int area, debug_entry_t* entry, char* out_buf);
+
+struct debug_view { /* a named view: prolog/header/format/input callbacks plus private data */
+ char name[DEBUG_MAX_NAME_LEN];
+ debug_prolog_proc_t* prolog_proc;
+ debug_header_proc_t* header_proc;
+ debug_format_proc_t* format_proc;
+ debug_input_proc_t* input_proc;
+ void* private_data;
+};
+
+extern struct debug_view debug_hex_ascii_view;
+extern struct debug_view debug_raw_view;
+extern struct debug_view debug_sprintf_view;
+
+/* do NOT use the _common functions */
+
+debug_entry_t* debug_event_common(debug_info_t* id, int level,
+ const void* data, int length);
+
+debug_entry_t* debug_exception_common(debug_info_t* id, int level,
+ const void* data, int length);
+
+/* Debug Feature API: */
+
+debug_info_t *debug_register(const char *name, int pages, int nr_areas,
+ int buf_size);
+
+debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
+ int buf_size, umode_t mode, uid_t uid,
+ gid_t gid);
+
+void debug_unregister(debug_info_t* id);
+
+void debug_set_level(debug_info_t* id, int new_level);
+
+void debug_set_critical(void);
+void debug_stop_all(void);
+
+static inline bool debug_level_enabled(debug_info_t *id, int level)
+{
+ /* Enabled unless the requested level exceeds the log's level; id is not NULL-checked. */
+ return !(level > id->level);
+}
+
+static inline debug_entry_t *
+debug_event(debug_info_t *id, int level, void *data, int length)
+{
+ /* Nothing is written without a log, above its level, or with zero pages. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_event_common(id, level, data, length);
+}
+
+static inline debug_entry_t *
+debug_int_event(debug_info_t *id, int level, unsigned int tag)
+{
+ /* Same gate as debug_event(); the payload is the unsigned int tag itself. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_event_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_long_event(debug_info_t *id, int level, unsigned long tag)
+{
+ /* Same gate as debug_event(); the payload is the unsigned long tag itself. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_event_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_text_event(debug_info_t *id, int level, const char *txt)
+{
+ /* strlen(txt) bytes are logged, so the terminating NUL is not included. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_event_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
+extern debug_entry_t *
+debug_sprintf_event(debug_info_t* id,int level,char *string,...)
+ __attribute__ ((format(printf, 3, 4)));
+
+
+static inline debug_entry_t *
+debug_exception(debug_info_t *id, int level, void *data, int length)
+{
+ /* Nothing is written without a log, above its level, or with zero pages. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_exception_common(id, level, data, length);
+}
+
+static inline debug_entry_t *
+debug_int_exception(debug_info_t *id, int level, unsigned int tag)
+{
+ /* Same gate as debug_exception(); the payload is the unsigned int tag itself. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_exception_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_long_exception(debug_info_t *id, int level, unsigned long tag)
+{
+ /* Same gate as debug_exception(); the payload is the unsigned long tag itself. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_exception_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_text_exception(debug_info_t *id, int level, const char *txt)
+{
+ /* strlen(txt) bytes are logged, so the terminating NUL is not included. */
+ if (id == NULL)
+  return NULL;
+ if (level > id->level || id->pages_per_area == 0)
+  return NULL;
+ return debug_exception_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
+extern debug_entry_t *
+debug_sprintf_exception(debug_info_t* id,int level,char *string,...)
+ __attribute__ ((format(printf, 3, 4)));
+
+int debug_register_view(debug_info_t* id, struct debug_view* view);
+int debug_unregister_view(debug_info_t* id, struct debug_view* view);
+
+/*
+ define the debug levels:
+ - 0 No debugging output to console or syslog
+ - 1 Log internal errors to syslog, ignore check conditions
+ - 2 Log internal errors and check conditions to syslog
+ - 3 Log internal errors to console, log check conditions to syslog
+ - 4 Log internal errors and check conditions to console
+ - 5 panic on internal errors, log check conditions to console
+ - 6 panic on both, internal errors and check conditions
+ */
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 4
+#endif
+
+#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y
+
+#if DEBUG_LEVEL > 0
+#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_INFO(x...) printk ( KERN_INFO PRINTK_HEADER x )
+#define PRINT_WARN(x...) printk ( KERN_WARNING PRINTK_HEADER x )
+#define PRINT_ERR(x...) printk ( KERN_ERR PRINTK_HEADER x )
+#define PRINT_FATAL(x...) panic ( PRINTK_HEADER x )
+#else
+#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_INFO(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_WARN(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_ERR(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_FATAL(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#endif /* DEBUG_LEVEL > 0 */
+
+#endif /* DEBUG_H */
diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h
new file mode 100644
index 00000000000..3f6e4095f47
--- /dev/null
+++ b/arch/s390/include/asm/delay.h
@@ -0,0 +1,24 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/delay.h"
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/s390/lib/delay.c
+ */
+
+#ifndef _S390_DELAY_H
+#define _S390_DELAY_H
+
+void __ndelay(unsigned long long nsecs);
+void __udelay(unsigned long long usecs);
+void udelay_simple(unsigned long long usecs);
+void __delay(unsigned long loops);
+
+#define ndelay(n) __ndelay((unsigned long long) (n)) /* n is widened to unsigned long long and passed as the nsecs argument */
+#define udelay(n) __udelay((unsigned long long) (n)) /* n is widened to unsigned long long and passed as the usecs argument */
+#define mdelay(n) __udelay((unsigned long long) (n) * 1000) /* n is widened first, then multiplied by 1000 and passed to __udelay() */
+
+#endif /* defined(_S390_DELAY_H) */
diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h
new file mode 100644
index 00000000000..d8f9872b0e2
--- /dev/null
+++ b/arch/s390/include/asm/device.h
@@ -0,0 +1,7 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#include <asm-generic/device.h>
+
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
new file mode 100644
index 00000000000..7e91c58072e
--- /dev/null
+++ b/arch/s390/include/asm/diag.h
@@ -0,0 +1,52 @@
+/*
+ * s390 diagnose functions
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_DIAG_H
+#define _ASM_S390_DIAG_H
+
+/*
+ * Diagnose 10: Release page range
+ */
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) /* issue diag 0x10 for the num_pfn pages starting at page frame start_pfn */
+{
+ unsigned long start_addr, end_addr;
+
+ start_addr = start_pfn << PAGE_SHIFT; /* address of the first page */
+ end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; /* address of the last page, inclusive; num_pfn == 0 would yield the page before start */
+
+ asm volatile(
+ "0: diag %0,%1,0x10\n"
+ "1:\n"
+ EX_TABLE(0b, 1b) /* NOTE(review): presumably exception-table entries that resume at label 1 if the diag faults -- confirm against EX_TABLE */
+ EX_TABLE(1b, 1b)
+ : : "a" (start_addr), "a" (end_addr));
+}
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+struct diag210 { /* member sizes add up to 12 bytes: two u16 plus eight u8 */
+ u16 vrdcdvno; /* device number (input) */
+ u16 vrdclen; /* data block length (input) */
+ u8 vrdcvcla; /* virtual device class (output) */
+ u8 vrdcvtyp; /* virtual device type (output) */
+ u8 vrdcvsta; /* virtual device status (output) */
+ u8 vrdcvfla; /* virtual device flags (output) */
+ u8 vrdcrccl; /* real device class (output) */
+ u8 vrdccrty; /* real device type (output) */
+ u8 vrdccrmd; /* real device model (output) */
+ u8 vrdccrft; /* real device feature (output) */
+} __attribute__((packed, aligned(4)));
+
+extern int diag210(struct diag210 *addr);
+
+#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
new file mode 100644
index 00000000000..04a83f5773c
--- /dev/null
+++ b/arch/s390/include/asm/dis.h
@@ -0,0 +1,52 @@
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef __ASM_S390_DIS_H__
+#define __ASM_S390_DIS_H__
+
+/* Type of operand */
+#define OPERAND_GPR 0x1 /* Operand printed as %rx */
+#define OPERAND_FPR 0x2 /* Operand printed as %fx */
+#define OPERAND_AR 0x4 /* Operand printed as %ax */
+#define OPERAND_CR 0x8 /* Operand printed as %cx */
+#define OPERAND_DISP 0x10 /* Operand printed as displacement */
+#define OPERAND_BASE 0x20 /* Operand printed as base register */
+#define OPERAND_INDEX 0x40 /* Operand printed as index register */
+#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */
+#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */
+
+
+struct s390_operand { /* width, shift and flags of one operand */
+ int bits; /* The number of bits in the operand. */
+ int shift; /* The number of bits to shift. */
+ int flags; /* One bit syntax flags. Presumably a mask of the OPERAND_* values above -- confirm. */
+};
+
+struct s390_insn { /* instruction table entry; find_insn() returns a pointer to one */
+ const char name[5]; /* five bytes of name storage */
+ unsigned char opfrag;
+ unsigned char format;
+};
+
+
+static inline int insn_length(unsigned char code) /* returns 2, 4 or 6 depending on the top two bits of code */
+{
+ return ((((int) code + 64) >> 7) + 1) << 1; /* 0x00-0x3f -> 2, 0x40-0xbf -> 4, 0xc0-0xff -> 6 */
+}
+
+void show_code(struct pt_regs *regs);
+void print_fn_code(unsigned char *code, unsigned long len);
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len);
+struct s390_insn *find_insn(unsigned char *code);
+
+static inline int is_known_insn(unsigned char *code)
+{
+ /* 1 when find_insn() returns a non-NULL entry for code, 0 otherwise. */
+ return find_insn(code) ? 1 : 0;
+}
+
+#endif /* __ASM_S390_DIS_H__ */
diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h
new file mode 100644
index 00000000000..6cd978cefb2
--- /dev/null
+++ b/arch/s390/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
new file mode 100644
index 00000000000..3fbc67d9e19
--- /dev/null
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -0,0 +1,79 @@
+#ifndef _ASM_S390_DMA_MAPPING_H
+#define _ASM_S390_DMA_MAPPING_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-attrs.h>
+#include <linux/dma-debug.h>
+#include <linux/io.h>
+
+#define DMA_ERROR_CODE (~(dma_addr_t) 0x0)
+
+extern struct dma_map_ops s390_dma_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev) /* always returns &s390_dma_ops; dev is not consulted */
+{
+ return &s390_dma_ops;
+}
+
+extern int dma_set_mask(struct device *dev, u64 mask);
+
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, /* empty body: this function does nothing */
+ enum dma_data_direction direction)
+{
+}
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ /* Without a dma_supported hook every mask is reported as supported. */
+ return ops->dma_supported ? ops->dma_supported(dev, mask) : 1;
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ /* No mask means not capable; otherwise the last byte must lie within the mask. */
+ return dev->dma_mask && addr + size - 1 <= *dev->dma_mask;
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ debug_dma_mapping_error(dev, dma_addr);
+ /* Use the ops' own check when present, else compare with DMA_ERROR_CODE. */
+ if (!ops->mapping_error)
+  return dma_addr == DMA_ERROR_CODE;
+ return ops->mapping_error(dev, dma_addr);
+}
+
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ /* Allocate through the ops table (attrs = NULL), then report it to dma-debug. */
+ void *cpu_addr = get_dma_ops(dev)->alloc(dev, size, dma_handle, flag, NULL);
+
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+ return cpu_addr;
+}
+
+static inline void dma_free_coherent(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle)
+{
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ /* Report to dma-debug first, then free through the ops table (attrs = NULL). */
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+ ops->free(dev, size, cpu_addr, dma_handle, NULL);
+}
+
+#endif /* _ASM_S390_DMA_MAPPING_H */
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
new file mode 100644
index 00000000000..bb9bdcd2086
--- /dev/null
+++ b/arch/s390/include/asm/dma.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_S390_DMA_H
+#define _ASM_S390_DMA_H
+
+#include <asm/io.h>
+
+/*
+ * MAX_DMA_ADDRESS is ambiguous because on s390 it's completely unrelated
+ * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
+ * by the 31 bit heritage.
+ */
+#define MAX_DMA_ADDRESS 0x80000000
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy (0)
+#endif
+
+#endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
new file mode 100644
index 00000000000..67026300c88
--- /dev/null
+++ b/arch/s390/include/asm/eadm.h
@@ -0,0 +1,117 @@
+#ifndef _ASM_S390_EADM_H
+#define _ASM_S390_EADM_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+struct arqb { /* member widths add up to 64 bytes: 8 + 2 + 2 + 2 + 2 + 48; used as the request part of struct aob */
+ u64 data;
+ u16 fmt:4;
+ u16:12;
+ u16 cmd_code; /* ARQB_CMD_MOVE is defined below, presumably for this field -- confirm */
+ u16:16;
+ u16 msb_count;
+ u32 reserved[12];
+} __packed;
+
+#define ARQB_CMD_MOVE 1
+
+struct arsb { /* member widths add up to 64 bytes; used as the response part of struct aob */
+ u16 fmt:4;
+ u32:28;
+ u8 ef;
+ u8:8;
+ u8 ecbi;
+ u8:8;
+ u8 fvf;
+ u16:16;
+ u8 eqc; /* EQC_WR_PROHIBIT is defined below, presumably for this field -- confirm */
+ u32:32;
+ u64 fail_msb;
+ u64 fail_aidaw;
+ u64 fail_ms;
+ u64 fail_scm;
+ u32 reserved[4];
+} __packed;
+
+#define EQC_WR_PROHIBIT 22
+
+struct msb { /* member widths add up to 32 bytes: 1 + 1 + 2 + 4 + 8 + 8 + 8 */
+ u8 fmt:4;
+ u8 oc:4; /* presumably one of the MSB_OC_* values below -- confirm */
+ u8 flags; /* presumably MSB_FLAG_* bits -- confirm */
+ u16:12;
+ u16 bs:4; /* presumably MSB_BS_4K or MSB_BS_1M -- confirm */
+ u32 blk_count;
+ u64 data_addr;
+ u64 scm_addr;
+ u64:64;
+} __packed;
+
+struct aidaw { /* member widths add up to 16 bytes: 1 + 3 + 4 + 8 */
+ u8 flags;
+ u32 :24;
+ u32 :32;
+ u64 data_addr;
+} __packed;
+
+#define MSB_OC_CLEAR 0
+#define MSB_OC_READ 1
+#define MSB_OC_WRITE 2
+#define MSB_OC_RELEASE 3
+
+#define MSB_FLAG_BNM 0x80
+#define MSB_FLAG_IDA 0x40
+
+#define MSB_BS_4K 0
+#define MSB_BS_1M 1
+
+#define AOB_NR_MSB 124
+
+struct aob { /* 64-byte request + 64-byte response + AOB_NR_MSB (124) 32-byte msb entries = 4096 bytes */
+ struct arqb request;
+ struct arsb response;
+ struct msb msb[AOB_NR_MSB];
+} __packed __aligned(PAGE_SIZE);
+
+struct aob_rq_header { /* scm_device pointer followed by a zero-length data array */
+ struct scm_device *scmdev;
+ char data[0]; /* occupies no space in the struct; marks where trailing bytes would start */
+};
+
+struct scm_device { /* device object handed to the scm_driver callbacks */
+ u64 address;
+ u64 size;
+ unsigned int nr_max_block;
+ struct device dev;
+ struct { /* bit-fields total 25 bits: 4 + 4 + 4 + 4 + 1 + 8 */
+ unsigned int persistence:4;
+ unsigned int oper_state:4; /* presumably one of the OP_STATE_* values below -- confirm */
+ unsigned int data_state:4;
+ unsigned int rank:4;
+ unsigned int release:1;
+ unsigned int res_id:8;
+ } __packed attrs;
+};
+
+#define OP_STATE_GOOD 1
+#define OP_STATE_TEMP_ERR 2
+#define OP_STATE_PERM_ERR 3
+
+enum scm_event {SCM_CHANGE, SCM_AVAIL};
+
+struct scm_driver { /* callback table registered with scm_driver_register() */
+ struct device_driver drv;
+ int (*probe) (struct scm_device *scmdev);
+ int (*remove) (struct scm_device *scmdev);
+ void (*notify) (struct scm_device *scmdev, enum scm_event event); /* event is SCM_CHANGE or SCM_AVAIL */
+ void (*handler) (struct scm_device *scmdev, void *data, int error);
+};
+
+int scm_driver_register(struct scm_driver *scmdrv);
+void scm_driver_unregister(struct scm_driver *scmdrv);
+
+int eadm_start_aob(struct aob *aob);
+void scm_irq_handler(struct aob *aob, int error);
+
+#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
new file mode 100644
index 00000000000..c5befc5a3bf
--- /dev/null
+++ b/arch/s390/include/asm/ebcdic.h
@@ -0,0 +1,48 @@
+/*
+ * EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _EBCDIC_H
+#define _EBCDIC_H
+
+#ifndef _S390_TYPES_H
+#include <types.h>
+#endif
+
+extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */
+extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */
+extern __u8 _ascebc[256]; /* ASCII -> EBCDIC conversion table */
+extern __u8 _ebcasc[256]; /* EBCDIC -> ASCII conversion table */
+extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */
+extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
+
+static inline void /* translate nr bytes at addr in place through the 256-byte table codepage */
+codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr)
+{
+ if (nr-- <= 0) /* nr is unsigned, so this is true only for 0; afterwards nr holds count - 1 */
+ return;
+ asm volatile(
+ " bras 1,1f\n" /* r1 = address of the next instruction (the tr template), then jump to label 1 */
+ " tr 0(1,%0),0(%2)\n" /* template only: never run directly, executed through "ex" below */
+ "0: tr 0(256,%0),0(%2)\n" /* translate one full 256-byte chunk */
+ " la %0,256(%0)\n" /* advance addr past that chunk */
+ "1: ahi %1,-256\n" /* nr -= 256 */
+ " jnm 0b\n" /* loop while the result is not negative */
+ " ex %1,0(1)" /* run the template with its length taken from the low byte of nr to handle the remaining bytes */
+ : "+&a" (addr), "+&a" (nr)
+ : "a" (codepage) : "cc", "memory", "1");
+}
+
+#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
+#define EBCASC(addr,nr) codepage_convert(_ebcasc, addr, nr)
+#define ASCEBC_500(addr,nr) codepage_convert(_ascebc_500, addr, nr)
+#define EBCASC_500(addr,nr) codepage_convert(_ebcasc_500, addr, nr)
+#define EBC_TOLOWER(addr,nr) codepage_convert(_ebc_tolower, addr, nr)
+#define EBC_TOUPPER(addr,nr) codepage_convert(_ebc_toupper, addr, nr)
+
+#endif
+
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
new file mode 100644
index 00000000000..78f4f8711d5
--- /dev/null
+++ b/arch/s390/include/asm/elf.h
@@ -0,0 +1,230 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/elf.h"
+ */
+
+#ifndef __ASMS390_ELF_H
+#define __ASMS390_ELF_H
+
+/* s390 relocations defined by the ABIs */
+#define R_390_NONE 0 /* No reloc. */
+#define R_390_8 1 /* Direct 8 bit. */
+#define R_390_12 2 /* Direct 12 bit. */
+#define R_390_16 3 /* Direct 16 bit. */
+#define R_390_32 4 /* Direct 32 bit. */
+#define R_390_PC32 5 /* PC relative 32 bit. */
+#define R_390_GOT12 6 /* 12 bit GOT offset. */
+#define R_390_GOT32 7 /* 32 bit GOT offset. */
+#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */
+#define R_390_COPY 9 /* Copy symbol at runtime. */
+#define R_390_GLOB_DAT 10 /* Create GOT entry. */
+#define R_390_JMP_SLOT 11 /* Create PLT entry. */
+#define R_390_RELATIVE 12 /* Adjust by program base. */
+#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */
+#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */
+#define R_390_GOT16 15 /* 16 bit GOT offset. */
+#define R_390_PC16 16 /* PC relative 16 bit. */
+#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */
+#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */
+#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */
+#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */
+#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */
+#define R_390_64 22 /* Direct 64 bit. */
+#define R_390_PC64 23 /* PC relative 64 bit. */
+#define R_390_GOT64 24 /* 64 bit GOT offset. */
+#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */
+#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */
+#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */
+#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */
+#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */
+#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */
+#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */
+#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */
+#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64 36 /* 64 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL 38 /* Tag for function call in general
+ dynamic TLS code. */
+#define R_390_TLS_LDCALL 39 /* Tag for function call in local
+ dynamic TLS code. */
+#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic
+ thread local data. */
+#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic
+ thread local data. */
+#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic
+ thread local data in LD code. */
+#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic
+ thread local data in LD code. */
+#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to
+ static TLS block. */
+#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to
+ static TLS block. */
+#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS
+ block. */
+#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS
+ block. */
+#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */
+#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */
+#define R_390_TLS_TPOFF 56 /* Negated offset in static TLS
+ block. */
+#define R_390_20 57 /* Direct 20 bit. */
+#define R_390_GOT20 58 /* 20 bit GOT offset. */
+#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */
+#define R_390_TLS_GOTIE20 60 /* 20 bit GOT offset for static TLS
+ block offset. */
+/* Keep this the last entry. */
+#define R_390_NUM 61
+
+/* Bits present in AT_HWCAP. */
+#define HWCAP_S390_ESAN3 1
+#define HWCAP_S390_ZARCH 2
+#define HWCAP_S390_STFLE 4
+#define HWCAP_S390_MSA 8
+#define HWCAP_S390_LDISP 16
+#define HWCAP_S390_EIMM 32
+#define HWCAP_S390_DFP 64
+#define HWCAP_S390_HPAGE 128
+#define HWCAP_S390_ETF3EH 256
+#define HWCAP_S390_HIGH_GPRS 512
+#define HWCAP_S390_TE 1024
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#ifndef CONFIG_64BIT
+#define ELF_CLASS ELFCLASS32
+#else /* CONFIG_64BIT */
+#define ELF_CLASS ELFCLASS64
+#endif /* CONFIG_64BIT */
+#define ELF_DATA ELFDATA2MSB
+#define ELF_ARCH EM_S390
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/compat.h>
+#include <asm/syscall.h>
+#include <asm/user.h>
+
+typedef s390_fp_regs elf_fpregset_t;
+typedef s390_regs elf_gregset_t;
+
+typedef s390_fp_regs compat_elf_fpregset_t;
+typedef s390_compat_regs compat_elf_gregset_t;
+
+#include <linux/sched.h> /* for task_struct */
+#include <asm/mmu_context.h>
+
+#include <asm/vdso.h>
+
+extern unsigned int vdso_enabled;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+ (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+ && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_elf_check_arch(x) \
+ (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+ && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_start_thread start_thread31
+
+/* For SVR4/S390 the function pointer to be registered with `atexit` is
+ passed in R14. */
+#define ELF_PLAT_INIT(_r, load_addr) \
+ do { \
+ _r->gprs[14] = 0; \
+ } while (0)
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE 4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ use of this is to invoke "./ld.so someprog" to test out a new version of
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. */
+
+extern unsigned long randomize_et_dyn(unsigned long base);
+#define ELF_ET_DYN_BASE (randomize_et_dyn(STACK_TOP / 3 * 2))
+
+/* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. */
+
+extern unsigned long elf_hwcap;
+#define ELF_HWCAP (elf_hwcap)
+
+/* This yields a string that ld.so will use to load implementation
+ specific libraries for optimization. This is more specific in
+ intent than poking at uname or /proc/cpuinfo.
+
+ For the moment, we have only optimizations for the Intel generations,
+ but that could change... */
+
+#define ELF_PLATFORM_SIZE 8
+extern char elf_platform[];
+#define ELF_PLATFORM (elf_platform)
+
+#ifndef CONFIG_COMPAT
+#define SET_PERSONALITY(ex) /* non-compat variant: ex is not used */ \
+do { \
+ set_personality(PER_LINUX | /* PER_LINUX combined with the current bits outside PER_MASK */ \
+ (current->personality & (~PER_MASK))); \
+ current_thread_info()->sys_call_table = /* always the regular sys_call_table */ \
+ (unsigned long) &sys_call_table; \
+} while (0)
+#else /* CONFIG_COMPAT */
+#define SET_PERSONALITY(ex) /* compat variant: picks the syscall table from the ELF class of ex */ \
+do { \
+ if (personality(current->personality) != PER_LINUX32) /* a PER_LINUX32 personality is left as it is */ \
+ set_personality(PER_LINUX | \
+ (current->personality & ~PER_MASK)); \
+ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { /* ELFCLASS32 image: set TIF_31BIT and use sys_call_table_emu */ \
+ set_thread_flag(TIF_31BIT); \
+ current_thread_info()->sys_call_table = \
+ (unsigned long) &sys_call_table_emu; \
+ } else { /* any other class: clear TIF_31BIT and use sys_call_table */ \
+ clear_thread_flag(TIF_31BIT); \
+ current_thread_info()->sys_call_table = \
+ (unsigned long) &sys_call_table; \
+ } \
+} while (0)
+#endif /* CONFIG_COMPAT */
+
+#define STACK_RND_MASK 0x7ffUL
+
+#define ARCH_DLINFO /* adds an AT_SYSINFO_EHDR aux entry, only when vdso_enabled is non-zero */ \
+do { \
+ if (vdso_enabled) \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, /* value: the mm context's vdso_base */ \
+ (unsigned long)current->mm->context.vdso_base); \
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+int arch_setup_additional_pages(struct linux_binprm *, int);
+
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa);
+
+#endif
diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h
new file mode 100644
index 00000000000..108d8c48e42
--- /dev/null
+++ b/arch/s390/include/asm/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_EMERGENCY_RESTART_H
+#define _ASM_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
new file mode 100644
index 00000000000..629b79a9316
--- /dev/null
+++ b/arch/s390/include/asm/etr.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright IBM Corp. 2006
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#ifndef __S390_ETR_H
+#define __S390_ETR_H
+
+/* ETR attachment control register */
+struct etr_eacr {
+ unsigned int e0 : 1; /* port 0 stepping control */
+ unsigned int e1 : 1; /* port 1 stepping control */
+ unsigned int _pad0 : 5; /* must be 00100 */
+ unsigned int dp : 1; /* data port control */
+ unsigned int p0 : 1; /* port 0 change recognition control */
+ unsigned int p1 : 1; /* port 1 change recognition control */
+ unsigned int _pad1 : 3; /* must be 000 */
+ unsigned int ea : 1; /* ETR alert control */
+ unsigned int es : 1; /* ETR sync check control */
+ unsigned int sl : 1; /* switch to local control */
+} __attribute__ ((packed));
+
+/* Port state returned by steai */
+enum etr_psc {
+ etr_psc_operational = 0,
+ etr_psc_semi_operational = 1,
+ etr_psc_protocol_error = 4,
+ etr_psc_no_symbols = 8,
+ etr_psc_no_signal = 12,
+ etr_psc_pps_mode = 13
+};
+
+/* Logical port state returned by stetr */
+enum etr_lpsc {
+ etr_lpsc_operational_step = 0,
+ etr_lpsc_operational_alt = 1,
+ etr_lpsc_semi_operational = 2,
+ etr_lpsc_protocol_error = 4,
+ etr_lpsc_no_symbol_sync = 8,
+ etr_lpsc_no_signal = 12,
+ etr_lpsc_pps_mode = 13
+};
+
+/* ETR status words */
+struct etr_esw {
+ struct etr_eacr eacr; /* attachment control register */
+ unsigned int y : 1; /* stepping mode */
+ unsigned int _pad0 : 5; /* must be 00000 */
+ unsigned int p : 1; /* stepping port number */
+ unsigned int q : 1; /* data port number */
+ unsigned int psc0 : 4; /* port 0 state code */
+ unsigned int psc1 : 4; /* port 1 state code */
+} __attribute__ ((packed));
+
+/* Second level data register status word (bit-fields add up to 32 bits) */
+struct etr_slsw {
+ unsigned int vv1 : 1; /* copy of validity bit data frame 1 */
+ unsigned int vv2 : 1; /* copy of validity bit data frame 2 */
+ unsigned int vv3 : 1; /* copy of validity bit data frame 3 */
+ unsigned int vv4 : 1; /* copy of validity bit data frame 4 */
+ unsigned int _pad0 : 19; /* must be all zeroes */
+ unsigned int n : 1; /* EAF port number */
+ unsigned int v1 : 1; /* validity bit ETR data frame 1 */
+ unsigned int v2 : 1; /* validity bit ETR data frame 2 */
+ unsigned int v3 : 1; /* validity bit ETR data frame 3 */
+ unsigned int v4 : 1; /* validity bit ETR data frame 4 */
+ unsigned int _pad1 : 4; /* must be 0000 */
+} __attribute__ ((packed));
+
+/* ETR data frames */
+struct etr_edf1 {
+ unsigned int u : 1; /* untuned bit */
+ unsigned int _pad0 : 1; /* must be 0 */
+ unsigned int r : 1; /* service request bit */
+ unsigned int _pad1 : 4; /* must be 0000 */
+ unsigned int a : 1; /* time adjustment bit */
+ unsigned int net_id : 8; /* ETR network id */
+ unsigned int etr_id : 8; /* id of ETR which sends data frames */
+ unsigned int etr_pn : 8; /* port number of ETR output port */
+} __attribute__ ((packed));
+
+struct etr_edf2 {
+ unsigned int etv : 32; /* Upper 32 bits of TOD. */
+} __attribute__ ((packed));
+
+struct etr_edf3 {
+ unsigned int rc : 8; /* failure reason code */
+ unsigned int _pad0 : 3; /* must be 000 */
+ unsigned int c : 1; /* ETR coupled bit */
+ unsigned int tc : 4; /* ETR type code */
+ unsigned int blto : 8; /* biased local time offset */
+ /* (blto - 128) * 15 = minutes */
+ unsigned int buo : 8; /* biased utc offset */
+ /* (buo - 128) = leap seconds */
+} __attribute__ ((packed));
+
+struct etr_edf4 {
+ unsigned int ed : 8; /* ETS device dependent data */
+ unsigned int _pad0 : 1; /* must be 0 */
+ unsigned int buc : 5; /* biased ut1 correction */
+ /* (buc - 16) * 0.1 seconds */
+ unsigned int em : 6; /* ETS error magnitude */
+ unsigned int dc : 6; /* ETS drift code */
+ unsigned int sc : 6; /* ETS steering code */
+} __attribute__ ((packed));
+
+/*
+ * ETR attachment information block, two formats
+ * format 1 has 4 reserved words with a size of 64 bytes
+ * format 2 has 16 reserved words with a size of 96 bytes
+ */
+struct etr_aib {
+ struct etr_esw esw;
+ struct etr_slsw slsw;
+ unsigned long long tsp;
+ struct etr_edf1 edf1;
+ struct etr_edf2 edf2;
+ struct etr_edf3 edf3;
+ struct etr_edf4 edf4;
+ unsigned int reserved[16];
+} __attribute__ ((packed,aligned(8)));
+
+/* ETR interruption parameter */
+struct etr_irq_parm {
+ unsigned int _pad0 : 8;
+ unsigned int pc0 : 1; /* port 0 state change */
+ unsigned int pc1 : 1; /* port 1 state change */
+ unsigned int _pad1 : 3;
+ unsigned int eai : 1; /* ETR alert indication */
+ unsigned int _pad2 : 18;
+} __attribute__ ((packed));
+
+/* Query TOD offset result */
+struct etr_ptff_qto {
+ unsigned long long physical_clock;
+ unsigned long long tod_offset;
+ unsigned long long logical_tod_offset;
+ unsigned long long tod_epoch_difference;
+} __attribute__ ((packed));
+
+/* Inline assembly helper functions */
+static inline int etr_setr(struct etr_eacr *ctrl)
+{
+ int rc = -EOPNOTSUPP; /* returned unless the "la %0,0" below executes */
+
+ asm volatile(
+ " .insn s,0xb2160000,%1\n" /* *ctrl is an input-only operand */
+ "0: la %0,0\n" /* rc = 0 */
+ "1:\n"
+ EX_TABLE(0b,1b) /* presumably: a fault reported at 0b resumes at 1b */
+ : "+d" (rc) : "Q" (*ctrl));
+ return rc;
+}
+
+/* Stores a format 1 aib with 64 bytes to @aib; returns 0 or -EOPNOTSUPP */
+static inline int etr_stetr(struct etr_aib *aib)
+{
+	int rc = -EOPNOTSUPP;	/* kept unless the "la %0,0" below executes */
+
+	asm volatile(
+		" .insn s,0xb2170000,%1\n"
+		"0: la %0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc), "+Q" (*aib));	/* *aib is written: in/out */
+	return rc;
+}
+
+/* Stores a format 2 aib with 96 bytes for specified port; 0 or -EOPNOTSUPP */
+static inline int etr_steai(struct etr_aib *aib, unsigned int func)
+{
+	register unsigned int reg0 asm("0") = func;	/* function code in r0 */
+	int rc = -EOPNOTSUPP;	/* kept unless the "la %0,0" below executes */
+
+	asm volatile(
+		" .insn s,0xb2b30000,%1\n"
+		"0: la %0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc), "+Q" (*aib) : "d" (reg0));	/* *aib is written */
+	return rc;
+}
+
+/* Function codes for the steai instruction. */
+#define ETR_STEAI_STEPPING_PORT 0x10
+#define ETR_STEAI_ALTERNATE_PORT 0x11
+#define ETR_STEAI_PORT_0 0x12
+#define ETR_STEAI_PORT_1 0x13
+
+static inline int etr_ptff(void *ptff_block, unsigned int func)
+{
+	register unsigned int reg0 asm("0") = func;
+	register unsigned long reg1 asm("1") = (unsigned long) ptff_block;
+	int rc;	/* output-only operand: always set by ipm/srl below */
+
+	asm volatile(
+		" .word 0x0104\n"
+		" ipm %0\n"
+		" srl %0,28\n"
+		/* the block behind reg1 has no known size here: clobber memory */
+		: "=d" (rc) : "d" (reg0), "d" (reg1) : "cc", "memory");
+	return rc;
+}
+
+/* Function codes for the ptff instruction. */
+#define ETR_PTFF_QAF 0x00 /* query available functions */
+#define ETR_PTFF_QTO 0x01 /* query tod offset */
+#define ETR_PTFF_QSI 0x02 /* query steering information */
+#define ETR_PTFF_ATO 0x40 /* adjust tod offset */
+#define ETR_PTFF_STO 0x41 /* set tod offset */
+#define ETR_PTFF_SFS 0x42 /* set fine steering rate */
+#define ETR_PTFF_SGS 0x43 /* set gross steering rate */
+
+/* Functions needed by the machine check handler */
+void etr_switch_to_local(void);
+void etr_sync_check(void);
+
+/* STP interruption parameter */
+struct stp_irq_parm {
+ unsigned int _pad0 : 14;
+ unsigned int tsc : 1; /* Timing status change */
+ unsigned int lac : 1; /* Link availability change */
+ unsigned int tcpc : 1; /* Time control parameter change */
+ unsigned int _pad2 : 15;
+} __attribute__ ((packed));
+
+#define STP_OP_SYNC 1
+#define STP_OP_CTRL 3
+
+struct stp_sstpi {
+ unsigned int rsvd0;
+ unsigned int rsvd1 : 8;
+ unsigned int stratum : 8;
+ unsigned int vbits : 16;
+ unsigned int leaps : 16;
+ unsigned int tmd : 4;
+ unsigned int ctn : 4;
+ unsigned int rsvd2 : 3;
+ unsigned int c : 1;
+ unsigned int tst : 4;
+ unsigned int tzo : 16;
+ unsigned int dsto : 16;
+ unsigned int ctrl : 16;
+ unsigned int rsvd3 : 16;
+ unsigned int tto;
+ unsigned int rsvd4;
+ unsigned int ctnid[3];
+ unsigned int rsvd5;
+ unsigned int todoff[4];
+ unsigned int rsvd6[48];
+} __attribute__ ((packed));
+
+/* Functions needed by the machine check handler */
+void stp_sync_check(void);
+void stp_island_check(void);
+
+#endif /* __S390_ETR_H */
diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h
new file mode 100644
index 00000000000..c4a93d6327f
--- /dev/null
+++ b/arch/s390/include/asm/exec.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_EXEC_H
+#define __ASM_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif /* __ASM_EXEC_H */
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
new file mode 100644
index 00000000000..6276002d76b
--- /dev/null
+++ b/arch/s390/include/asm/extmem.h
@@ -0,0 +1,31 @@
+/*
+ * definitions for external memory segment support
+ * Copyright IBM Corp. 2003
+ */
+
+#ifndef _ASM_S390X_DCSS_H
+#define _ASM_S390X_DCSS_H
+#ifndef __ASSEMBLY__
+
+/* possible values for segment type as returned by segment_info */
+#define SEG_TYPE_SW 0
+#define SEG_TYPE_EW 1
+#define SEG_TYPE_SR 2
+#define SEG_TYPE_ER 3
+#define SEG_TYPE_SN 4
+#define SEG_TYPE_EN 5
+#define SEG_TYPE_SC 6
+#define SEG_TYPE_EWEN 7
+
+#define SEGMENT_SHARED 0
+#define SEGMENT_EXCLUSIVE 1
+
+int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length);
+void segment_unload(char *name);
+void segment_save(char *name);
+int segment_type (char* name);
+int segment_modify_shared (char *name, int do_nonshared);
+void segment_warning(int rc, char *seg_name);
+
+#endif
+#endif
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
new file mode 100644
index 00000000000..0aa6a7ed95a
--- /dev/null
+++ b/arch/s390/include/asm/facility.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_FACILITY_H
+#define __ASM_FACILITY_H
+
+#include <linux/string.h>
+#include <linux/preempt.h>
+#include <asm/lowcore.h>
+
+#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */
+
+/* Test bit @nr (bit 0 = MSB of byte 0) of @facilities; 0 when out of range. */
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+	const unsigned char *bytes = facilities;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return 0;
+	return !!(bytes[nr >> 3] & (0x80 >> (nr & 7)));
+}
+
+/*
+ * The test_facility function uses the bit ordering where the MSB is bit 0.
+ * That makes it easier to query facility bits with the bit number as
+ * documented in the Principles of Operation.
+ */
+static inline int test_facility(unsigned long nr)
+{
+ return __test_facility(nr, &S390_lowcore.stfle_fac_list);
+}
+
+/**
+ * stfle - Store facility list extended
+ * @stfle_fac_list: array where facility list can be stored
+ * @size: size of passed in array in double words (at least 1)
+ */
+static inline void stfle(u64 *stfle_fac_list, int size)
+{
+	unsigned long nr;
+
+	preempt_disable();
+	asm volatile(
+		" .insn s,0xb2b10000,0(0)\n" /* stfl */
+		"0:\n"
+		EX_TABLE(0b, 0b)
+		: "+m" (S390_lowcore.stfl_fac_list));
+	nr = 4; /* bytes stored by stfl */
+	memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
+	if (S390_lowcore.stfl_fac_list & 0x01000000) {
+		/* More facility bits available with stfle */
+		register unsigned long reg0 asm("0") = size - 1;
+
+		asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
+			: "+d" (reg0) : "a" (stfle_fac_list) : "memory", "cc");
+		nr = (reg0 + 1) * 8; /* # bytes needed for all facility bits */
+		if (nr > size * 8UL) /* but at most size dwords were stored */
+			nr = size * 8UL;
+	}
+	memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+	preempt_enable();
+}
+
+#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h
new file mode 100644
index 00000000000..c7df3803099
--- /dev/null
+++ b/arch/s390/include/asm/fb.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+#include <linux/fb.h>
+
+#define fb_pgprotect(...) do {} while (0)
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+ return 0;
+}
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
new file mode 100644
index 00000000000..7ecb92b469b
--- /dev/null
+++ b/arch/s390/include/asm/fcx.h
@@ -0,0 +1,311 @@
+/*
+ * Functions for assembling fcx enabled I/O control blocks.
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_FCX_H
+#define _ASM_S390_FCX_H _ASM_S390_FCX_H
+
+#include <linux/types.h>
+
+#define TCW_FORMAT_DEFAULT		0
+#define TCW_TIDAW_FORMAT_DEFAULT	0
+#define TCW_FLAGS_INPUT_TIDA		(1 << (23 - 5))
+#define TCW_FLAGS_TCCB_TIDA		(1 << (23 - 6))
+#define TCW_FLAGS_OUTPUT_TIDA		(1 << (23 - 7))
+#define TCW_FLAGS_TIDAW_FORMAT(x)	(((x) & 3) << (23 - 9)) /* encode 2 bits */
+#define TCW_FLAGS_GET_TIDAW_FORMAT(x)	(((x) >> (23 - 9)) & 3) /* decode them */
+
+/**
+ * struct tcw - Transport Control Word (TCW)
+ * @format: TCW format
+ * @flags: TCW flags
+ * @tccbl: Transport-Command-Control-Block Length
+ * @r: Read Operations
+ * @w: Write Operations
+ * @output: Output-Data Address
+ * @input: Input-Data Address
+ * @tsb: Transport-Status-Block Address
+ * @tccb: Transport-Command-Control-Block Address
+ * @output_count: Output Count
+ * @input_count: Input Count
+ * @intrg: Interrogate TCW Address
+ */
+struct tcw {
+ u32 format:2;
+ u32 :6;
+ u32 flags:24;
+ u32 :8;
+ u32 tccbl:6;
+ u32 r:1;
+ u32 w:1;
+ u32 :16;
+ u64 output;
+ u64 input;
+ u64 tsb;
+ u64 tccb;
+ u32 output_count;
+ u32 input_count;
+ u32 :32;
+ u32 :32;
+ u32 :32;
+ u32 intrg;
+} __attribute__ ((packed, aligned(64)));
+
+#define TIDAW_FLAGS_LAST (1 << (7 - 0))
+#define TIDAW_FLAGS_SKIP (1 << (7 - 1))
+#define TIDAW_FLAGS_DATA_INT (1 << (7 - 2))
+#define TIDAW_FLAGS_TTIC (1 << (7 - 3))
+#define TIDAW_FLAGS_INSERT_CBC (1 << (7 - 4))
+
+/**
+ * struct tidaw - Transport-Indirect-Addressing Word (TIDAW)
+ * @flags: TIDAW flags. Can be an arithmetic OR of the following constants:
+ * %TIDAW_FLAGS_LAST, %TIDAW_FLAGS_SKIP, %TIDAW_FLAGS_DATA_INT,
+ * %TIDAW_FLAGS_TTIC, %TIDAW_FLAGS_INSERT_CBC
+ * @count: Count
+ * @addr: Address
+ */
+struct tidaw {
+ u32 flags:8;
+ u32 :24;
+ u32 count;
+ u64 addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct tsa_iostat - I/O-Status Transport-Status Area (IO-Stat TSA)
+ * @dev_time: Device Time
+ * @def_time: Defer Time
+ * @queue_time: Queue Time
+ * @dev_busy_time: Device-Busy Time
+ * @dev_act_time: Device-Active-Only Time
+ * @sense: Sense Data (if present)
+ */
+struct tsa_iostat {
+ u32 dev_time;
+ u32 def_time;
+ u32 queue_time;
+ u32 dev_busy_time;
+ u32 dev_act_time;
+ u8 sense[32];
+} __attribute__ ((packed));
+
+/**
+ * struct tsa_ddpc - Device-Detected-Program-Check Transport-Status Area (DDPC TSA)
+ * @rc: Reason Code
+ * @rcq: Reason Code Qualifier
+ * @sense: Sense Data (if present)
+ */
+struct tsa_ddpc {
+ u32 :24;
+ u32 rc:8;
+ u8 rcq[16];
+ u8 sense[32];
+} __attribute__ ((packed));
+
+#define TSA_INTRG_FLAGS_CU_STATE_VALID (1 << (7 - 0))
+#define TSA_INTRG_FLAGS_DEV_STATE_VALID (1 << (7 - 1))
+#define TSA_INTRG_FLAGS_OP_STATE_VALID (1 << (7 - 2))
+
+/**
+ * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA)
+ * @format: Format
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSA_INTRG_FLAGS_CU_STATE_VALID, %TSA_INTRG_FLAGS_DEV_STATE_VALID,
+ * %TSA_INTRG_FLAGS_OP_STATE_VALID
+ * @cu_state: Control-Unit State
+ * @dev_state: Device State
+ * @op_state: Operation State
+ * @sd_info: State-Dependent Information
+ * @dl_id: Device-Level Identifier
+ * @dd_data: Device-Dependent Data
+ */
+struct tsa_intrg {
+ u32 format:8;
+ u32 flags:8;
+ u32 cu_state:8;
+ u32 dev_state:8;
+ u32 op_state:8;
+ u32 :24;
+ u8 sd_info[12];
+ u32 dl_id;
+ u8 dd_data[28];
+} __attribute__ ((packed));
+
+#define TSB_FORMAT_NONE 0
+#define TSB_FORMAT_IOSTAT 1
+#define TSB_FORMAT_DDPC 2
+#define TSB_FORMAT_INTRG 3
+
+#define TSB_FLAGS_DCW_OFFSET_VALID (1 << (7 - 0))
+#define TSB_FLAGS_COUNT_VALID (1 << (7 - 1))
+#define TSB_FLAGS_CACHE_MISS (1 << (7 - 2))
+#define TSB_FLAGS_TIME_VALID (1 << (7 - 3))
+#define TSB_FLAGS_FORMAT(x) ((x) & 7)
+#define TSB_FORMAT(t) ((t)->flags & 7)
+
+/**
+ * struct tsb - Transport-Status Block (TSB)
+ * @length: Length
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSB_FLAGS_DCW_OFFSET_VALID, %TSB_FLAGS_COUNT_VALID, %TSB_FLAGS_CACHE_MISS,
+ * %TSB_FLAGS_TIME_VALID
+ * @dcw_offset: DCW Offset
+ * @count: Count
+ * @tsa: Transport-Status-Area
+ */
+struct tsb {
+ u32 length:8;
+ u32 flags:8;
+ u32 dcw_offset:16;
+ u32 count;
+ u32 :32;
+ union {
+ struct tsa_iostat iostat;
+ struct tsa_ddpc ddpc;
+ struct tsa_intrg intrg;
+ } __attribute__ ((packed)) tsa;
+} __attribute__ ((packed, aligned(8)));
+
+#define DCW_INTRG_FORMAT_DEFAULT 0
+
+#define DCW_INTRG_RC_UNSPECIFIED 0
+#define DCW_INTRG_RC_TIMEOUT 1
+
+#define DCW_INTRG_RCQ_UNSPECIFIED 0
+#define DCW_INTRG_RCQ_PRIMARY 1
+#define DCW_INTRG_RCQ_SECONDARY 2
+
+#define DCW_INTRG_FLAGS_MPM (1 << (7 - 0))
+#define DCW_INTRG_FLAGS_PPR (1 << (7 - 1))
+#define DCW_INTRG_FLAGS_CRIT (1 << (7 - 2))
+
+/**
+ * struct dcw_intrg_data - Interrogate DCW data
+ * @format: Format. Should be %DCW_INTRG_FORMAT_DEFAULT
+ * @rc: Reason Code. Can be one of %DCW_INTRG_RC_UNSPECIFIED,
+ * %DCW_INTRG_RC_TIMEOUT
+ * @rcq: Reason Code Qualifier: Can be one of %DCW_INTRG_RCQ_UNSPECIFIED,
+ * %DCW_INTRG_RCQ_PRIMARY, %DCW_INTRG_RCQ_SECONDARY
+ * @lpm: Logical-Path Mask
+ * @pam: Path-Available Mask
+ * @pim: Path-Installed Mask
+ * @timeout: Timeout
+ * @flags: Flags. Can be an arithmetic OR of %DCW_INTRG_FLAGS_MPM,
+ * %DCW_INTRG_FLAGS_PPR, %DCW_INTRG_FLAGS_CRIT
+ * @time: Time
+ * @prog_id: Program Identifier
+ * @prog_data: Program-Dependent Data
+ */
+struct dcw_intrg_data {
+ u32 format:8;
+ u32 rc:8;
+ u32 rcq:8;
+ u32 lpm:8;
+ u32 pam:8;
+ u32 pim:8;
+ u32 timeout:16;
+ u32 flags:8;
+ u32 :24;
+ u32 :32;
+ u64 time;
+ u64 prog_id;
+ u8 prog_data[0];
+} __attribute__ ((packed));
+
+#define DCW_FLAGS_CC (1 << (7 - 1))
+
+#define DCW_CMD_WRITE 0x01
+#define DCW_CMD_READ 0x02
+#define DCW_CMD_CONTROL 0x03
+#define DCW_CMD_SENSE 0x04
+#define DCW_CMD_SENSE_ID 0xe4
+#define DCW_CMD_INTRG 0x40
+
+/**
+ * struct dcw - Device-Command Word (DCW)
+ * @cmd: Command Code. Can be one of %DCW_CMD_WRITE, %DCW_CMD_READ,
+ * %DCW_CMD_CONTROL, %DCW_CMD_SENSE, %DCW_CMD_SENSE_ID, %DCW_CMD_INTRG
+ * @flags: Flags. Can be an arithmetic OR of %DCW_FLAGS_CC
+ * @cd_count: Control-Data Count
+ * @count: Count
+ * @cd: Control Data
+ */
+struct dcw {
+ u32 cmd:8;
+ u32 flags:8;
+ u32 :8;
+ u32 cd_count:8;
+ u32 count;
+ u8 cd[0];
+} __attribute__ ((packed));
+
+#define TCCB_FORMAT_DEFAULT 0x7f
+#define TCCB_MAX_DCW 30
+#define TCCB_MAX_SIZE (sizeof(struct tccb_tcah) + \
+ TCCB_MAX_DCW * sizeof(struct dcw) + \
+ sizeof(struct tccb_tcat))
+#define TCCB_SAC_DEFAULT 0x1ffe
+#define TCCB_SAC_INTRG 0x1fff
+
+/**
+ * struct tccb_tcah - Transport-Command-Area Header (TCAH)
+ * @format: Format. Should be %TCCB_FORMAT_DEFAULT
+ * @tcal: Transport-Command-Area Length
+ * @sac: Service-Action Code. Can be one of %TCCB_SAC_DEFAULT, %TCCB_SAC_INTRG
+ * @prio: Priority
+ */
+struct tccb_tcah {
+ u32 format:8;
+ u32 :24;
+ u32 :24;
+ u32 tcal:8;
+ u32 sac:16;
+ u32 :8;
+ u32 prio:8;
+ u32 :32;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb_tcat - Transport-Command-Area Trailer (TCAT)
+ * @count: Transport Count
+ */
+struct tccb_tcat {
+ u32 :32;
+ u32 count;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb - (partial) Transport-Command-Control Block (TCCB)
+ * @tcah: TCAH
+ * @tca: Transport-Command Area
+ */
+struct tccb {
+ struct tccb_tcah tcah;
+ u8 tca[0];
+} __attribute__ ((packed, aligned(8)));
+
+struct tcw *tcw_get_intrg(struct tcw *tcw);
+void *tcw_get_data(struct tcw *tcw);
+struct tccb *tcw_get_tccb(struct tcw *tcw);
+struct tsb *tcw_get_tsb(struct tcw *tcw);
+
+void tcw_init(struct tcw *tcw, int r, int w);
+void tcw_finalize(struct tcw *tcw, int num_tidaws);
+
+void tcw_set_intrg(struct tcw *tcw, struct tcw *intrg_tcw);
+void tcw_set_data(struct tcw *tcw, void *data, int use_tidal);
+void tcw_set_tccb(struct tcw *tcw, struct tccb *tccb);
+void tcw_set_tsb(struct tcw *tcw, struct tsb *tsb);
+
+void tccb_init(struct tccb *tccb, size_t tccb_size, u32 sac);
+void tsb_init(struct tsb *tsb);
+struct dcw *tccb_add_dcw(struct tccb *tccb, size_t tccb_size, u8 cmd, u8 flags,
+ void *cd, u8 cd_count, u32 count);
+struct tidaw *tcw_add_tidaw(struct tcw *tcw, int num_tidaws, u8 flags,
+ void *addr, u32 count);
+
+#endif /* _ASM_S390_FCX_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
new file mode 100644
index 00000000000..bf246dae136
--- /dev/null
+++ b/arch/s390/include/asm/ftrace.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_S390_FTRACE_H
+#define _ASM_S390_FTRACE_H
+
+#ifndef __ASSEMBLY__
+
+extern void _mcount(void);
+
+struct dyn_arch_ftrace { };
+
+#define MCOUNT_ADDR ((long)_mcount)
+
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_64BIT
+#define MCOUNT_INSN_SIZE 12
+#else
+#define MCOUNT_INSN_SIZE 22
+#endif
+
+#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
new file mode 100644
index 00000000000..a4811aa0304
--- /dev/null
+++ b/arch/s390/include/asm/futex.h
@@ -0,0 +1,96 @@
+#ifndef _ASM_S390_FUTEX_H
+#define _ASM_S390_FUTEX_H
+
+#include <linux/uaccess.h>
+#include <linux/futex.h>
+#include <asm/mmu_context.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
+ asm volatile( \
+ " sacf 256\n" \
+ "0: l %1,0(%6)\n" \
+ "1:"insn \
+ "2: cs %1,%2,0(%6)\n" \
+ "3: jl 1b\n" \
+ " lhi %0,0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
+ "=m" (*uaddr) \
+ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
+ "m" (*uaddr) : "cc");
+
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;	/* operation code */
+	int cmp = (encoded_op >> 24) & 15;	/* comparison code */
+	int oparg = (encoded_op << 8) >> 20;	/* bits 12-23 of encoded_op */
+	int cmparg = (encoded_op << 20) >> 20;	/* bits 0-11 of encoded_op */
+	int oldval = 0, newval, ret;
+
+	load_kernel_asce();
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	pagefault_disable();
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("lr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:	/* and-not: new value is oldval & ~oparg */
+		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+				  ret, oldval, newval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+	pagefault_enable();
+
+	if (!ret) {	/* update done: compare the old value with cmparg */
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ int ret;
+
+ load_kernel_asce();
+ asm volatile(
+ " sacf 256\n"
+ "0: cs %1,%4,0(%5)\n"
+ "1: la %0,0\n"
+ "2: sacf 768\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+ : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ : "cc", "memory");
+ *uval = oldval;
+ return ret;
+}
+
+#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
new file mode 100644
index 00000000000..b7eabaaeffb
--- /dev/null
+++ b/arch/s390/include/asm/hardirq.h
@@ -0,0 +1,26 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ * Derived from "include/asm-i386/hardirq.h"
+ */
+
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <asm/lowcore.h>
+
+#define local_softirq_pending() (S390_lowcore.softirq_pending)
+
+#define __ARCH_IRQ_STAT
+#define __ARCH_HAS_DO_SOFTIRQ
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
new file mode 100644
index 00000000000..11eae5f55b7
--- /dev/null
+++ b/arch/s390/include/asm/hugetlb.h
@@ -0,0 +1,115 @@
+/*
+ * IBM System z Huge TLB Page Support for Kernel.
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_HUGETLB_H
+#define _ASM_S390_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+#define is_hugepage_only_range(mm, addr, len) 0
+#define hugetlb_free_pgd_range free_pgd_range
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+/*
+ * Check whether [addr, addr + len) may be used for a huge page mapping.
+ * Both @addr and @len must have no bits set outside HPAGE_MASK, i.e. be
+ * huge-page aligned; no further preparation is done and @file is unused.
+ * Returns 0 if the range is acceptable, -EINVAL otherwise.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	if ((len | addr) & ~HPAGE_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+#define hugetlb_prefault_arch_hook(mm) do { } while (0)
+#define arch_clear_hugepage_flags(page) do { } while (0)
+
+int arch_prepare_hugepage(struct page *page);
+void arch_release_hugepage(struct page *page);
+
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ int changed = !pte_same(huge_ptep_get(ptep), pte);
+ if (changed) {
+ huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ }
+ return changed;
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+ return mk_pte(page, pgprot);
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline int huge_pte_write(pte_t pte)
+{
+ return pte_write(pte);
+}
+
+static inline int huge_pte_dirty(pte_t pte)
+{
+ return pte_dirty(pte);
+}
+
+static inline pte_t huge_pte_mkwrite(pte_t pte)
+{
+ return pte_mkwrite(pte);
+}
+
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+ return pte_mkdirty(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return pte_modify(pte, newprot);
+}
+
+#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
new file mode 100644
index 00000000000..ee96a8b697f
--- /dev/null
+++ b/arch/s390/include/asm/hw_irq.h
@@ -0,0 +1,11 @@
+#ifndef _HW_IRQ_H
+#define _HW_IRQ_H
+
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
+
+#endif
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
new file mode 100644
index 00000000000..ea5a6e45fd9
--- /dev/null
+++ b/arch/s390/include/asm/idals.h
@@ -0,0 +1,248 @@
+/*
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2000
+ *
+ * History of changes
+ * 07/24/00 new file
+ * 05/04/02 code restructuring.
+ */
+
+#ifndef _S390_IDALS_H
+#define _S390_IDALS_H
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/cio.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_64BIT
+#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
+#else
+#define IDA_SIZE_LOG 11 /* 11 for 2k , 12 for 4k */
+#endif
+#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+
+/*
+ * Test if an address/length pair needs an idal list.
+ */
+static inline int
+idal_is_needed(void *vaddr, unsigned int length)
+{
+#ifdef CONFIG_64BIT
+ return ((__pa(vaddr) + length - 1) >> 31) != 0;
+#else
+ return 0;
+#endif
+}
+
+
+/*
+ * Return the number of idal words needed for an address/length pair.
+ */
+static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
+{
+ return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
+ (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+}
+
+/*
+ * Create the list of idal words for an address/length pair; returns the
+ * position after the last word written (@idaws itself if none is needed).
+ */
+static inline unsigned long *idal_create_words(unsigned long *idaws,
+					       void *vaddr, unsigned int length)
+{
+	unsigned long paddr = __pa(vaddr);
+	unsigned int cidaw = idal_nr_words(vaddr, length);
+
+	if (cidaw == 0)	/* length 0 at a block boundary: "--cidaw" would wrap */
+		return idaws;
+	*idaws++ = paddr;	/* first word keeps the offset into its block */
+	paddr &= -IDA_BLOCK_SIZE;
+	while (--cidaw > 0) {
+		paddr += IDA_BLOCK_SIZE;
+		*idaws++ = paddr;
+	}
+	return idaws;
+}
+
+/*
+ * Sets the address of the data in CCW.
+ * If necessary it allocates an IDAL and sets the appropriate flags.
+ */
+static inline int
+set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+{
+#ifdef CONFIG_64BIT
+ unsigned int nridaws;
+ unsigned long *idal;
+
+ if (ccw->flags & CCW_FLAG_IDA)
+ return -EINVAL;
+ nridaws = idal_nr_words(vaddr, ccw->count);
+ if (nridaws > 0) {
+ idal = kmalloc(nridaws * sizeof(unsigned long),
+ GFP_ATOMIC | GFP_DMA );
+ if (idal == NULL)
+ return -ENOMEM;
+ idal_create_words(idal, vaddr, ccw->count);
+ ccw->flags |= CCW_FLAG_IDA;
+ vaddr = idal;
+ }
+#endif
+ ccw->cda = (__u32)(unsigned long) vaddr;
+ return 0;
+}
+
+/*
+ * Releases any allocated IDAL related to the CCW.
+ */
+static inline void
+clear_normalized_cda(struct ccw1 * ccw)
+{
+#ifdef CONFIG_64BIT
+ if (ccw->flags & CCW_FLAG_IDA) {
+ kfree((void *)(unsigned long) ccw->cda);
+ ccw->flags &= ~CCW_FLAG_IDA;
+ }
+#endif
+ ccw->cda = 0;
+}
+
+/*
+ * Idal buffer extension
+ */
+struct idal_buffer {
+ size_t size;
+ size_t page_order;
+ void *data[0];
+};
+
+/*
+ * Allocate an idal buffer
+ */
+static inline struct idal_buffer *
+idal_buffer_alloc(size_t size, int page_order)
+{
+ struct idal_buffer *ib;
+ int nr_chunks, nr_ptrs, i;
+
+ nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+ nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
+ ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *),
+ GFP_DMA | GFP_KERNEL);
+ if (ib == NULL)
+ return ERR_PTR(-ENOMEM);
+ ib->size = size;
+ ib->page_order = page_order;
+ for (i = 0; i < nr_ptrs; i++) {
+ if ((i & (nr_chunks - 1)) != 0) {
+ ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
+ continue;
+ }
+ ib->data[i] = (void *)
+ __get_free_pages(GFP_KERNEL, page_order);
+ if (ib->data[i] != NULL)
+ continue;
+ // Not enough memory
+ while (i >= nr_chunks) {
+ i -= nr_chunks;
+ free_pages((unsigned long) ib->data[i],
+ ib->page_order);
+ }
+ kfree(ib);
+ return ERR_PTR(-ENOMEM);
+ }
+ return ib;
+}
+
+/*
+ * Free an idal buffer.
+ */
+static inline void
+idal_buffer_free(struct idal_buffer *ib)
+{
+ int nr_chunks, nr_ptrs, i;
+
+ nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+ nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG;
+ for (i = 0; i < nr_ptrs; i += nr_chunks)
+ free_pages((unsigned long) ib->data[i], ib->page_order);
+ kfree(ib);
+}
+
+/*
+ * Test if a idal list is really needed.
+ */
+static inline int
+__idal_buffer_is_needed(struct idal_buffer *ib)
+{
+#ifdef CONFIG_64BIT
+ return ib->size > (4096ul << ib->page_order) ||
+ idal_is_needed(ib->data[0], ib->size);
+#else
+ return ib->size > (4096ul << ib->page_order);
+#endif
+}
+
+/*
+ * Set channel data address to idal buffer.
+ */
+static inline void
+idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
+{
+ if (__idal_buffer_is_needed(ib)) {
+ // setup idals;
+ ccw->cda = (u32)(addr_t) ib->data;
+ ccw->flags |= CCW_FLAG_IDA;
+ } else
+ // we do not need idals - use direct addressing
+ ccw->cda = (u32)(addr_t) ib->data[0];
+ ccw->count = ib->size;
+}
+
+/*
+ * Copy count bytes from an idal buffer to user memory
+ */
+static inline size_t
+idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
+{
+ size_t left;
+ int i;
+
+ BUG_ON(count > ib->size);
+ for (i = 0; count > IDA_BLOCK_SIZE; i++) {
+ left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE);
+ if (left)
+ return left + count - IDA_BLOCK_SIZE;
+ to = (void __user *) to + IDA_BLOCK_SIZE;
+ count -= IDA_BLOCK_SIZE;
+ }
+ return copy_to_user(to, ib->data[i], count);
+}
+
+/*
+ * Copy count bytes from user memory to an idal buffer
+ */
+static inline size_t
+idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
+{
+ size_t left;
+ int i;
+
+ BUG_ON(count > ib->size);
+ for (i = 0; count > IDA_BLOCK_SIZE; i++) {
+ left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE);
+ if (left)
+ return left + count - IDA_BLOCK_SIZE;
+ from = (void __user *) from + IDA_BLOCK_SIZE;
+ count -= IDA_BLOCK_SIZE;
+ }
+ return copy_from_user(ib->data[i], from, count);
+}
+
+#endif
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
new file mode 100644
index 00000000000..cd6b9ee7b69
--- /dev/null
+++ b/arch/s390/include/asm/io.h
@@ -0,0 +1,72 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/io.h"
+ */
+
+#ifndef _S390_IO_H
+#define _S390_IO_H
+
+#include <linux/kernel.h>
+#include <asm/page.h>
+#include <asm/pci_io.h>
+
+void *xlate_dev_mem_ptr(unsigned long phys);
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer (yields p as is)
+ */
+#define xlate_dev_kmem_ptr(p) (p)
+
+#define IO_SPACE_LIMIT 0
+
+#ifdef CONFIG_PCI
+
+#define ioremap_nocache(addr, size) ioremap(addr, size)
+#define ioremap_wc ioremap_nocache
+
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
+{
+ return (void __iomem *) offset; /* offset is returned as-is; size is not used */
+}
+
+static inline void iounmap(volatile void __iomem *addr)
+{
+} /* empty body: addr is ignored */
+
+/*
+ * s390 needs a private implementation of pci_iomap since ioremap with its
+ * offset parameter isn't sufficient. That's because BAR spaces are not
+ * disjoint on s390 so we need the bar parameter of pci_iomap to find
+ * the corresponding device and create the mapping cookie.
+ */
+#define pci_iomap pci_iomap
+#define pci_iounmap pci_iounmap
+
+#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
+#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
+#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
+
+#define __raw_readb zpci_read_u8
+#define __raw_readw zpci_read_u16
+#define __raw_readl zpci_read_u32
+#define __raw_readq zpci_read_u64
+#define __raw_writeb zpci_write_u8
+#define __raw_writew zpci_write_u16
+#define __raw_writel zpci_write_u32
+#define __raw_writeq zpci_write_u64
+
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+#define readq_relaxed readq
+
+#endif /* CONFIG_PCI */
+
+#include <asm-generic/io.h>
+
+#endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
new file mode 100644
index 00000000000..2fcccc0c997
--- /dev/null
+++ b/arch/s390/include/asm/ipl.h
@@ -0,0 +1,182 @@
+/*
+ * s390 (re)ipl support
+ *
+ * Copyright IBM Corp. 2007
+ */
+
+#ifndef _ASM_S390_IPL_H
+#define _ASM_S390_IPL_H
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+
+#define IPL_PARMBLOCK_ORIGIN 0x2000
+
+#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
+ sizeof(struct ipl_block_fcp))
+
+#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 8)
+
+#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
+ sizeof(struct ipl_block_ccw))
+
+#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 8)
+
+#define IPL_MAX_SUPPORTED_VERSION (0)
+
+#define IPL_PARMBLOCK_START ((struct ipl_parameter_block *) \
+ IPL_PARMBLOCK_ORIGIN)
+#define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.len)
+
+struct ipl_list_hdr {
+ u32 len;
+ u8 reserved1[3];
+ u8 version;
+ u32 blk0_len;
+ u8 pbt;
+ u8 flags;
+ u16 reserved2;
+} __attribute__((packed));
+
+struct ipl_block_fcp {
+ u8 reserved1[313-1];
+ u8 opt;
+ u8 reserved2[3];
+ u16 reserved3;
+ u16 devno;
+ u8 reserved4[4];
+ u64 wwpn;
+ u64 lun;
+ u32 bootprog;
+ u8 reserved5[12];
+ u64 br_lba;
+ u32 scp_data_len;
+ u8 reserved6[260];
+ u8 scp_data[];
+} __attribute__((packed));
+
+#define DIAG308_VMPARM_SIZE 64
+#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
+ offsetof(struct ipl_block_fcp, scp_data)))
+
+struct ipl_block_ccw {
+ u8 load_parm[8];
+ u8 reserved1[84];
+ u8 reserved2[2];
+ u16 devno;
+ u8 vm_flags;
+ u8 reserved3[3];
+ u32 vm_parm_len;
+ u8 nss_name[8];
+ u8 vm_parm[DIAG308_VMPARM_SIZE];
+ u8 reserved4[8];
+} __attribute__((packed));
+
+struct ipl_parameter_block {
+ struct ipl_list_hdr hdr;
+ union {
+ struct ipl_block_fcp fcp;
+ struct ipl_block_ccw ccw;
+ } ipl_info;
+} __attribute__((packed,aligned(4096)));
+
+/*
+ * IPL validity flags
+ */
+extern u32 ipl_flags;
+extern u32 dump_prefix_page;
+
+struct dump_save_areas {
+ struct save_area **areas;
+ int count;
+};
+
+extern struct dump_save_areas dump_save_areas;
+struct save_area *dump_save_area_create(int cpu);
+
+extern void do_reipl(void);
+extern void do_halt(void);
+extern void do_poff(void);
+extern void ipl_save_parameters(void);
+extern void ipl_update_parameters(void);
+extern size_t append_ipl_vmparm(char *, size_t);
+extern size_t append_ipl_scpdata(char *, size_t);
+
+enum {
+ IPL_DEVNO_VALID = 1,
+ IPL_PARMBLOCK_VALID = 2,
+ IPL_NSS_VALID = 4,
+};
+
+enum ipl_type {
+ IPL_TYPE_UNKNOWN = 1,
+ IPL_TYPE_CCW = 2,
+ IPL_TYPE_FCP = 4,
+ IPL_TYPE_FCP_DUMP = 8,
+ IPL_TYPE_NSS = 16,
+};
+
+struct ipl_info
+{
+ enum ipl_type type;
+ union {
+ struct {
+ struct ccw_dev_id dev_id;
+ } ccw;
+ struct {
+ struct ccw_dev_id dev_id;
+ u64 wwpn;
+ u64 lun;
+ } fcp;
+ struct {
+ char name[NSS_NAME_SIZE + 1];
+ } nss;
+ } data;
+};
+
+extern struct ipl_info ipl_info;
+extern void setup_ipl(void);
+
+/*
+ * DIAG 308 support
+ */
+enum diag308_subcode {
+ DIAG308_REL_HSA = 2,
+ DIAG308_IPL = 3,
+ DIAG308_DUMP = 4,
+ DIAG308_SET = 5,
+ DIAG308_STORE = 6,
+};
+
+enum diag308_ipl_type {
+ DIAG308_IPL_TYPE_FCP = 0,
+ DIAG308_IPL_TYPE_CCW = 2,
+};
+
+enum diag308_opt {
+ DIAG308_IPL_OPT_IPL = 0x10,
+ DIAG308_IPL_OPT_DUMP = 0x20,
+};
+
+enum diag308_flags {
+ DIAG308_FLAGS_LP_VALID = 0x80,
+};
+
+enum diag308_vm_flags {
+ DIAG308_VM_FLAGS_NSS_VALID = 0x80,
+ DIAG308_VM_FLAGS_VP_VALID = 0x40,
+};
+
+enum diag308_rc {
+ DIAG308_RC_OK = 0x0001,
+ DIAG308_RC_NOCONFIG = 0x0102,
+};
+
+extern int diag308(unsigned long subcode, void *addr);
+extern void diag308_reset(void);
+extern void store_status(void);
+extern void lgr_info_log(void);
+
+#endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
new file mode 100644
index 00000000000..c4dd400a279
--- /dev/null
+++ b/arch/s390/include/asm/irq.h
@@ -0,0 +1,109 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+#define EXT_INTERRUPT 1
+#define IO_INTERRUPT 2
+#define THIN_INTERRUPT 3
+
+#define NR_IRQS_BASE 4
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS NR_IRQS_BASE
+#endif
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ 0
+
+/* External interruption codes */
+#define EXT_IRQ_INTERRUPT_KEY 0x0040
+#define EXT_IRQ_CLK_COMP 0x1004
+#define EXT_IRQ_CPU_TIMER 0x1005
+#define EXT_IRQ_WARNING_TRACK 0x1007
+#define EXT_IRQ_MALFUNC_ALERT 0x1200
+#define EXT_IRQ_EMERGENCY_SIG 0x1201
+#define EXT_IRQ_EXTERNAL_CALL 0x1202
+#define EXT_IRQ_TIMING_ALERT 0x1406
+#define EXT_IRQ_MEASURE_ALERT 0x1407
+#define EXT_IRQ_SERVICE_SIG 0x2401
+#define EXT_IRQ_CP_SERVICE 0x2603
+#define EXT_IRQ_IUCV 0x4000
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hardirq.h>
+#include <linux/percpu.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+
+enum interruption_class {
+ IRQEXT_CLK,
+ IRQEXT_EXC,
+ IRQEXT_EMS,
+ IRQEXT_TMR,
+ IRQEXT_TLA,
+ IRQEXT_PFL,
+ IRQEXT_DSD,
+ IRQEXT_VRT,
+ IRQEXT_SCP,
+ IRQEXT_IUC,
+ IRQEXT_CMS,
+ IRQEXT_CMC,
+ IRQEXT_CMR,
+ IRQIO_CIO,
+ IRQIO_QAI,
+ IRQIO_DAS,
+ IRQIO_C15,
+ IRQIO_C70,
+ IRQIO_TAP,
+ IRQIO_VMR,
+ IRQIO_LCS,
+ IRQIO_CLW,
+ IRQIO_CTC,
+ IRQIO_APB,
+ IRQIO_ADM,
+ IRQIO_CSC,
+ IRQIO_PCI,
+ IRQIO_MSI,
+ IRQIO_VIR,
+ IRQIO_VAI,
+ NMI_NMI,
+ CPU_RST,
+ NR_ARCH_IRQS
+};
+
+struct irq_stat {
+ unsigned int irqs[NR_ARCH_IRQS];
+};
+
+DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+
+static __always_inline void inc_irq_stat(enum interruption_class irq)
+{
+ __get_cpu_var(irq_stat).irqs[irq]++; /* bump the per-cpu irq_stat counter for this class */
+}
+
+struct ext_code {
+ unsigned short subcode;
+ unsigned short code;
+};
+
+typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
+
+int register_external_irq(u16 code, ext_int_handler_t handler);
+int unregister_external_irq(u16 code, ext_int_handler_t handler);
+
+enum irq_subclass {
+ IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
+ IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+};
+
+void irq_subclass_register(enum irq_subclass subclass);
+void irq_subclass_unregister(enum irq_subclass subclass);
+
+#define irq_canonicalize(irq) (irq)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h
new file mode 100644
index 00000000000..3dd9c0b7027
--- /dev/null
+++ b/arch/s390/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
new file mode 100644
index 00000000000..37b9091ab8c
--- /dev/null
+++ b/arch/s390/include/asm/irqflags.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright IBM Corp. 2006, 2010
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_IRQFLAGS_H
+#define __ASM_IRQFLAGS_H
+
+#include <linux/types.h>
+
+/* store then OR system mask. */
+#define __arch_local_irq_stosm(__or) \
+({ \
+ unsigned long __mask; \
+ asm volatile( \
+ " stosm %0,%1" \
+ : "=Q" (__mask) : "i" (__or) : "memory"); \
+ __mask; \
+})
+
+/* store then AND system mask. */
+#define __arch_local_irq_stnsm(__and) \
+({ \
+ unsigned long __mask; \
+ asm volatile( \
+ " stnsm %0,%1" \
+ : "=Q" (__mask) : "i" (__and) : "memory"); \
+ __mask; \
+})
+
+/* set system mask. */
+static inline notrace void __arch_local_irq_ssm(unsigned long flags)
+{
+ asm volatile("ssm %0" : : "Q" (flags) : "memory");
+}
+
+static inline notrace unsigned long arch_local_save_flags(void)
+{
+ return __arch_local_irq_stosm(0x00); /* OR with 0 changes nothing; only the stored mask is wanted */
+}
+
+static inline notrace unsigned long arch_local_irq_save(void)
+{
+ return __arch_local_irq_stnsm(0xfc); /* AND clears mask bits 0x03; the previous mask is returned */
+}
+
+static inline notrace void arch_local_irq_disable(void)
+{
+ arch_local_irq_save(); /* same operation as save; the returned old mask is dropped */
+}
+
+static inline notrace void arch_local_irq_enable(void)
+{
+ __arch_local_irq_stosm(0x03); /* OR 0x03 into the system mask; the stored old mask is dropped */
+}
+
+static inline notrace void arch_local_irq_restore(unsigned long flags)
+{
+ __arch_local_irq_ssm(flags);
+}
+
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & (3UL << (BITS_PER_LONG - 8))); /* true only if both 0x03 bits of the top byte are clear */
+}
+
+static inline notrace bool arch_irqs_disabled(void)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
new file mode 100644
index 00000000000..68d7d68300f
--- /dev/null
+++ b/arch/s390/include/asm/isc.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_S390_ISC_H
+#define _ASM_S390_ISC_H
+
+#include <linux/types.h>
+
+/*
+ * I/O interruption subclasses used by drivers.
+ * Please add all used iscs here so that it is possible to distribute
+ * isc usage between drivers.
+ * Reminder: 0 is highest priority, 7 lowest.
+ */
+#define MAX_ISC 7
+
+/* Regular I/O interrupts. */
+#define IO_SCH_ISC 3 /* regular I/O subchannels */
+#define CONSOLE_ISC 1 /* console I/O subchannel */
+#define EADM_SCH_ISC 4 /* EADM subchannels */
+#define CHSC_SCH_ISC 7 /* CHSC subchannels */
+/* Adapter interrupts. */
+#define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */
+#define PCI_ISC 2 /* PCI I/O subchannels */
+#define AP_ISC 6 /* adjunct processor (crypto) devices */
+
+/* Functions for registration of I/O interruption subclasses */
+void isc_register(unsigned int isc);
+void isc_unregister(unsigned int isc);
+
+#endif /* _ASM_S390_ISC_H */
diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h
new file mode 100644
index 00000000000..fb1bedd3dc0
--- /dev/null
+++ b/arch/s390/include/asm/itcw.h
@@ -0,0 +1,30 @@
+/*
+ * Functions for incremental construction of fcx enabled I/O control blocks.
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_ITCW_H
+#define _ASM_S390_ITCW_H
+
+#include <linux/types.h>
+#include <asm/fcx.h>
+
+#define ITCW_OP_READ 0
+#define ITCW_OP_WRITE 1
+
+struct itcw;
+
+struct tcw *itcw_get_tcw(struct itcw *itcw);
+size_t itcw_calc_size(int intrg, int max_tidaws, int intrg_max_tidaws);
+struct itcw *itcw_init(void *buffer, size_t size, int op, int intrg,
+ int max_tidaws, int intrg_max_tidaws);
+struct dcw *itcw_add_dcw(struct itcw *itcw, u8 cmd, u8 flags, void *cd,
+ u8 cd_count, u32 count);
+struct tidaw *itcw_add_tidaw(struct itcw *itcw, u8 flags, void *addr,
+ u32 count);
+void itcw_set_data(struct itcw *itcw, void *addr, int use_tidal);
+void itcw_finalize(struct itcw *itcw);
+
+#endif /* _ASM_S390_ITCW_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 00000000000..346b1c85ffb
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+
+#ifdef CONFIG_64BIT
+#define ASM_PTR ".quad"
+#define ASM_ALIGN ".balign 8"
+#else
+#define ASM_PTR ".long"
+#define ASM_ALIGN ".balign 4"
+#endif
+
+static __always_inline bool arch_static_branch(struct static_key *key)
+{
+ asm_volatile_goto("0: brcl 0,0\n" /* presumably the nop that JUMP_LABEL_NOP_SIZE describes - TODO confirm */
+ ".pushsection __jump_table, \"aw\"\n"
+ ASM_ALIGN "\n"
+ ASM_PTR " 0b, %l[label], %0\n" /* one record: code address, target, key - cf. struct jump_entry */
+ ".popsection\n"
+ : : "X" (key) : : label);
+ return false; /* reached when the instruction at 0: falls through */
+label:
+ return true; /* reached when control arrives at the asm goto label */
+}
+
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+ jump_label_t code;
+ jump_label_t target;
+ jump_label_t key;
+};
+
+#endif
diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h
new file mode 100644
index 00000000000..5c1abd47612
--- /dev/null
+++ b/arch/s390/include/asm/kdebug.h
@@ -0,0 +1,27 @@
+#ifndef _S390_KDEBUG_H
+#define _S390_KDEBUG_H
+
+/*
+ * Feb 2006 Ported to s390 <grundym@us.ibm.com>
+ */
+
+struct pt_regs;
+
+enum die_val {
+ DIE_OOPS = 1,
+ DIE_BPT,
+ DIE_SSTEP,
+ DIE_PANIC,
+ DIE_NMI,
+ DIE_DIE,
+ DIE_NMIWATCHDOG,
+ DIE_KERNELDEBUG,
+ DIE_TRAP,
+ DIE_GPF,
+ DIE_CALL,
+ DIE_NMI_IPI,
+};
+
+extern void die(struct pt_regs *, const char *);
+
+#endif
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
new file mode 100644
index 00000000000..694bcd6bd92
--- /dev/null
+++ b/arch/s390/include/asm/kexec.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+#ifndef _S390_KEXEC_H
+#define _S390_KEXEC_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control pages */
+/* Not more than 2GB */
+#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
+
+/* Maximum address we can use for the crash control pages */
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Allocate one page for the pdp and the second for the code */
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+/* Alignment of crashkernel memory */
+#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_S390
+
+/*
+ * Size for s390x ELF notes per CPU
+ *
+ * Seven notes plus zero note at the end: prstatus, fpregset, timer,
+ * tod_cmp, tod_reg, control regs, and prefix
+ */
+#define KEXEC_NOTE_BYTES \
+ (ALIGN(sizeof(struct elf_note), 4) * 8 + \
+ ALIGN(sizeof("CORE"), 4) * 7 + \
+ ALIGN(sizeof(struct elf_prstatus), 4) + \
+ ALIGN(sizeof(elf_fpregset_t), 4) + \
+ ALIGN(sizeof(u64), 4) + \
+ ALIGN(sizeof(u64), 4) + \
+ ALIGN(sizeof(u32), 4) + \
+ ALIGN(sizeof(u64) * 16, 4) + \
+ ALIGN(sizeof(u32), 4) \
+ )
+
+/* Provide a dummy definition to avoid build failures. */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs) { }
+
+#endif /*_S390_KEXEC_H */
diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h
new file mode 100644
index 00000000000..0a88622339e
--- /dev/null
+++ b/arch/s390/include/asm/kmap_types.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+#include <asm-generic/kmap_types.h>
+
+#endif
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
new file mode 100644
index 00000000000..4176dfe0fba
--- /dev/null
+++ b/arch/s390/include/asm/kprobes.h
@@ -0,0 +1,89 @@
+#ifndef _ASM_S390_KPROBES_H
+#define _ASM_S390_KPROBES_H
+/*
+ * Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes suggestions from
+ * Rusty Russell).
+ * 2004-Nov Modified for PPC64 by Ananth N Mavinakayanahalli
+ * <ananth@in.ibm.com>
+ * 2005-Dec Used as a template for s390 by Mike Grundy
+ * <grundym@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION 0x0002
+
+/* Maximum instruction size is 3 (16bit) halfwords: */
+#define MAX_INSN_SIZE 0x0003
+#define MAX_STACK_SIZE 64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+ (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+ ? (MAX_STACK_SIZE) \
+ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+
+#define kretprobe_blacklist_size 0
+
+#define KPROBE_SWAP_INST 0x10
+
+#define FIXUP_PSW_NORMAL 0x08
+#define FIXUP_BRANCH_NOT_TAKEN 0x04
+#define FIXUP_RETURN_REGISTER 0x02
+#define FIXUP_NOT_REQUIRED 0x01
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+ /* copy of original instruction */
+ kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned long kprobe_status;
+ unsigned long kprobe_saved_imask;
+ unsigned long kprobe_saved_ctl[3];
+ struct prev_kprobe prev_kprobe;
+ struct pt_regs jprobe_saved_regs;
+ kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
+#define flush_insn_slot(p) do { } while (0)
+
+#endif /* _ASM_S390_KPROBES_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
new file mode 100644
index 00000000000..4181d7baabb
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host.h
@@ -0,0 +1,454 @@
+/*
+ * definition for kernel virtual machines on s390
+ *
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <asm/debug.h>
+#include <asm/cpu.h>
+#include <asm/isc.h>
+
+#define KVM_MAX_VCPUS 64
+#define KVM_USER_MEM_SLOTS 32
+
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
+#define SIGP_CTRL_C 0x00800000
+
+struct sca_entry {
+ atomic_t ctrl;
+ __u32 reserved;
+ __u64 sda;
+ __u64 reserved2[2];
+} __attribute__((packed));
+
+union ipte_control {
+ unsigned long val;
+ struct {
+ unsigned long k : 1;
+ unsigned long kh : 31;
+ unsigned long kg : 32;
+ };
+};
+
+struct sca_block {
+ union ipte_control ipte_control;
+ __u64 reserved[5];
+ __u64 mcn;
+ __u64 reserved2;
+ struct sca_entry cpu[64];
+} __attribute__((packed));
+
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+
+struct kvm_s390_sie_block {
+ atomic_t cpuflags; /* 0x0000 */
+ __u32 : 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32 : 13;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ __u8 reserved10[16]; /* 0x0010 */
+#define PROG_BLOCK_SIE 0x00000001
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u8 reserved40[4]; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+ __u8 icptcode; /* 0x0050 */
+ __u8 reserved51; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54[2]; /* 0x0054 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+ __u8 reserved60; /* 0x0060 */
+ __u8 ecb; /* 0x0061 */
+ __u8 ecb2; /* 0x0062 */
+ __u8 reserved63[1]; /* 0x0063 */
+ __u32 scaol; /* 0x0064 */
+ __u8 reserved68[4]; /* 0x0068 */
+ __u32 todpr; /* 0x006c */
+ __u8 reserved70[32]; /* 0x0070 */
+ psw_t gpsw; /* 0x0090 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[20]; /* 0x00b0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ __u32 reservedc8; /* 0x00c8 */
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ __u64 tecmc; /* 0x00e8 */
+ __u8 reservedf0[16]; /* 0x00f0 */
+ __u64 gcr[16]; /* 0x0100 */
+ __u64 gbea; /* 0x0180 */
+ __u8 reserved188[24]; /* 0x0188 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[30]; /* 0x01c0 */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u8 reserved1f0[16]; /* 0x01f0 */
+} __attribute__((packed));
+
+struct kvm_s390_itdb {
+ __u8 data[256];
+} __packed;
+
+struct sie_page {
+ struct kvm_s390_sie_block sie_block;
+ __u8 reserved200[1024]; /* 0x0200 */
+ struct kvm_s390_itdb itdb; /* 0x0600 */
+ __u8 reserved700[2304]; /* 0x0700 */
+} __packed;
+
+struct kvm_vcpu_stat {
+ u32 exit_userspace;
+ u32 exit_null;
+ u32 exit_external_request;
+ u32 exit_external_interrupt;
+ u32 exit_stop_request;
+ u32 exit_validity;
+ u32 exit_instruction;
+ u32 instruction_lctl;
+ u32 instruction_lctlg;
+ u32 instruction_stctl;
+ u32 instruction_stctg;
+ u32 exit_program_interruption;
+ u32 exit_instr_and_program;
+ u32 deliver_external_call;
+ u32 deliver_emergency_signal;
+ u32 deliver_service_signal;
+ u32 deliver_virtio_interrupt;
+ u32 deliver_stop_signal;
+ u32 deliver_prefix_signal;
+ u32 deliver_restart_signal;
+ u32 deliver_program_int;
+ u32 deliver_io_int;
+ u32 exit_wait_state;
+ u32 instruction_pfmf;
+ u32 instruction_stidp;
+ u32 instruction_spx;
+ u32 instruction_stpx;
+ u32 instruction_stap;
+ u32 instruction_storage_key;
+ u32 instruction_ipte_interlock;
+ u32 instruction_stsch;
+ u32 instruction_chsc;
+ u32 instruction_stsi;
+ u32 instruction_stfl;
+ u32 instruction_tprot;
+ u32 instruction_essa;
+ u32 instruction_sigp_sense;
+ u32 instruction_sigp_sense_running;
+ u32 instruction_sigp_external_call;
+ u32 instruction_sigp_emergency;
+ u32 instruction_sigp_stop;
+ u32 instruction_sigp_arch;
+ u32 instruction_sigp_prefix;
+ u32 instruction_sigp_restart;
+ u32 diagnose_10;
+ u32 diagnose_44;
+ u32 diagnose_9c;
+};
+
+#define PGM_OPERATION 0x01
+#define PGM_PRIVILEGED_OP 0x02
+#define PGM_EXECUTE 0x03
+#define PGM_PROTECTION 0x04
+#define PGM_ADDRESSING 0x05
+#define PGM_SPECIFICATION 0x06
+#define PGM_DATA 0x07
+#define PGM_FIXED_POINT_OVERFLOW 0x08
+#define PGM_FIXED_POINT_DIVIDE 0x09
+#define PGM_DECIMAL_OVERFLOW 0x0a
+#define PGM_DECIMAL_DIVIDE 0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
+#define PGM_HFP_SIGNIFICANCE 0x0e
+#define PGM_HFP_DIVIDE 0x0f
+#define PGM_SEGMENT_TRANSLATION 0x10
+#define PGM_PAGE_TRANSLATION 0x11
+#define PGM_TRANSLATION_SPEC 0x12
+#define PGM_SPECIAL_OPERATION 0x13
+#define PGM_OPERAND 0x15
+#define PGM_TRACE_TABEL 0x16
+#define PGM_SPACE_SWITCH 0x1c
+#define PGM_HFP_SQUARE_ROOT 0x1d
+#define PGM_PC_TRANSLATION_SPEC 0x1f
+#define PGM_AFX_TRANSLATION 0x20
+#define PGM_ASX_TRANSLATION 0x21
+#define PGM_LX_TRANSLATION 0x22
+#define PGM_EX_TRANSLATION 0x23
+#define PGM_PRIMARY_AUTHORITY 0x24
+#define PGM_SECONDARY_AUTHORITY 0x25
+#define PGM_LFX_TRANSLATION 0x26
+#define PGM_LSX_TRANSLATION 0x27
+#define PGM_ALET_SPECIFICATION 0x28
+#define PGM_ALEN_TRANSLATION 0x29
+#define PGM_ALE_SEQUENCE 0x2a
+#define PGM_ASTE_VALIDITY 0x2b
+#define PGM_ASTE_SEQUENCE 0x2c
+#define PGM_EXTENDED_AUTHORITY 0x2d
+#define PGM_LSTE_SEQUENCE 0x2e
+#define PGM_ASTE_INSTANCE 0x2f
+#define PGM_STACK_FULL 0x30
+#define PGM_STACK_EMPTY 0x31
+#define PGM_STACK_SPECIFICATION 0x32
+#define PGM_STACK_TYPE 0x33
+#define PGM_STACK_OPERATION 0x34
+#define PGM_ASCE_TYPE 0x38
+#define PGM_REGION_FIRST_TRANS 0x39
+#define PGM_REGION_SECOND_TRANS 0x3a
+#define PGM_REGION_THIRD_TRANS 0x3b
+#define PGM_MONITOR 0x40
+#define PGM_PER 0x80
+#define PGM_CRYPTO_OPERATION 0x119
+
+struct kvm_s390_interrupt_info {
+ struct list_head list;
+ u64 type;
+ union {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_mchk_info mchk;
+ };
+};
+
+/* for local_interrupt.action_bits */
+#define ACTION_STORE_ON_STOP (1<<0)
+#define ACTION_STOP_ON_STOP (1<<1)
+
+struct kvm_s390_local_interrupt {
+ spinlock_t lock;
+ struct list_head list;
+ atomic_t active;
+ struct kvm_s390_float_interrupt *float_int;
+ int timer_due; /* event indicator for waitqueue below */
+ wait_queue_head_t *wq;
+ atomic_t *cpuflags;
+ unsigned int action_bits;
+};
+
+struct kvm_s390_float_interrupt {
+ spinlock_t lock;
+ struct list_head list;
+ atomic_t active;
+ int next_rr_cpu;
+ unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int irq_count;
+};
+
+struct kvm_hw_wp_info_arch {
+ unsigned long addr;
+ unsigned long phys_addr;
+ int len;
+ char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+ unsigned long addr;
+ int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which can be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+/* Test bits of vcpu->guest_debug; vcpu may be any pointer expression. */
+#define guestdbg_enabled(vcpu) \
+	((vcpu)->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+	((vcpu)->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+	((vcpu)->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+	((vcpu)->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+ unsigned long cr0;
+ unsigned long cr9;
+ unsigned long cr10;
+ unsigned long cr11;
+ struct kvm_hw_bp_info_arch *hw_bp_info;
+ struct kvm_hw_wp_info_arch *hw_wp_info;
+ int nr_hw_bp;
+ int nr_hw_wp;
+ unsigned long last_bp;
+};
+
+struct kvm_vcpu_arch {
+ struct kvm_s390_sie_block *sie_block;
+ s390_fp_regs host_fpregs;
+ unsigned int host_acrs[NUM_ACRS];
+ s390_fp_regs guest_fpregs;
+ struct kvm_s390_local_interrupt local_int;
+ struct hrtimer ckc_timer;
+ struct tasklet_struct tasklet;
+ struct kvm_s390_pgm_info pgm;
+ union {
+ struct cpuid cpu_id;
+ u64 stidp_data;
+ };
+ struct gmap *gmap;
+ struct kvm_guestdbg_info_arch guestdbg;
+#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+ unsigned long pfault_token;
+ unsigned long pfault_select;
+ unsigned long pfault_compare;
+};
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct s390_map_info {
+ struct list_head list;
+ __u64 guest_addr;
+ __u64 addr;
+ struct page *page;
+};
+
+struct s390_io_adapter {
+ unsigned int id;
+ int isc;
+ bool maskable;
+ bool masked;
+ bool swap;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
+struct kvm_arch{
+ struct sca_block *sca;
+ debug_info_t *dbf;
+ struct kvm_s390_float_interrupt float_int;
+ struct kvm_device *flic;
+ struct gmap *gmap;
+ int css_support;
+ int use_irqchip;
+ int use_cmma;
+ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+ wait_queue_head_t ipte_wq;
+ spinlock_t start_stop_lock;
+};
+
+#define KVM_HVA_ERR_BAD (-1UL)
+#define KVM_HVA_ERR_RO_BAD (-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+ return IS_ERR_VALUE(addr);
+}
+
+#define ASYNC_PF_PER_VCPU 64
+struct kvm_vcpu;
+struct kvm_async_pf;
+struct kvm_arch_async_pf {
+ unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
+#endif
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
new file mode 100644
index 00000000000..e0f842308a6
--- /dev/null
+++ b/arch/s390/include/asm/kvm_para.h
@@ -0,0 +1,157 @@
+/*
+ * definition for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+/*
+ * Hypercalls for KVM on s390. The calling convention is similar to the
+ * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
+ * as hypercall number and R7 as parameter 6. The return value is
+ * written to R2. We use the diagnose instruction as hypercall. To avoid
+ * conflicts with existing diagnoses for LPAR and z/VM, we do not use
+ * the instruction encoded number, but specify the hypercall number in
+ * R1 and use diagnose code 0x500 for every KVM hypercall.
+ *
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#ifndef __S390_KVM_PARA_H
+#define __S390_KVM_PARA_H
+
+#include <uapi/asm/kvm_para.h>
+
+
+
+static inline long kvm_hypercall0(unsigned long nr)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register long __rc asm("2"); /* R2: return value */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr): "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 on entry */
+ register long __rc asm("2"); /* R2: return value on exit */
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); /* "0" ties __p1 to operand 0 (__rc), both in R2 */
+ return __rc;
+}
+
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+ unsigned long p2)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 on entry */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register long __rc asm("2"); /* R2: return value on exit */
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) /* "0" ties __p1 to operand 0 (__rc) */
+ : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 on entry */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register long __rc asm("2"); /* R2: return value on exit */
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to operand 0 (__rc) */
+ "d" (__p3) : "memory", "cc");
+ return __rc;
+}
+
+
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 on entry */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register unsigned long __p4 asm("5") = p4; /* R5: parameter 4 */
+ register long __rc asm("2"); /* R2: return value on exit */
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to operand 0 (__rc) */
+ "d" (__p3), "d" (__p4) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 on entry */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register unsigned long __p4 asm("5") = p4; /* R5: parameter 4 */
+ register unsigned long __p5 asm("6") = p5; /* R6: parameter 5 */
+ register long __rc asm("2"); /* R2: return value on exit */
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to operand 0 (__rc) */
+ "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5,
+ unsigned long p6)
+{
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 on entry */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register unsigned long __p4 asm("5") = p4; /* R5: parameter 4 */
+ register unsigned long __p5 asm("6") = p5; /* R6: parameter 5 */
+ register unsigned long __p6 asm("7") = p6; /* R7: parameter 6 */
+ register long __rc asm("2"); /* R2: return value on exit */
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to operand 0 (__rc) */
+ "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
+ : "memory", "cc");
+ return __rc;
+}
+
+/* kvm on s390 is always paravirtualization enabled */
+static inline int kvm_para_available(void)
+{
+ return 1;
+}
+
+/* No feature bits are currently assigned for kvm on s390 */
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
+#endif /* __S390_KVM_PARA_H */
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
new file mode 100644
index 00000000000..fc8a8284778
--- /dev/null
+++ b/arch/s390/include/asm/linkage.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#include <linux/stringify.h>
+
+#define __ALIGN .align 4, 0x07
+#define __ALIGN_STR __stringify(__ALIGN)
+
+#endif
diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h
new file mode 100644
index 00000000000..c11c530f74d
--- /dev/null
+++ b/arch/s390/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
new file mode 100644
index 00000000000..4349197ab9d
--- /dev/null
+++ b/arch/s390/include/asm/lowcore.h
@@ -0,0 +1,361 @@
+/*
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Hartmut Penner <hp@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Denis Joseph Barrow,
+ */
+
+#ifndef _ASM_S390_LOWCORE_H
+#define _ASM_S390_LOWCORE_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/cpu.h>
+
+#ifdef CONFIG_32BIT
+
+#define LC_ORDER 0
+#define LC_PAGES 1
+
+struct save_area {
+ u32 ext_save;
+ u64 timer;
+ u64 clk_cmp;
+ u8 pad1[24];
+ u8 psw[8];
+ u32 pref_reg;
+ u8 pad2[20];
+ u32 acc_regs[16];
+ u64 fp_regs[4];
+ u32 gp_regs[16];
+ u32 ctrl_regs[16];
+} __packed;
+
+struct _lowcore {
+ psw_t restart_psw; /* 0x0000 */
+ psw_t restart_old_psw; /* 0x0008 */
+ __u8 pad_0x0010[0x0014-0x0010]; /* 0x0010 */
+ __u32 ipl_parmblock_ptr; /* 0x0014 */
+ psw_t external_old_psw; /* 0x0018 */
+ psw_t svc_old_psw; /* 0x0020 */
+ psw_t program_old_psw; /* 0x0028 */
+ psw_t mcck_old_psw; /* 0x0030 */
+ psw_t io_old_psw; /* 0x0038 */
+ __u8 pad_0x0040[0x0058-0x0040]; /* 0x0040 */
+ psw_t external_new_psw; /* 0x0058 */
+ psw_t svc_new_psw; /* 0x0060 */
+ psw_t program_new_psw; /* 0x0068 */
+ psw_t mcck_new_psw; /* 0x0070 */
+ psw_t io_new_psw; /* 0x0078 */
+ __u32 ext_params; /* 0x0080 */
+ __u16 ext_cpu_addr; /* 0x0084 */
+ __u16 ext_int_code; /* 0x0086 */
+ __u16 svc_ilc; /* 0x0088 */
+ __u16 svc_code; /* 0x008a */
+ __u16 pgm_ilc; /* 0x008c */
+ __u16 pgm_code; /* 0x008e */
+ __u32 trans_exc_code; /* 0x0090 */
+ __u16 mon_class_num; /* 0x0094 */
+ __u8 per_code; /* 0x0096 */
+ __u8 per_atmid; /* 0x0097 */
+ __u32 per_address; /* 0x0098 */
+ __u32 monitor_code; /* 0x009c */
+ __u8 exc_access_id; /* 0x00a0 */
+ __u8 per_access_id; /* 0x00a1 */
+ __u8 op_access_id; /* 0x00a2 */
+ __u8 ar_mode_id; /* 0x00a3 */
+ __u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */
+ __u16 subchannel_id; /* 0x00b8 */
+ __u16 subchannel_nr; /* 0x00ba */
+ __u32 io_int_parm; /* 0x00bc */
+ __u32 io_int_word; /* 0x00c0 */
+ __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */
+ __u32 stfl_fac_list; /* 0x00c8 */
+ __u8 pad_0x00cc[0x00d4-0x00cc]; /* 0x00cc */
+ __u32 extended_save_area_addr; /* 0x00d4 */
+ __u32 cpu_timer_save_area[2]; /* 0x00d8 */
+ __u32 clock_comp_save_area[2]; /* 0x00e0 */
+ __u32 mcck_interruption_code[2]; /* 0x00e8 */
+ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */
+ __u32 external_damage_code; /* 0x00f4 */
+ __u32 failing_storage_address; /* 0x00f8 */
+ __u8 pad_0x00fc[0x0100-0x00fc]; /* 0x00fc */
+ psw_t psw_save_area; /* 0x0100 */
+ __u32 prefixreg_save_area; /* 0x0108 */
+ __u8 pad_0x010c[0x0120-0x010c]; /* 0x010c */
+
+ /* CPU register save area: defined by architecture */
+ __u32 access_regs_save_area[16]; /* 0x0120 */
+ __u32 floating_pt_save_area[8]; /* 0x0160 */
+ __u32 gpregs_save_area[16]; /* 0x0180 */
+ __u32 cregs_save_area[16]; /* 0x01c0 */
+
+ /* Save areas. */
+ __u32 save_area_sync[8]; /* 0x0200 */
+ __u32 save_area_async[8]; /* 0x0220 */
+ __u32 save_area_restart[1]; /* 0x0240 */
+
+ /* CPU flags. */
+ __u32 cpu_flags; /* 0x0244 */
+
+ /* Return psws. */
+ psw_t return_psw; /* 0x0248 */
+ psw_t return_mcck_psw; /* 0x0250 */
+
+ /* CPU time accounting values */
+ __u64 sync_enter_timer; /* 0x0258 */
+ __u64 async_enter_timer; /* 0x0260 */
+ __u64 mcck_enter_timer; /* 0x0268 */
+ __u64 exit_timer; /* 0x0270 */
+ __u64 user_timer; /* 0x0278 */
+ __u64 system_timer; /* 0x0280 */
+ __u64 steal_timer; /* 0x0288 */
+ __u64 last_update_timer; /* 0x0290 */
+ __u64 last_update_clock; /* 0x0298 */
+ __u64 int_clock; /* 0x02a0 */
+ __u64 mcck_clock; /* 0x02a8 */
+ __u64 clock_comparator; /* 0x02b0 */
+
+ /* Current process. */
+ __u32 current_task; /* 0x02b8 */
+ __u32 thread_info; /* 0x02bc */
+ __u32 kernel_stack; /* 0x02c0 */
+
+ /* Interrupt, panic and restart stack. */
+ __u32 async_stack; /* 0x02c4 */
+ __u32 panic_stack; /* 0x02c8 */
+ __u32 restart_stack; /* 0x02cc */
+
+ /* Restart function and parameter. */
+ __u32 restart_fn; /* 0x02d0 */
+ __u32 restart_data; /* 0x02d4 */
+ __u32 restart_source; /* 0x02d8 */
+
+ /* Address space pointer. */
+ __u32 kernel_asce; /* 0x02dc */
+ __u32 user_asce; /* 0x02e0 */
+ __u32 current_pid; /* 0x02e4 */
+
+ /* SMP info area */
+ __u32 cpu_nr; /* 0x02e8 */
+ __u32 softirq_pending; /* 0x02ec */
+ __u32 percpu_offset; /* 0x02f0 */
+ __u32 machine_flags; /* 0x02f4 */
+ __u32 ftrace_func; /* 0x02f8 */
+ __u32 spinlock_lockval; /* 0x02fc */
+
+ __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
+ * block. Dump tools need IPIB for IPL after dump.
+ * Note: do not change the position of any fields in 0x0e00-0x0f00
+ */
+ __u32 ipib; /* 0x0e00 */
+ __u32 ipib_checksum; /* 0x0e04 */
+ __u32 vmcore_info; /* 0x0e08 */
+ __u8 pad_0x0e0c[0x0e18-0x0e0c]; /* 0x0e0c */
+ __u32 os_info; /* 0x0e18 */
+ __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */
+
+ /* Extended facility list */
+ __u64 stfle_fac_list[32]; /* 0x0f00 */
+} __packed;
+
+#else /* CONFIG_32BIT */
+
+#define LC_ORDER 1
+#define LC_PAGES 2
+
+struct save_area {
+ u64 fp_regs[16];
+ u64 gp_regs[16];
+ u8 psw[16];
+ u8 pad1[8];
+ u32 pref_reg;
+ u32 fp_ctrl_reg;
+ u8 pad2[4];
+ u32 tod_reg;
+ u64 timer;
+ u64 clk_cmp;
+ u8 pad3[8];
+ u32 acc_regs[16];
+ u64 ctrl_regs[16];
+} __packed;
+
+struct _lowcore {
+ __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */
+ __u32 ipl_parmblock_ptr; /* 0x0014 */
+ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */
+ __u32 ext_params; /* 0x0080 */
+ __u16 ext_cpu_addr; /* 0x0084 */
+ __u16 ext_int_code; /* 0x0086 */
+ __u16 svc_ilc; /* 0x0088 */
+ __u16 svc_code; /* 0x008a */
+ __u16 pgm_ilc; /* 0x008c */
+ __u16 pgm_code; /* 0x008e */
+ __u32 data_exc_code; /* 0x0090 */
+ __u16 mon_class_num; /* 0x0094 */
+ __u8 per_code; /* 0x0096 */
+ __u8 per_atmid; /* 0x0097 */
+ __u64 per_address; /* 0x0098 */
+ __u8 exc_access_id; /* 0x00a0 */
+ __u8 per_access_id; /* 0x00a1 */
+ __u8 op_access_id; /* 0x00a2 */
+ __u8 ar_mode_id; /* 0x00a3 */
+ __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
+ __u64 trans_exc_code; /* 0x00a8 */
+ __u64 monitor_code; /* 0x00b0 */
+ __u16 subchannel_id; /* 0x00b8 */
+ __u16 subchannel_nr; /* 0x00ba */
+ __u32 io_int_parm; /* 0x00bc */
+ __u32 io_int_word; /* 0x00c0 */
+ __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */
+ __u32 stfl_fac_list; /* 0x00c8 */
+ __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */
+ __u32 mcck_interruption_code[2]; /* 0x00e8 */
+ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */
+ __u32 external_damage_code; /* 0x00f4 */
+ __u64 failing_storage_address; /* 0x00f8 */
+ __u8 pad_0x0100[0x0110-0x0100]; /* 0x0100 */
+ __u64 breaking_event_addr; /* 0x0110 */
+ __u8 pad_0x0118[0x0120-0x0118]; /* 0x0118 */
+ psw_t restart_old_psw; /* 0x0120 */
+ psw_t external_old_psw; /* 0x0130 */
+ psw_t svc_old_psw; /* 0x0140 */
+ psw_t program_old_psw; /* 0x0150 */
+ psw_t mcck_old_psw; /* 0x0160 */
+ psw_t io_old_psw; /* 0x0170 */
+ __u8 pad_0x0180[0x01a0-0x0180]; /* 0x0180 */
+ psw_t restart_psw; /* 0x01a0 */
+ psw_t external_new_psw; /* 0x01b0 */
+ psw_t svc_new_psw; /* 0x01c0 */
+ psw_t program_new_psw; /* 0x01d0 */
+ psw_t mcck_new_psw; /* 0x01e0 */
+ psw_t io_new_psw; /* 0x01f0 */
+
+ /* Save areas. */
+ __u64 save_area_sync[8]; /* 0x0200 */
+ __u64 save_area_async[8]; /* 0x0240 */
+ __u64 save_area_restart[1]; /* 0x0280 */
+
+ /* CPU flags. */
+ __u64 cpu_flags; /* 0x0288 */
+
+ /* Return psws. */
+ psw_t return_psw; /* 0x0290 */
+ psw_t return_mcck_psw; /* 0x02a0 */
+
+ /* CPU accounting and timing values. */
+ __u64 sync_enter_timer; /* 0x02b0 */
+ __u64 async_enter_timer; /* 0x02b8 */
+ __u64 mcck_enter_timer; /* 0x02c0 */
+ __u64 exit_timer; /* 0x02c8 */
+ __u64 user_timer; /* 0x02d0 */
+ __u64 system_timer; /* 0x02d8 */
+ __u64 steal_timer; /* 0x02e0 */
+ __u64 last_update_timer; /* 0x02e8 */
+ __u64 last_update_clock; /* 0x02f0 */
+ __u64 int_clock; /* 0x02f8 */
+ __u64 mcck_clock; /* 0x0300 */
+ __u64 clock_comparator; /* 0x0308 */
+
+ /* Current process. */
+ __u64 current_task; /* 0x0310 */
+ __u64 thread_info; /* 0x0318 */
+ __u64 kernel_stack; /* 0x0320 */
+
+ /* Interrupt, panic and restart stack. */
+ __u64 async_stack; /* 0x0328 */
+ __u64 panic_stack; /* 0x0330 */
+ __u64 restart_stack; /* 0x0338 */
+
+ /* Restart function and parameter. */
+ __u64 restart_fn; /* 0x0340 */
+ __u64 restart_data; /* 0x0348 */
+ __u64 restart_source; /* 0x0350 */
+
+ /* Address space pointer. */
+ __u64 kernel_asce; /* 0x0358 */
+ __u64 user_asce; /* 0x0360 */
+ __u64 current_pid; /* 0x0368 */
+
+ /* SMP info area */
+ __u32 cpu_nr; /* 0x0370 */
+ __u32 softirq_pending; /* 0x0374 */
+ __u64 percpu_offset; /* 0x0378 */
+ __u64 vdso_per_cpu_data; /* 0x0380 */
+ __u64 machine_flags; /* 0x0388 */
+ __u64 ftrace_func; /* 0x0390 */
+ __u64 gmap; /* 0x0398 */
+ __u32 spinlock_lockval; /* 0x03a0 */
+ __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */
+
+ /* Per cpu primary space access list */
+ __u32 paste[16]; /* 0x0400 */
+
+ __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
+ * block. Dump tools need IPIB for IPL after dump.
+ * Note: do not change the position of any fields in 0x0e00-0x0f00
+ */
+ __u64 ipib; /* 0x0e00 */
+ __u32 ipib_checksum; /* 0x0e08 */
+ __u64 vmcore_info; /* 0x0e0c */
+ __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */
+ __u64 os_info; /* 0x0e18 */
+ __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */
+
+ /* Extended facility list */
+ __u64 stfle_fac_list[32]; /* 0x0f00 */
+ __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */
+
+ /* 64 bit extparam used for pfault/diag 250: defined by architecture */
+ __u64 ext_params2; /* 0x11B8 */
+ __u8 pad_0x11c0[0x1200-0x11C0]; /* 0x11C0 */
+
+ /* CPU register save area: defined by architecture */
+ __u64 floating_pt_save_area[16]; /* 0x1200 */
+ __u64 gpregs_save_area[16]; /* 0x1280 */
+ psw_t psw_save_area; /* 0x1300 */
+ __u8 pad_0x1310[0x1318-0x1310]; /* 0x1310 */
+ __u32 prefixreg_save_area; /* 0x1318 */
+ __u32 fpt_creg_save_area; /* 0x131c */
+ __u8 pad_0x1320[0x1324-0x1320]; /* 0x1320 */
+ __u32 tod_progreg_save_area; /* 0x1324 */
+ __u32 cpu_timer_save_area[2]; /* 0x1328 */
+ __u32 clock_comp_save_area[2]; /* 0x1330 */
+ __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */
+ __u32 access_regs_save_area[16]; /* 0x1340 */
+ __u64 cregs_save_area[16]; /* 0x1380 */
+ __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */
+
+ /* Transaction abort diagnostic block */
+ __u8 pgm_tdb[256]; /* 0x1800 */
+
+ /* align to the top of the prefix area */
+ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
+} __packed;
+
+#endif /* CONFIG_32BIT */
+
+#define S390_lowcore (*((struct _lowcore *) 0))
+
+extern struct _lowcore *lowcore_ptr[];
+
+static inline void set_prefix(__u32 address)
+{
+ asm volatile("spx %0" : : "m" (address) : "memory"); /* SPX (set prefix): the new prefix value is read from memory ("m") */
+}
+
+static inline __u32 store_prefix(void)
+{
+ __u32 address;
+
+ asm volatile("stpx %0" : "=m" (address)); /* STPX (store prefix): writes the current prefix value into 'address' */
+ return address;
+}
+
+#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h
new file mode 100644
index 00000000000..614dfaf47f7
--- /dev/null
+++ b/arch/s390/include/asm/mathemu.h
@@ -0,0 +1,28 @@
+/*
+ * IEEE floating point emulation.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef __MATHEMU__
+#define __MATHEMU__
+
+extern int math_emu_b3(__u8 *, struct pt_regs *);
+extern int math_emu_ed(__u8 *, struct pt_regs *);
+extern int math_emu_ldr(__u8 *);
+extern int math_emu_ler(__u8 *);
+extern int math_emu_std(__u8 *, struct pt_regs *);
+extern int math_emu_ld(__u8 *, struct pt_regs *);
+extern int math_emu_ste(__u8 *, struct pt_regs *);
+extern int math_emu_le(__u8 *, struct pt_regs *);
+extern int math_emu_lfpc(__u8 *, struct pt_regs *);
+extern int math_emu_stfpc(__u8 *, struct pt_regs *);
+extern int math_emu_srnm(__u8 *, struct pt_regs *);
+
+#endif /* __MATHEMU__ */
+
+
+
+
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
new file mode 100644
index 00000000000..9977e08df5b
--- /dev/null
+++ b/arch/s390/include/asm/mman.h
@@ -0,0 +1,15 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/mman.h"
+ */
+#ifndef __S390_MMAN_H__
+#define __S390_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
+int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
+#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
+#endif
+#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
new file mode 100644
index 00000000000..a5e656260a7
--- /dev/null
+++ b/arch/s390/include/asm/mmu.h
@@ -0,0 +1,42 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+
+typedef struct {
+ cpumask_t cpu_attach_mask;
+ atomic_t attach_count;
+ unsigned int flush_mm;
+ spinlock_t list_lock;
+ struct list_head pgtable_list;
+ struct list_head gmap_list;
+ unsigned long asce_bits;
+ unsigned long asce_limit;
+ unsigned long vdso_base;
+ /* The mmu context has extended page tables. */
+ unsigned int has_pgste:1;
+ /* The mmu context uses storage keys. */
+ unsigned int use_skey:1;
+} mm_context_t;
+
+#define INIT_MM_CONTEXT(name) \
+ .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \
+ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
+ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
+
+static inline int tprot(unsigned long addr)
+{
+ int rc = -EFAULT; /* kept if the exception-table fixup below is taken */
+
+ asm volatile(
+ " tprot 0(%1),0\n" /* TEST PROTECTION on addr; the result is reported in the cc */
+ "0: ipm %0\n" /* fetch the condition code ... */
+ " srl %0,28\n" /* ... and shift it down so that rc = cc (0..3) */
+ "1:\n"
+ EX_TABLE(0b,1b) /* a fault reported at 0b resumes at 1b, skipping ipm/srl */
+ : "+d" (rc) : "a" (addr) : "cc");
+ return rc;
+}
+
+#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
new file mode 100644
index 00000000000..3815bfea1b2
--- /dev/null
+++ b/arch/s390/include/asm/mmu_context.h
@@ -0,0 +1,123 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/mmu_context.h"
+ */
+
+#ifndef __S390_MMU_CONTEXT_H
+#define __S390_MMU_CONTEXT_H
+
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlbflush.h>
+#include <asm/ctl_reg.h>
+
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ cpumask_clear(&mm->context.cpu_attach_mask);
+ atomic_set(&mm->context.attach_count, 0);
+ mm->context.flush_mm = 0;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
+#ifdef CONFIG_64BIT
+ mm->context.asce_bits |= _ASCE_TYPE_REGION3;
+#endif
+ mm->context.has_pgste = 0;
+ mm->context.use_skey = 0;
+ mm->context.asce_limit = STACK_TOP_MAX;
+ crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+ return 0;
+}
+
+#define destroy_context(mm) do { } while (0)
+
+static inline void set_user_asce(struct mm_struct *mm)
+{
+ S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+ if (current->thread.mm_segment.ar4)
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ set_cpu_flag(CIF_ASCE);
+}
+
+static inline void clear_user_asce(void)
+{
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+}
+
+static inline void load_kernel_asce(void)
+{
+ unsigned long asce;
+
+ __ctl_store(asce, 1, 1);
+ if (asce != S390_lowcore.kernel_asce)
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_cpu_flag(CIF_ASCE);
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ int cpu = smp_processor_id();
+
+ if (prev == next)
+ return;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+ /* Clear old ASCE by loading the kernel ASCE. */
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+ atomic_inc(&next->context.attach_count);
+ atomic_dec(&prev->context.attach_count);
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+ S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
+}
+
+#define finish_arch_post_lock_switch finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+
+ load_kernel_asce();
+ if (mm) { /* nothing mm-specific to do for a task without an mm */
+ preempt_disable();
+ while (atomic_read(&mm->context.attach_count) >> 16) /* spin until all bits above the low 16 are clear; NOTE(review): presumably an in-flight flush count - confirm */
+ cpu_relax();
+
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ if (mm->context.flush_mm) /* NOTE(review): presumably marks a deferred TLB flush - confirm */
+ __tlb_flush_mm(mm);
+ preempt_enable();
+ }
+ set_fs(current->thread.mm_segment);
+}
+
+#define enter_lazy_tlb(mm,tsk) do { } while (0)
+#define deactivate_mm(tsk,mm) do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev,
+ struct mm_struct *next)
+{
+ switch_mm(prev, next, current);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ set_user_asce(next);
+}
+
+static inline void arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_64BIT
+ if (oldmm->context.asce_limit < mm->context.asce_limit)
+ crst_table_downgrade(mm, oldmm->context.asce_limit);
+#endif
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
new file mode 100644
index 00000000000..df1f861a848
--- /dev/null
+++ b/arch/s390/include/asm/module.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_S390_MODULE_H
+#define _ASM_S390_MODULE_H
+
+#include <asm-generic/module.h>
+
+/*
+ * This file contains the s390 architecture specific module code.
+ */
+
+struct mod_arch_syminfo
+{
+ unsigned long got_offset;
+ unsigned long plt_offset;
+ int got_initialized;
+ int plt_initialized;
+};
+
+struct mod_arch_specific
+{
+ /* Starting offset of got in the module core memory. */
+ unsigned long got_offset;
+ /* Starting offset of plt in the module core memory. */
+ unsigned long plt_offset;
+ /* Size of the got. */
+ unsigned long got_size;
+ /* Size of the plt. */
+ unsigned long plt_size;
+ /* Number of symbols in syminfo. */
+ int nsyms;
+ /* Additional symbol information (got and plt offsets). */
+ struct mod_arch_syminfo *syminfo;
+};
+
+#endif /* _ASM_S390_MODULE_H */
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
new file mode 100644
index 00000000000..458c1f7fbc1
--- /dev/null
+++ b/arch/s390/include/asm/mutex.h
@@ -0,0 +1,9 @@
+/*
+ * Pull in the generic implementation for the mutex fastpath.
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+
+#include <asm-generic/mutex-dec.h>
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
new file mode 100644
index 00000000000..35f8ec18561
--- /dev/null
+++ b/arch/s390/include/asm/nmi.h
@@ -0,0 +1,66 @@
+/*
+ * Machine check handler definitions
+ *
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_NMI_H
+#define _ASM_S390_NMI_H
+
+#include <linux/types.h>
+
+struct mci {
+ __u32 sd : 1; /* 00 system damage */
+ __u32 pd : 1; /* 01 instruction-processing damage */
+ __u32 sr : 1; /* 02 system recovery */
+ __u32 : 1; /* 03 */
+ __u32 cd : 1; /* 04 timing-facility damage */
+ __u32 ed : 1; /* 05 external damage */
+ __u32 : 1; /* 06 */
+ __u32 dg : 1; /* 07 degradation */
+ __u32 w : 1; /* 08 warning pending */
+ __u32 cp : 1; /* 09 channel-report pending */
+ __u32 sp : 1; /* 10 service-processor damage */
+ __u32 ck : 1; /* 11 channel-subsystem damage */
+ __u32 : 2; /* 12-13 */
+ __u32 b : 1; /* 14 backed up */
+ __u32 : 1; /* 15 */
+ __u32 se : 1; /* 16 storage error uncorrected */
+ __u32 sc : 1; /* 17 storage error corrected */
+ __u32 ke : 1; /* 18 storage-key error uncorrected */
+ __u32 ds : 1; /* 19 storage degradation */
+ __u32 wp : 1; /* 20 psw mwp validity */
+ __u32 ms : 1; /* 21 psw mask and key validity */
+ __u32 pm : 1; /* 22 psw program mask and cc validity */
+ __u32 ia : 1; /* 23 psw instruction address validity */
+ __u32 fa : 1; /* 24 failing storage address validity */
+ __u32 : 1; /* 25 */
+ __u32 ec : 1; /* 26 external damage code validity */
+ __u32 fp : 1; /* 27 floating point register validity */
+ __u32 gr : 1; /* 28 general register validity */
+ __u32 cr : 1; /* 29 control register validity */
+ __u32 : 1; /* 30 */
+ __u32 st : 1; /* 31 storage logical validity */
+ __u32 ie : 1; /* 32 indirect storage error */
+ __u32 ar : 1; /* 33 access register validity */
+ __u32 da : 1; /* 34 delayed access exception */
+ __u32 : 7; /* 35-41 */
+ __u32 pr : 1; /* 42 tod programmable register validity */
+ __u32 fc : 1; /* 43 fp control register validity */
+ __u32 ap : 1; /* 44 ancillary report */
+ __u32 : 1; /* 45 */
+ __u32 ct : 1; /* 46 cpu timer validity */
+ __u32 cc : 1; /* 47 clock comparator validity */
+ __u32 : 16; /* 48-63 */
+};
+
+struct pt_regs;
+
+extern void s390_handle_mcck(void);
+extern void s390_do_machine_check(struct pt_regs *regs);
+
+#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
new file mode 100644
index 00000000000..295f2c4f1c9
--- /dev/null
+++ b/arch/s390/include/asm/os_info.h
@@ -0,0 +1,49 @@
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_OS_INFO_H
+#define _ASM_S390_OS_INFO_H
+
+#define OS_INFO_VERSION_MAJOR 1
+#define OS_INFO_VERSION_MINOR 1
+#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
+
+#define OS_INFO_VMCOREINFO 0
+#define OS_INFO_REIPL_BLOCK 1
+
+struct os_info_entry {
+ u64 addr;
+ u64 size;
+ u32 csum;
+} __packed;
+
+struct os_info {
+ u64 magic;
+ u32 csum;
+ u16 version_major;
+ u16 version_minor;
+ u64 crashkernel_addr;
+ u64 crashkernel_size;
+ struct os_info_entry entry[2];
+ u8 reserved[4024];
+} __packed;
+
+void os_info_init(void);
+void os_info_entry_add(int nr, void *ptr, u64 len);
+void os_info_crashkernel_add(unsigned long base, unsigned long size);
+u32 os_info_csum(struct os_info *os_info);
+
+#ifdef CONFIG_CRASH_DUMP
+void *os_info_old_entry(int nr, unsigned long *size);
+int copy_from_oldmem(void *dest, void *src, size_t count);
+#else
+static inline void *os_info_old_entry(int nr, unsigned long *size)
+{
+ return NULL;
+}
+#endif
+
+#endif /* _ASM_S390_OS_INFO_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
new file mode 100644
index 00000000000..114258eeaac
--- /dev/null
+++ b/arch/s390/include/asm/page.h
@@ -0,0 +1,167 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ */
+
+#ifndef _S390_PAGE_H
+#define _S390_PAGE_H
+
+#include <linux/const.h>
+#include <asm/types.h>
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_DEFAULT_ACC 0
+#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
+
+#define HPAGE_SHIFT 20
+#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define ARCH_HAS_HUGE_PTE_TYPE
+#define ARCH_HAS_PREPARE_HUGEPAGE
+#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
+static inline void storage_key_init_range(unsigned long start, unsigned long end)
+{
+#if PAGE_DEFAULT_KEY /* evaluates to 0 while PAGE_DEFAULT_ACC is 0, so the call below is compiled out */
+ __storage_key_init_range(start, end);
+#endif
+}
+
+static inline void clear_page(void *page)
+{
+ register unsigned long reg1 asm ("1") = 0; /* R1: source length 0 and pad byte 0 */
+ register void *reg2 asm ("2") = page; /* R2: destination address */
+ register unsigned long reg3 asm ("3") = 4096; /* R3: destination length */
+ asm volatile(
+ " mvcl 2,0" /* empty source operand: the whole destination is filled with the pad byte */
+ : "+d" (reg2), "+d" (reg3) : "d" (reg1)
+ : "memory", "cc");
+}
+
+/*
+ * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
+ * bypass caches when copying a page. Especially when copying huge pages
+ * this keeps L1 and L2 data caches alive.
+ */
+static inline void copy_page(void *to, void *from)
+{
+ register void *reg2 asm ("2") = to; /* R2: destination address */
+ register unsigned long reg3 asm ("3") = 0x1000; /* R3: destination length */
+ register void *reg4 asm ("4") = from; /* R4: source address */
+ register unsigned long reg5 asm ("5") = 0xb0001000; /* R5: pad byte 0xb0 in the top byte, source length 0x1000 below it */
+ asm volatile(
+ " mvcl 2,4" /* destination pair R2/R3, source pair R4/R5 */
+ : "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5)
+ : : "memory", "cc");
+}
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+/*
+ * These are used to make use of C type-checking..
+ */
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct { unsigned long pgste; } pgste_t;
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef pte_t *pgtable_t;
+
+#define pgprot_val(x) ((x).pgprot)
+#define pgste_val(x) ((x).pgste)
+#define pte_val(x) ((x).pte)
+#define pmd_val(x) ((x).pmd)
+#define pud_val(x) ((x).pud)
+#define pgd_val(x) ((x).pgd)
+
+#define __pgste(x) ((pgste_t) { (x) } )
+#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { (x) } )
+#define __pud(x) ((pud_t) { (x) } )
+#define __pgd(x) ((pgd_t) { (x) } )
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+static inline void page_set_storage_key(unsigned long addr,
+ unsigned char skey, int mapped)
+{
+ if (!mapped)
+ asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" /* hand-encoded SSKE (opcode 0xb22b) with a mask field of 8; NOTE(review): presumably the non-quiescing form - confirm */
+ : : "d" (skey), "a" (addr));
+ else
+ asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); /* plain SSKE without a mask field */
+}
+
+static inline unsigned char page_get_storage_key(unsigned long addr)
+{
+ unsigned char skey;
+
+ asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); /* ISKE: read the storage key for addr into skey */
+ return skey;
+}
+
+static inline int page_reset_referenced(unsigned long addr)
+{
+ unsigned int ipm;
+
+ asm volatile(
+ " rrbe 0,%1\n" /* RRBE: reset the reference bit; its previous state is reported in the cc */
+ " ipm %0\n" /* the cc lands in the bits masked by 0x30000000 */
+ : "=d" (ipm) : "a" (addr) : "cc");
+ return !!(ipm & 0x20000000); /* high cc bit set (cc 2 or 3): the reference bit was one */
+}
+
+/* Bits in the storage key */
+#define _PAGE_CHANGED 0x02 /* HW changed bit */
+#define _PAGE_REFERENCED 0x04 /* HW referenced bit */
+#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
+#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
+
+struct page;
+void arch_free_page(struct page *page, int order);
+void arch_alloc_page(struct page *page, int order);
+void arch_set_page_states(int make_stable);
+
+static inline int devmem_is_allowed(unsigned long pfn)
+{
+ return 0;
+}
+
+#define HAVE_ARCH_FREE_PAGE
+#define HAVE_ARCH_ALLOC_PAGE
+
+#endif /* !__ASSEMBLY__ */
+
+#define __PAGE_OFFSET 0x0UL
+#define PAGE_OFFSET 0x0UL
+#define __pa(x) (unsigned long)(x)
+#define __va(x) (void *)(unsigned long)(x)
+#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#define __HAVE_ARCH_GATE_AREA 1
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
new file mode 100644
index 00000000000..c030900320e
--- /dev/null
+++ b/arch/s390/include/asm/pci.h
@@ -0,0 +1,192 @@
+#ifndef __ASM_S390_PCI_H
+#define __ASM_S390_PCI_H
+
+/* must be set before including asm-generic/pci.h */
+#define PCI_DMA_BUS_IS_PHYS (0)
+/* must be set before including pci_clp.h */
+#define PCI_BAR_COUNT 6
+
+#include <linux/pci.h>
+#include <asm-generic/pci.h>
+#include <asm-generic/pci-dma-compat.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_debug.h>
+
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM 0x10000000
+
+#define pcibios_assign_all_busses() (0)
+
+void __iomem *pci_iomap(struct pci_dev *, int, unsigned long);
+void pci_iounmap(struct pci_dev *, void __iomem *);
+int pci_domain_nr(struct pci_bus *);
+int pci_proc_domain(struct pci_bus *);
+
+#define ZPCI_BUS_NR 0 /* default bus number */
+#define ZPCI_DEVFN 0 /* default device number */
+
+/* PCI Function Controls */
+#define ZPCI_FC_FN_ENABLED 0x80
+#define ZPCI_FC_ERROR 0x40
+#define ZPCI_FC_BLOCKED 0x20
+#define ZPCI_FC_DMA_ENABLED 0x10
+
+struct zpci_fmb {
+ u32 format : 8;
+ u32 dma_valid : 1;
+ u32 : 23;
+ u32 samples;
+ u64 last_update;
+ /* hardware counters */
+ u64 ld_ops;
+ u64 st_ops;
+ u64 stb_ops;
+ u64 rpcit_ops;
+ u64 dma_rbytes;
+ u64 dma_wbytes;
+ /* software counters */
+ atomic64_t allocated_pages;
+ atomic64_t mapped_pages;
+ atomic64_t unmapped_pages;
+} __packed __aligned(16);
+
+#define ZPCI_MSI_VEC_BITS 11
+#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS)
+#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1)
+
+enum zpci_state {
+ ZPCI_FN_STATE_RESERVED,
+ ZPCI_FN_STATE_STANDBY,
+ ZPCI_FN_STATE_CONFIGURED,
+ ZPCI_FN_STATE_ONLINE,
+ NR_ZPCI_FN_STATES,
+};
+
+struct zpci_bar_struct {
+ struct resource *res; /* bus resource */
+ u32 val; /* bar start & 3 flag bits */
+ u16 map_idx; /* index into bar mapping array */
+ u8 size; /* order 2 exponent */
+};
+
+/* Private data per function */
+struct zpci_dev {
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+ struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+
+ enum zpci_state state;
+ u32 fid; /* function ID, used by sclp */
+ u32 fh; /* function handle, used by insn's */
+ u16 vfn; /* virtual function number */
+ u16 pchid; /* physical channel ID */
+ u8 pfgid; /* function group ID */
+ u8 pft; /* pci function type */
+ u16 domain;
+
+ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
+ u32 uid; /* user defined id */
+ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+
+ /* IRQ stuff */
+ u64 msi_addr; /* MSI address */
+ struct airq_iv *aibv; /* adapter interrupt bit vector */
+ unsigned int aisb; /* number of the summary bit */
+
+ /* DMA stuff */
+ unsigned long *dma_table;
+ spinlock_t dma_table_lock;
+ int tlb_refresh;
+
+ spinlock_t iommu_bitmap_lock;
+ unsigned long *iommu_bitmap;
+ unsigned long iommu_size;
+ unsigned long iommu_pages;
+ unsigned int next_bit;
+
+ char res_name[16];
+ struct zpci_bar_struct bars[PCI_BAR_COUNT];
+
+ u64 start_dma; /* Start of available DMA addresses */
+ u64 end_dma; /* End of available DMA addresses */
+ u64 dma_mask; /* DMA address space mask */
+
+ /* Function measurement block */
+ struct zpci_fmb *fmb;
+ u16 fmb_update; /* update interval */
+
+ enum pci_bus_speed max_bus_speed;
+
+ struct dentry *debugfs_dev;
+ struct dentry *debugfs_perf;
+};
+
+/* Report whether bit 31 of the device's function handle is set. */
+static inline bool zdev_enabled(struct zpci_dev *zdev)
+{
+	return (zdev->fh & (1UL << 31)) != 0;
+}
+
+extern const struct attribute_group *zpci_attr_groups[];
+
+/* -----------------------------------------------------------------------------
+ Prototypes
+----------------------------------------------------------------------------- */
+/* Base stuff */
+int zpci_create_device(struct zpci_dev *);
+int zpci_enable_device(struct zpci_dev *);
+int zpci_disable_device(struct zpci_dev *);
+void zpci_stop_device(struct zpci_dev *);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_unregister_ioat(struct zpci_dev *, u8);
+
+/* CLP */
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
+int clp_add_pci_device(u32, u32, int);
+int clp_enable_fh(struct zpci_dev *, u8);
+int clp_disable_fh(struct zpci_dev *);
+
+#ifdef CONFIG_PCI
+/* Error handling and recovery */
+void zpci_event_error(void *);
+void zpci_event_availability(void *);
+void zpci_rescan(void);
+bool zpci_is_enabled(void);
+#else /* CONFIG_PCI */
+static inline void zpci_event_error(void *e) {}
+static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+ return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
+/* Helpers */
+struct zpci_dev *get_zdev(struct pci_dev *);
+struct zpci_dev *get_zdev_by_fid(u32);
+
+/* DMA */
+int zpci_dma_init(void);
+void zpci_dma_exit(void);
+
+/* FMB */
+int zpci_fmb_enable_device(struct zpci_dev *);
+int zpci_fmb_disable_device(struct zpci_dev *);
+
+/* Debug */
+int zpci_debug_init(void);
+void zpci_debug_exit(void);
+void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_exit_device(struct zpci_dev *);
+void zpci_debug_info(struct zpci_dev *, struct seq_file *);
+
+#endif
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
new file mode 100644
index 00000000000..dd78f92f1cc
--- /dev/null
+++ b/arch/s390/include/asm/pci_clp.h
@@ -0,0 +1,186 @@
+#ifndef _ASM_S390_PCI_CLP_H
+#define _ASM_S390_PCI_CLP_H
+
+#include <asm/clp.h>
+
+/*
+ * Call Logical Processor - Command Codes
+ */
+#define CLP_LIST_PCI 0x0002
+#define CLP_QUERY_PCI_FN 0x0003
+#define CLP_QUERY_PCI_FNGRP 0x0004
+#define CLP_SET_PCI_FN 0x0005
+
+/* PCI function handle list entry */
+struct clp_fh_list_entry {
+ u16 device_id;
+ u16 vendor_id;
+ u32 config_state : 1;
+ u32 : 31;
+ u32 fid; /* PCI function id */
+ u32 fh; /* PCI function handle */
+} __packed;
+
+#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */
+#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */
+#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */
+#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */
+#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */
+#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */
+#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */
+#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */
+#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */
+#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */
+
+/* request or response block header length */
+#define LIST_PCI_HDR_LEN 32
+
+/* Number of function handles fitting in response block */
+#define CLP_FH_LIST_NR_ENTRIES \
+ ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \
+ / sizeof(struct clp_fh_list_entry))
+
+#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */
+#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
+
+#define CLP_UTIL_STR_LEN 64
+#define CLP_PFIP_NR_SEGMENTS 4
+
+/* List PCI functions request */
+struct clp_req_list_pci {
+ struct clp_req_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u64 resume_token;
+ u64 reserved2;
+} __packed;
+
+/* List PCI functions response */
+struct clp_rsp_list_pci {
+ struct clp_rsp_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u64 resume_token;
+ u32 reserved2;
+ u16 max_fn;
+ u8 reserved3;
+ u8 entry_size;
+ struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES];
+} __packed;
+
+/* Query PCI function request */
+struct clp_req_query_pci {
+ struct clp_req_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u32 fh; /* function handle */
+ u32 reserved2;
+ u64 reserved3;
+} __packed;
+
+/* Query PCI function response */
+struct clp_rsp_query_pci {
+ struct clp_rsp_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 : 64;
+ u16 vfn; /* virtual fn number */
+ u16 : 7;
+ u16 util_str_avail : 1; /* utility string available? */
+ u16 pfgid : 8; /* pci function group id */
+ u32 fid; /* pci function id */
+ u8 bar_size[PCI_BAR_COUNT];
+ u16 pchid;
+ u32 bar[PCI_BAR_COUNT];
+ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
+ u32 : 24;
+ u8 pft; /* pci function type */
+ u64 sdma; /* start dma as */
+ u64 edma; /* end dma as */
+ u32 reserved[11];
+ u32 uid; /* user defined id */
+ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+} __packed;
+
+/* Query PCI function group request */
+struct clp_req_query_pci_grp {
+ struct clp_req_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u32 : 24;
+ u32 pfgid : 8; /* function group id */
+ u32 reserved2;
+ u64 reserved3;
+} __packed;
+
+/* Query PCI function group response */
+struct clp_rsp_query_pci_grp {
+ struct clp_rsp_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u16 : 4;
+ u16 noi : 12; /* number of interrupts */
+ u8 version;
+ u8 : 6;
+ u8 frame : 1;
+ u8 refresh : 1; /* TLB refresh mode */
+ u16 reserved2;
+ u16 mui;
+ u64 reserved3;
+ u64 dasm; /* dma address space mask */
+ u64 msia; /* MSI address */
+ u64 reserved4;
+ u64 reserved5;
+} __packed;
+
+/* Set PCI function request */
+struct clp_req_set_pci {
+ struct clp_req_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u32 fh; /* function handle */
+ u16 reserved2;
+ u8 oc; /* operation controls */
+ u8 ndas; /* number of dma spaces */
+ u64 reserved3;
+} __packed;
+
+/* Set PCI function response */
+struct clp_rsp_set_pci {
+ struct clp_rsp_hdr hdr;
+ u32 fmt : 4; /* cmd request block format */
+ u32 : 28;
+ u64 reserved1;
+ u32 fh; /* function handle */
+ u32 reserved3;
+ u64 reserved4;
+} __packed;
+
+/* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_list_pci {
+ struct clp_req_list_pci request;
+ struct clp_rsp_list_pci response;
+} __packed;
+
+struct clp_req_rsp_set_pci {
+ struct clp_req_set_pci request;
+ struct clp_rsp_set_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci {
+ struct clp_req_query_pci request;
+ struct clp_rsp_query_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci_grp {
+ struct clp_req_query_pci_grp request;
+ struct clp_rsp_query_pci_grp response;
+} __packed;
+
+#endif
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
new file mode 100644
index 00000000000..ac24b26fc06
--- /dev/null
+++ b/arch/s390/include/asm/pci_debug.h
@@ -0,0 +1,28 @@
+#ifndef _S390_ASM_PCI_DEBUG_H
+#define _S390_ASM_PCI_DEBUG_H
+
+#include <asm/debug.h>
+
+extern debug_info_t *pci_debug_msg_id;
+extern debug_info_t *pci_debug_err_id;
+
+#define zpci_dbg(imp, fmt, args...) \
+ debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args)
+
+/*
+ * Format the arguments into a 16-byte on-stack buffer (longer output is
+ * truncated by snprintf) and log it as a text event to pci_debug_err_id.
+ */
+#define zpci_err(text...) \
+	do { \
+		char debug_buffer[16]; \
+		snprintf(debug_buffer, sizeof(debug_buffer), text); \
+		debug_text_event(pci_debug_err_id, 0, debug_buffer); \
+	} while (0)
+
+/*
+ * Log @len bytes starting at @addr to pci_debug_err_id, one debug event per
+ * step of pci_debug_err_id->buf_size bytes.
+ */
+static inline void zpci_err_hex(void *addr, int len)
+{
+	for (; len > 0; len -= pci_debug_err_id->buf_size,
+			addr += pci_debug_err_id->buf_size)
+		debug_event(pci_debug_err_id, 0, addr, len);
+}
+
+#endif
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
new file mode 100644
index 00000000000..30b4c179c38
--- /dev/null
+++ b/arch/s390/include/asm/pci_dma.h
@@ -0,0 +1,196 @@
+#ifndef _ASM_S390_PCI_DMA_H
+#define _ASM_S390_PCI_DMA_H
+
+/* I/O Translation Anchor (IOTA) */
+enum zpci_ioat_dtype {
+ ZPCI_IOTA_STO = 0,
+ ZPCI_IOTA_RTTO = 1,
+ ZPCI_IOTA_RSTO = 2,
+ ZPCI_IOTA_RFTO = 3,
+ ZPCI_IOTA_PFAA = 4,
+ ZPCI_IOTA_IOPFAA = 5,
+ ZPCI_IOTA_IOPTO = 7
+};
+
+#define ZPCI_IOTA_IOT_ENABLED 0x800UL
+#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2)
+#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2)
+#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2)
+#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2)
+#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2)
+#define ZPCI_IOTA_FS_4K 0
+#define ZPCI_IOTA_FS_1M 1
+#define ZPCI_IOTA_FS_2G 2
+#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
+
+#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
+#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
+#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
+#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
+#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
+
+/* I/O Region and segment tables */
+#define ZPCI_INDEX_MASK 0x7ffUL
+
+#define ZPCI_TABLE_TYPE_MASK 0xc
+#define ZPCI_TABLE_TYPE_RFX 0xc
+#define ZPCI_TABLE_TYPE_RSX 0x8
+#define ZPCI_TABLE_TYPE_RTX 0x4
+#define ZPCI_TABLE_TYPE_SX 0x0
+
+#define ZPCI_TABLE_LEN_RFX 0x3
+#define ZPCI_TABLE_LEN_RSX 0x3
+#define ZPCI_TABLE_LEN_RTX 0x3
+
+#define ZPCI_TABLE_OFFSET_MASK 0xc0
+#define ZPCI_TABLE_SIZE 0x4000
+#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE
+#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long))
+#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+
+#define ZPCI_TABLE_BITS 11
+#define ZPCI_PT_BITS 8
+#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT)
+#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+
+#define ZPCI_RTE_FLAG_MASK 0x3fffUL
+#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK)
+#define ZPCI_STE_FLAG_MASK 0x7ffUL
+#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK)
+
+/* I/O Page tables */
+#define ZPCI_PTE_VALID_MASK 0x400
+#define ZPCI_PTE_INVALID 0x400
+#define ZPCI_PTE_VALID 0x000
+#define ZPCI_PT_SIZE 0x800
+#define ZPCI_PT_ALIGN ZPCI_PT_SIZE
+#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1)
+
+#define ZPCI_PTE_FLAG_MASK 0xfffUL
+#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK)
+
+/* Shared bits */
+#define ZPCI_TABLE_VALID 0x00
+#define ZPCI_TABLE_INVALID 0x20
+#define ZPCI_TABLE_PROTECTED 0x200
+#define ZPCI_TABLE_UNPROTECTED 0x000
+
+#define ZPCI_TABLE_VALID_MASK 0x20
+#define ZPCI_TABLE_PROT_MASK 0x200
+
+/* Index selected by the ZPCI_RT_SHIFT bits of a DMA address. */
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (addr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+/* Index selected by the ZPCI_ST_SHIFT bits of a DMA address. */
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (addr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+/* Page-table index of a DMA address: the bits just above PAGE_SHIFT. */
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (addr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+/* Replace the address part of a page-table entry, keeping its flag bits. */
+static inline void set_pt_pfaa(unsigned long *entry, void *pfaa)
+{
+	unsigned long flags = *entry & ZPCI_PTE_FLAG_MASK;
+	unsigned long addr = (unsigned long) pfaa & ZPCI_PTE_ADDR_MASK;
+
+	*entry = flags | addr;
+}
+
+/*
+ * Replace the address part of a region-table entry with @sto, keeping its
+ * flag bits and OR-ing in the ZPCI_TABLE_TYPE_RTX type.
+ */
+static inline void set_rt_sto(unsigned long *entry, void *sto)
+{
+	unsigned long flags = *entry & ZPCI_RTE_FLAG_MASK;
+	unsigned long addr = (unsigned long) sto & ZPCI_RTE_ADDR_MASK;
+
+	*entry = flags | addr | ZPCI_TABLE_TYPE_RTX;
+}
+
+/*
+ * Replace the address part of a segment-table entry with @pto, keeping its
+ * flag bits and OR-ing in the ZPCI_TABLE_TYPE_SX type.
+ */
+static inline void set_st_pto(unsigned long *entry, void *pto)
+{
+	unsigned long flags = *entry & ZPCI_STE_FLAG_MASK;
+	unsigned long addr = (unsigned long) pto & ZPCI_STE_ADDR_MASK;
+
+	*entry = flags | addr | ZPCI_TABLE_TYPE_SX;
+}
+
+/*
+ * Mark a region-table entry valid: clear the valid and offset fields, then
+ * set ZPCI_TABLE_VALID and the ZPCI_TABLE_LEN_RTX length.
+ */
+static inline void validate_rt_entry(unsigned long *entry)
+{
+	unsigned long rte = *entry;
+
+	rte &= ~ZPCI_TABLE_VALID_MASK;
+	rte &= ~ZPCI_TABLE_OFFSET_MASK;
+	rte |= ZPCI_TABLE_VALID | ZPCI_TABLE_LEN_RTX;
+	*entry = rte;
+}
+
+/* Mark a segment-table entry valid. */
+static inline void validate_st_entry(unsigned long *entry)
+{
+	*entry = (*entry & ~ZPCI_TABLE_VALID_MASK) | ZPCI_TABLE_VALID;
+}
+
+/* Mark a region- or segment-table entry invalid. */
+static inline void invalidate_table_entry(unsigned long *entry)
+{
+	*entry = (*entry & ~ZPCI_TABLE_VALID_MASK) | ZPCI_TABLE_INVALID;
+}
+
+/* Mark a page-table entry invalid; warn once if it already was invalid. */
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+	unsigned long pte = *entry;
+
+	WARN_ON_ONCE((pte & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+	*entry = (pte & ~ZPCI_PTE_VALID_MASK) | ZPCI_PTE_INVALID;
+}
+
+/* Mark a page-table entry valid; warn once if it already was valid. */
+static inline void validate_pt_entry(unsigned long *entry)
+{
+	unsigned long pte = *entry;
+
+	WARN_ON_ONCE((pte & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+	*entry = (pte & ~ZPCI_PTE_VALID_MASK) | ZPCI_PTE_VALID;
+}
+
+/* Set the protection bit of a table entry. */
+static inline void entry_set_protected(unsigned long *entry)
+{
+	*entry = (*entry & ~ZPCI_TABLE_PROT_MASK) | ZPCI_TABLE_PROTECTED;
+}
+
+/* Clear the protection bit of a table entry. */
+static inline void entry_clr_protected(unsigned long *entry)
+{
+	*entry = (*entry & ~ZPCI_TABLE_PROT_MASK) | ZPCI_TABLE_UNPROTECTED;
+}
+
+/* Non-zero if the valid field of a region/segment-table entry says "valid". */
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+	unsigned long valid_bits = entry & ZPCI_TABLE_VALID_MASK;
+
+	return valid_bits == ZPCI_TABLE_VALID;
+}
+
+/* Non-zero if the valid field of a page-table entry says "valid". */
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+	unsigned long valid_bits = entry & ZPCI_PTE_VALID_MASK;
+
+	return valid_bits == ZPCI_PTE_VALID;
+}
+
+/* Non-zero if the protection bit of a table entry is set. */
+static inline int entry_isprotected(unsigned long entry)
+{
+	unsigned long prot_bits = entry & ZPCI_TABLE_PROT_MASK;
+
+	return prot_bits == ZPCI_TABLE_PROTECTED;
+}
+
+/*
+ * Return the table address stored in a region-table entry, or NULL if the
+ * entry's type field is not ZPCI_TABLE_TYPE_RTX.
+ */
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) != ZPCI_TABLE_TYPE_RTX)
+		return NULL;
+	return (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK);
+}
+
+/*
+ * Return the table address stored in a segment-table entry, or NULL if the
+ * entry's type field is not ZPCI_TABLE_TYPE_SX.
+ */
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) != ZPCI_TABLE_TYPE_SX)
+		return NULL;
+	return (unsigned long *) (entry & ZPCI_STE_ADDR_MASK);
+}
+
+/* Prototypes */
+int zpci_dma_init_device(struct zpci_dev *);
+void zpci_dma_exit_device(struct zpci_dev *);
+
+#endif
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
new file mode 100644
index 00000000000..649eb62c52b
--- /dev/null
+++ b/arch/s390/include/asm/pci_insn.h
@@ -0,0 +1,86 @@
+#ifndef _ASM_S390_PCI_INSN_H
+#define _ASM_S390_PCI_INSN_H
+
+/* Load/Store status codes */
+#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
+#define ZPCI_PCI_ST_FUNC_IN_ERR 8
+#define ZPCI_PCI_ST_BLOCKED 12
+#define ZPCI_PCI_ST_INSUF_RES 16
+#define ZPCI_PCI_ST_INVAL_AS 20
+#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24
+#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28
+#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36
+#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
+#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
+
+/* Load/Store return codes */
+#define ZPCI_PCI_LS_OK 0
+#define ZPCI_PCI_LS_ERR 1
+#define ZPCI_PCI_LS_BUSY 2
+#define ZPCI_PCI_LS_INVAL_HANDLE 3
+
+/* Load/Store address space identifiers */
+#define ZPCI_PCIAS_MEMIO_0 0
+#define ZPCI_PCIAS_MEMIO_1 1
+#define ZPCI_PCIAS_MEMIO_2 2
+#define ZPCI_PCIAS_MEMIO_3 3
+#define ZPCI_PCIAS_MEMIO_4 4
+#define ZPCI_PCIAS_MEMIO_5 5
+#define ZPCI_PCIAS_CFGSPC 15
+
+/* Modify PCI Function Controls */
+#define ZPCI_MOD_FC_REG_INT 2
+#define ZPCI_MOD_FC_DEREG_INT 3
+#define ZPCI_MOD_FC_REG_IOAT 4
+#define ZPCI_MOD_FC_DEREG_IOAT 5
+#define ZPCI_MOD_FC_REREG_IOAT 6
+#define ZPCI_MOD_FC_RESET_ERROR 7
+#define ZPCI_MOD_FC_RESET_BLOCK 9
+#define ZPCI_MOD_FC_SET_MEASURE 10
+
+/* FIB function controls (bit masks for the "fc" byte of struct zpci_fib) */
+#define ZPCI_FIB_FC_ENABLED 0x80
+#define ZPCI_FIB_FC_ERROR 0x40
+#define ZPCI_FIB_FC_LS_BLOCKED 0x20
+#define ZPCI_FIB_FC_DMAAS_REG 0x10
+
+/* Function Information Block */
+struct zpci_fib {
+ u32 fmt : 8; /* format */
+ u32 : 24;
+ u32 : 32;
+ u8 fc; /* function controls */
+ u64 : 56;
+ u64 pba; /* PCI base address */
+ u64 pal; /* PCI address limit */
+ u64 iota; /* I/O Translation Anchor */
+ u32 : 1;
+ u32 isc : 3; /* Interrupt subclass */
+ u32 noi : 12; /* Number of interrupts */
+ u32 : 2;
+ u32 aibvo : 6; /* Adapter interrupt bit vector offset */
+ u32 sum : 1; /* Adapter int summary bit enabled */
+ u32 : 1;
+ u32 aisbo : 6; /* Adapter int summary bit offset */
+ u32 : 32;
+ u64 aibv; /* Adapter int bit vector address */
+ u64 aisb; /* Adapter int summary bit address */
+ u64 fmb_addr; /* Function measurement block address and key */
+ u32 : 32;
+ u32 gd;
+} __packed __aligned(8);
+
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+
+#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
new file mode 100644
index 00000000000..d194d544d69
--- /dev/null
+++ b/arch/s390/include/asm/pci_io.h
@@ -0,0 +1,198 @@
+#ifndef _ASM_S390_PCI_IO_H
+#define _ASM_S390_PCI_IO_H
+
+#ifdef CONFIG_PCI
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/pci_insn.h>
+
+/* I/O Map */
+#define ZPCI_IOMAP_MAX_ENTRIES 0x7fff
+#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000ULL
+#define ZPCI_IOMAP_ADDR_IDX_MASK 0x7fff000000000000ULL
+#define ZPCI_IOMAP_ADDR_OFF_MASK 0x0000ffffffffffffULL
+
+struct zpci_iomap_entry {
+ u32 fh;
+ u8 bar;
+};
+
+extern struct zpci_iomap_entry *zpci_iomap_start;
+
+/*
+ * Extract the iomap table index (address bits selected by
+ * ZPCI_IOMAP_ADDR_IDX_MASK, shifted down by 48) from a mapped PCI address.
+ * The argument is parenthesized so the cast applies to the whole expression.
+ */
+#define ZPCI_IDX(addr) \
+	(((__force u64) (addr) & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+/*
+ * Extract the offset part (bits selected by ZPCI_IOMAP_ADDR_OFF_MASK) of a
+ * mapped PCI address.  The argument is parenthesized so the cast applies to
+ * the whole expression.
+ */
+#define ZPCI_OFFSET(addr) \
+	((__force u64) (addr) & ZPCI_IOMAP_ADDR_OFF_MASK)
+
+/*
+ * Build a load/store request word: function handle in the upper 32 bits,
+ * address space shifted left by 16, length in the low bits.  Each argument
+ * is parenthesized so expression arguments are cast and shifted as a whole.
+ */
+#define ZPCI_CREATE_REQ(handle, space, len) \
+	((u64) (handle) << 32 | (space) << 16 | (len))
+
+/*
+ * Generate zpci_read_<RETTYPE>(): load LENGTH bytes from the mapped PCI
+ * address @addr.  The iomap entry selected by ZPCI_IDX(addr) supplies the
+ * function handle and BAR.  If zpci_load() returns non-zero, all ones
+ * (converted to RETTYPE) are returned instead of the loaded data.
+ */
+#define zpci_read(LENGTH, RETTYPE) \
+static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
+{ \
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
+	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
+	u64 data; \
+	if (zpci_load(&data, req, ZPCI_OFFSET(addr))) \
+		data = -1ULL; \
+	return (RETTYPE) data; \
+}
+
+/*
+ * Generate zpci_write_<VALTYPE>(): store @val (LENGTH bytes) to the mapped
+ * PCI address @addr.  The iomap entry selected by ZPCI_IDX(addr) supplies
+ * the function handle and BAR.  The result of zpci_store() is discarded.
+ */
+#define zpci_write(LENGTH, VALTYPE) \
+static inline void zpci_write_##VALTYPE(VALTYPE val, \
+		const volatile void __iomem *addr) \
+{ \
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
+	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
+	zpci_store((u64) (VALTYPE) val, req, ZPCI_OFFSET(addr)); \
+}
+
+zpci_read(8, u64)
+zpci_read(4, u32)
+zpci_read(2, u16)
+zpci_read(1, u8)
+zpci_write(8, u64)
+zpci_write(4, u32)
+zpci_write(2, u16)
+zpci_write(1, u8)
+
+/*
+ * Store the first @len bytes (1, 2, 4 or 8) found at @data with a single
+ * zpci_store() and return its result.  For any other @len the value 0 is
+ * stored and the firmware is left to report the error.
+ */
+static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len)
+{
+	u64 val = 0;	/* unsupported length: let FW report error */
+
+	if (len == 1)
+		val = (u64) *((u8 *) data);
+	else if (len == 2)
+		val = (u64) *((u16 *) data);
+	else if (len == 4)
+		val = (u64) *((u32 *) data);
+	else if (len == 8)
+		val = (u64) *((u64 *) data);
+
+	return zpci_store(val, req, offset);
+}
+
+/*
+ * Perform one zpci_load() and, on success, write the low @len bytes
+ * (1, 2, 4 or 8) of the loaded value to @dst.  For any other @len nothing
+ * is written.  Returns the zpci_load() result; @dst is untouched when that
+ * result is non-zero.
+ */
+static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
+{
+	u64 data;
+	int cc = zpci_load(&data, req, offset);
+
+	if (cc)
+		return cc;
+
+	if (len == 1)
+		*((u8 *) dst) = (u8) data;
+	else if (len == 2)
+		*((u16 *) dst) = (u16) data;
+	else if (len == 4)
+		*((u32 *) dst) = (u32) data;
+	else if (len == 8)
+		*((u64 *) dst) = (u64) data;
+
+	return 0;
+}
+
+/*
+ * Thin wrapper around zpci_store_block(); note that the argument order
+ * differs (request first here, data first there).  Returns its result.
+ */
+static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
+{
+ return zpci_store_block(data, req, offset);
+}
+
+/*
+ * Return the largest power-of-two access size that does not exceed
+ * min(@len, @max) and to which both @src and @dst are aligned.  The result
+ * is at least 1.
+ */
+static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
+{
+	int limit = len;
+	int size;
+
+	if (limit > max)
+		limit = max;
+
+	for (size = 1; (size << 1) <= limit; size <<= 1) {
+		/* stop as soon as either address loses alignment */
+		if ((src & 0x1) || (dst & 0x1))
+			break;
+		src >>= 1;
+		dst >>= 1;
+	}
+	return size;
+}
+
+/*
+ * Copy @n bytes from the mapped PCI address @src to kernel memory @dst
+ * using a sequence of loads of at most 8 bytes each.
+ *
+ * Returns 0 on success, or the non-zero result of the first failing
+ * zpci_read_single(); in that case the copy stops at that point.
+ */
+static inline int zpci_memcpy_fromio(void *dst,
+				     const volatile void __iomem *src,
+				     unsigned long n)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)];
+	u64 req, offset = ZPCI_OFFSET(src);
+	int size, rc = 0;
+
+	while (n > 0) {
+		/*
+		 * Size the access from the *current* I/O offset.  @src itself
+		 * is never advanced, so using it here would keep reporting
+		 * the alignment of the start address and could select an
+		 * access that is misaligned for the offset actually used.
+		 * Only the low bits matter, and those are identical for the
+		 * mapped address and its offset part.
+		 */
+		size = zpci_get_max_write_size(offset, (u64) dst, n, 8);
+		req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
+		rc = zpci_read_single(req, dst, offset, size);
+		if (rc)
+			break;
+		offset += size;
+		dst += size;
+		n -= size;
+	}
+	return rc;
+}
+
+/*
+ * Copy @n bytes from kernel memory @src to the mapped PCI address @dst.
+ * Chunks larger than 8 bytes (up to 128) go through zpci_write_block(),
+ * smaller ones through zpci_write_single().
+ *
+ * Returns -EINVAL if @src is NULL, 0 on success, or the non-zero result of
+ * the first failing store; in that case the copy stops at that point.
+ */
+static inline int zpci_memcpy_toio(volatile void __iomem *dst,
+				   const void *src, unsigned long n)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
+	u64 req, offset = ZPCI_OFFSET(dst);
+	int size, rc = 0;
+
+	if (!src)
+		return -EINVAL;
+
+	while (n > 0) {
+		/*
+		 * Size the access from the *current* I/O offset.  @dst itself
+		 * is never advanced, so using it here would keep reporting
+		 * the alignment of the start address and could select an
+		 * access that is misaligned for the offset actually used.
+		 */
+		size = zpci_get_max_write_size(offset, (u64) src, n, 128);
+		req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
+
+		if (size > 8) /* main path */
+			rc = zpci_write_block(req, src, offset);
+		else
+			rc = zpci_write_single(req, src, offset, size);
+		if (rc)
+			break;
+		offset += size;
+		src += size;
+		n -= size;
+	}
+	return rc;
+}
+
+/*
+ * Fill @count bytes at the mapped PCI address @dst with @val.  A temporary
+ * buffer of @count bytes is allocated with GFP_KERNEL, filled, and written
+ * out through zpci_memcpy_toio().
+ *
+ * Returns -ENOMEM if the buffer cannot be allocated, otherwise the result
+ * of zpci_memcpy_toio().
+ */
+static inline int zpci_memset_io(volatile void __iomem *dst,
+				 unsigned char val, size_t count)
+{
+	int rc;
+	u8 *pattern = kmalloc(count, GFP_KERNEL);
+
+	if (!pattern)
+		return -ENOMEM;
+
+	memset(pattern, val, count);
+	rc = zpci_memcpy_toio(dst, pattern, count);
+	kfree(pattern);
+
+	return rc;
+}
+
+#endif /* CONFIG_PCI */
+
+#endif /* _ASM_S390_PCI_IO_H */
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
new file mode 100644
index 00000000000..fa91e009745
--- /dev/null
+++ b/arch/s390/include/asm/percpu.h
@@ -0,0 +1,190 @@
+#ifndef __ARCH_S390_PERCPU__
+#define __ARCH_S390_PERCPU__
+
+#include <linux/preempt.h>
+#include <asm/cmpxchg.h>
+
+/*
+ * s390 uses its own implementation for per cpu data, the offset of
+ * the cpu local data area is cached in the cpu's lowcore memory.
+ */
+#define __my_cpu_offset S390_lowcore.percpu_offset
+
+#ifdef CONFIG_64BIT
+
+/*
+ * For 64 bit module code, the module may be more than 4G above the
+ * per cpu area, use weak definitions to force the compiler to
+ * generate external references.
+ */
+#if defined(CONFIG_SMP) && defined(MODULE)
+#define ARCH_NEEDS_WEAK_PER_CPU
+#endif
+
+/*
+ * We use a compare-and-swap loop since that uses less cpu cycles than
+ * disabling and enabling interrupts like the generic variant would do.
+ *
+ * arch_this_cpu_to_op_simple(pcp, val, op) applies "pcp = pcp op val" to
+ * this CPU's instance of @pcp: with preemption disabled it reads the
+ * current value and retries cmpxchg() until the value it read is the one
+ * that got replaced.  The statement expression evaluates to the new value.
+ * Note that @val is expanded inside the retry loop and may therefore be
+ * evaluated more than once.
+ */
+#define arch_this_cpu_to_op_simple(pcp, val, op) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ old__, new__, prev__; \
+ pcp_op_T__ *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ prev__ = *ptr__; \
+ do { \
+ old__ = prev__; \
+ new__ = old__ op (val); \
+ prev__ = cmpxchg(ptr__, old__, new__); \
+ } while (prev__ != old__); \
+ preempt_enable(); \
+ new__; \
+})
+
+#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+
+#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/*
+ * arch_this_cpu_add(pcp, val, op1, op2, szcast): add @val to this CPU's
+ * instance of @pcp with a single instruction, under preempt_disable().
+ *
+ * If the value is a compile-time constant that, cast to @szcast, lies in
+ * the range -128..127, the immediate-operand instruction @op2 is used;
+ * otherwise @op1 is used with the value in a register (the previous
+ * contents are loaded into old__ and discarded).
+ * The expansion is a plain brace block, not an expression: it yields no
+ * value.
+ */
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
+{ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ if (__builtin_constant_p(val__) && \
+ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
+ asm volatile( \
+ op2 " %[ptr__],%[val__]\n" \
+ : [ptr__] "+Q" (*ptr__) \
+ : [val__] "i" ((szcast)val__) \
+ : "cc"); \
+ } else { \
+ asm volatile( \
+ op1 " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ } \
+ preempt_enable(); \
+}
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
+
+/*
+ * arch_this_cpu_add_return(pcp, val, op): add @val to this CPU's instance
+ * of @pcp using instruction @op, under preempt_disable().  The instruction
+ * stores the previous contents in old__; the statement expression
+ * evaluates to old__ + val__, i.e. the value after the addition.
+ */
+#define arch_this_cpu_add_return(pcp, val, op) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ asm volatile( \
+ op " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ preempt_enable(); \
+ old__ + val__; \
+})
+
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
+
+/*
+ * arch_this_cpu_to_op(pcp, val, op): apply instruction @op with operand
+ * @val to this CPU's instance of @pcp, under preempt_disable().  The
+ * previous contents are loaded into old__ and discarded.
+ * The expansion is a plain brace block, not an expression: it yields no
+ * value.
+ */
+#define arch_this_cpu_to_op(pcp, val, op) \
+{ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ asm volatile( \
+ op " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ preempt_enable(); \
+}
+
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan")
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang")
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao")
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog")
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/*
+ * arch_this_cpu_cmpxchg(pcp, oval, nval): run cmpxchg() on this CPU's
+ * instance of @pcp with preemption disabled.  Evaluates to the value
+ * returned by cmpxchg().
+ */
+#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
+({ \
+	typeof(pcp) *ptr__; \
+	typeof(pcp) ret__; \
+	preempt_disable(); \
+	ptr__ = __this_cpu_ptr(&(pcp)); \
+	ret__ = cmpxchg(ptr__, oval, nval); \
+	preempt_enable(); \
+	ret__; \
+})
+
+#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+
+/*
+ * arch_this_cpu_xchg(pcp, nval): run xchg() on this CPU's instance of @pcp
+ * with preemption disabled.  Evaluates to the value returned by xchg().
+ */
+#define arch_this_cpu_xchg(pcp, nval) \
+({ \
+ typeof(pcp) *ptr__; \
+ typeof(pcp) ret__; \
+ preempt_disable(); \
+ ptr__ = __this_cpu_ptr(&(pcp)); \
+ ret__ = xchg(ptr__, nval); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+
+/*
+ * arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2): run
+ * __cmpxchg_double() on this CPU's instances of @pcp1 and @pcp2 with
+ * preemption disabled.  The old/new arguments are each evaluated once into
+ * locals typed like the corresponding per-cpu variable.  Evaluates to the
+ * int returned by __cmpxchg_double().
+ */
+#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \
+({ \
+ typeof(pcp1) o1__ = (o1), n1__ = (n1); \
+ typeof(pcp2) o2__ = (o2), n2__ = (n2); \
+ typeof(pcp1) *p1__; \
+ typeof(pcp2) *p2__; \
+ int ret__; \
+ preempt_disable(); \
+ p1__ = __this_cpu_ptr(&(pcp1)); \
+ p2__ = __this_cpu_ptr(&(pcp2)); \
+ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
+#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
+
+#endif /* CONFIG_64BIT */
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
new file mode 100644
index 00000000000..159a8ec6da9
--- /dev/null
+++ b/arch/s390/include/asm/perf_event.h
@@ -0,0 +1,96 @@
+/*
+ * Performance event support - s390 specific definitions.
+ *
+ * Copyright IBM Corp. 2009, 2013
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
+
+#ifdef CONFIG_64BIT
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/cpu_mf.h>
+
+/* Per-CPU flags for PMU states */
+#define PMU_F_RESERVED 0x1000
+#define PMU_F_ENABLED 0x2000
+#define PMU_F_IN_USE 0x4000
+#define PMU_F_ERR_IBE 0x0100
+#define PMU_F_ERR_LSDA 0x0200
+#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf definitions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page);
+#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id) \
+ PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name)
+
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+ unsigned char in_guest:1; /* guest sample */
+ unsigned long reserved:63; /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \
+ PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */
+
+#define REG_NONE 0
+#define REG_OVERFLOW 1
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc) ((hwc)->config)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space. Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE
+ u64 format;
+ u32 size; /* Size of sf_raw_sample */
+ u16 bsdes; /* Basic-sampling data entry size */
+ u16 dsdes; /* Diagnostic-sampling data entry size */
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+ u8 padding[]; /* Padding to next multiple of 8 */
+} __packed;
+
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
+#endif /* CONFIG_64BIT */
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
new file mode 100644
index 00000000000..9e18a61d3df
--- /dev/null
+++ b/arch/s390/include/asm/pgalloc.h
@@ -0,0 +1,156 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/pgalloc.h"
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef _S390_PGALLOC_H
+#define _S390_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+unsigned long *crst_table_alloc(struct mm_struct *);
+void crst_table_free(struct mm_struct *, unsigned long *);
+
+unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
+void page_table_free(struct mm_struct *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+
+void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
+ bool init_skey);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned long key, bool nq);
+
+static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
+{
+ typedef struct { char _[n]; } addrtype;
+
+ *s = val;
+ n = (n / 256) - 1;
+ asm volatile(
+#ifdef CONFIG_64BIT
+ " mvc 8(248,%0),0(%0)\n"
+#else
+ " mvc 4(252,%0),0(%0)\n"
+#endif
+ "0: mvc 256(256,%0),0(%0)\n"
+ " la %0,256(%0)\n"
+ " brct %1,0b\n"
+ : "+a" (s), "+d" (n), "=m" (*(addrtype *) s)
+ : "m" (*(addrtype *) s));
+}
+
+static inline void crst_table_init(unsigned long *crst, unsigned long entry)
+{
+ clear_table(crst, entry, sizeof(unsigned long)*2048);
+}
+
+#ifndef CONFIG_64BIT
+
+static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+{
+ return _SEGMENT_ENTRY_EMPTY;
+}
+
+#define pud_alloc_one(mm,address) ({ BUG(); ((pud_t *)2); })
+#define pud_free(mm, x) do { } while (0)
+
+#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
+#define pmd_free(mm, x) do { } while (0)
+
+#define pgd_populate(mm, pgd, pud) BUG()
+#define pud_populate(mm, pud, pmd) BUG()
+
+#else /* CONFIG_64BIT */
+
+static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+{
+ if (mm->context.asce_limit <= (1UL << 31))
+ return _SEGMENT_ENTRY_EMPTY;
+ if (mm->context.asce_limit <= (1UL << 42))
+ return _REGION3_ENTRY_EMPTY;
+ return _REGION2_ENTRY_EMPTY;
+}
+
+int crst_table_upgrade(struct mm_struct *, unsigned long limit);
+void crst_table_downgrade(struct mm_struct *, unsigned long limit);
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ unsigned long *table = crst_table_alloc(mm);
+ if (table)
+ crst_table_init(table, _REGION3_ENTRY_EMPTY);
+ return (pud_t *) table;
+}
+#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud)
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ return (pmd_t *) table;
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ pgtable_pmd_page_dtor(virt_to_page(pmd));
+ crst_table_free(mm, (unsigned long *) pmd);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
+}
+
+#endif /* CONFIG_64BIT */
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ spin_lock_init(&mm->context.list_lock);
+ INIT_LIST_HEAD(&mm->context.pgtable_list);
+ INIT_LIST_HEAD(&mm->context.gmap_list);
+ return (pgd_t *) crst_table_alloc(mm);
+}
+#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
+
+static inline void pmd_populate(struct mm_struct *mm,
+ pmd_t *pmd, pgtable_t pte)
+{
+ pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
+}
+
+#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+
+#define pmd_pgtable(pmd) \
+ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
+
+/*
+ * page table entry allocation/free routines.
+ */
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
+
+#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
+#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
+
+extern void rcu_table_freelist_finish(void);
+
+#endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
new file mode 100644
index 00000000000..fcba5e03839
--- /dev/null
+++ b/arch/s390/include/asm/pgtable.h
@@ -0,0 +1,1739 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ * Ulrich Weigand (weigand@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/pgtable.h"
+ */
+
+#ifndef _ASM_S390_PGTABLE_H
+#define _ASM_S390_PGTABLE_H
+
+/*
+ * The Linux memory management assumes a three-level page table setup. For
+ * s390 31 bit we "fold" the mid level into the top-level page table, so
+ * that we physically have the same two-level page table as the s390 mmu
+ * expects in 31 bit mode. For s390 64 bit we use three of the five levels
+ * the hardware provides (region first and region second tables are not
+ * used).
+ *
+ * The "pgd_xxx()" functions are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the S390 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+
+extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
+extern void paging_init(void);
+extern void vmem_map_init(void);
+
+/*
+ * The S390 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero; used
+ * for zero-mapped memory areas etc..
+ */
+
+extern unsigned long empty_zero_page;
+extern unsigned long zero_page_mask;
+
+#define ZERO_PAGE(vaddr) \
+ (virt_to_page((void *)(empty_zero_page + \
+ (((unsigned long)(vaddr)) &zero_page_mask))))
+#define __HAVE_COLOR_ZERO_PAGE
+
+/* TODO: s390 cannot support io_remap_pfn_range... */
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * PMD_SHIFT determines the size of the area a second-level page
+ * table can map
+ * PGDIR_SHIFT determines what a third-level page table entry can map
+ */
+#ifndef CONFIG_64BIT
+# define PMD_SHIFT 20
+# define PUD_SHIFT 20
+# define PGDIR_SHIFT 20
+#else /* CONFIG_64BIT */
+# define PMD_SHIFT 20
+# define PUD_SHIFT 31
+# define PGDIR_SHIFT 42
+#endif /* CONFIG_64BIT */
+
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/*
+ * entries per page directory level: the S390 is two-level, so
+ * we don't really have any PMD directory physically.
+ * for S390 segment-table entries are combined to one PGD
+ * that leads to 1024 pte per pgd
+ */
+#define PTRS_PER_PTE 256
+#ifndef CONFIG_64BIT
+#define PTRS_PER_PMD 1
+#define PTRS_PER_PUD 1
+#else /* CONFIG_64BIT */
+#define PTRS_PER_PMD 2048
+#define PTRS_PER_PUD 2048
+#endif /* CONFIG_64BIT */
+#define PTRS_PER_PGD 2048
+
+#define FIRST_USER_ADDRESS 0
+
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
+#define pmd_ERROR(e) \
+ printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
+
+#ifndef __ASSEMBLY__
+/*
+ * The vmalloc and module area will always be on the topmost area of the kernel
+ * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules.
+ * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
+ * modules will reside. That makes sure that inter module branches always
+ * happen without trampolines and in addition the placement within a 2GB frame
+ * is branch prediction unit friendly.
+ */
+extern unsigned long VMALLOC_START;
+extern unsigned long VMALLOC_END;
+extern struct page *vmemmap;
+
+#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
+
+#ifdef CONFIG_64BIT
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
+#define MODULES_VADDR MODULES_VADDR
+#define MODULES_END MODULES_END
+#define MODULES_LEN (1UL << 31)
+#endif
+
+/*
+ * A 31 bit pagetable entry of S390 has following format:
+ * | PFRA | | OS |
+ * 0 0IP0
+ * 00000000001111111111222222222233
+ * 01234567890123456789012345678901
+ *
+ * I Page-Invalid Bit: Page is not available for address-translation
+ * P Page-Protection Bit: Store access not possible for page
+ *
+ * A 31 bit segmenttable entry of S390 has following format:
+ * | P-table origin | |PTL
+ * 0 IC
+ * 00000000001111111111222222222233
+ * 01234567890123456789012345678901
+ *
+ * I Segment-Invalid Bit: Segment is not available for address-translation
+ * C Common-Segment Bit: Segment is not private (PoP 3-30)
+ * PTL Page-Table-Length: Page-table length ((PTL+1)*16 entries -> up to 256)
+ *
+ * The 31 bit segmenttable origin of S390 has following format:
+ *
+ * |S-table origin | | STL |
+ * X **GPS
+ * 00000000001111111111222222222233
+ * 01234567890123456789012345678901
+ *
+ * X Space-Switch event:
+ * G Segment-Invalid Bit: *
+ * P Private-Space Bit: Segment is not private (PoP 3-30)
+ * S Storage-Alteration:
+ * STL Segment-Table-Length: Segment-table length ((STL+1)*16 entries -> up to 2048)
+ *
+ * A 64 bit pagetable entry of S390 has following format:
+ * | PFRA |0IPC| OS |
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Page-Invalid Bit: Page is not available for address-translation
+ * P Page-Protection Bit: Store access not possible for page
+ * C Change-bit override: HW is not required to set change bit
+ *
+ * A 64 bit segmenttable entry of S390 has following format:
+ * | P-table origin | TT
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit: Segment is not available for address-translation
+ * C Common-Segment Bit: Segment is not private (PoP 3-30)
+ * P Page-Protection Bit: Store access not possible for page
+ * TT Type 00
+ *
+ * A 64 bit region table entry of S390 has following format:
+ * | S-table origin | TF TTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit: Segment is not available for address-translation
+ * TT Type 01
+ * TF
+ * TL Table length
+ *
+ * The 64 bit regiontable origin of S390 has following format:
+ * | region table origin | DTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * X Space-Switch event:
+ * G Segment-Invalid Bit:
+ * P Private-Space Bit:
+ * S Storage-Alteration:
+ * R Real space
+ * TL Table-Length:
+ *
+ * A storage key has the following format:
+ * | ACC |F|R|C|0|
+ * 0 3 4 5 6 7
+ * ACC: access key
+ * F : fetch protection bit
+ * R : referenced bit
+ * C : changed bit
+ */
+
+/* Hardware bits in the page table entry */
+#define _PAGE_CO 0x100 /* HW Change-bit override */
+#define _PAGE_PROTECT 0x200 /* HW read-only bit */
+#define _PAGE_INVALID 0x400 /* HW invalid bit */
+#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
+
+/* Software bits in the page table entry */
+#define _PAGE_PRESENT 0x001 /* SW pte present bit */
+#define _PAGE_TYPE 0x002 /* SW pte type bit */
+#define _PAGE_YOUNG 0x004 /* SW pte young bit */
+#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
+#define _PAGE_READ 0x010 /* SW pte read bit */
+#define _PAGE_WRITE 0x020 /* SW pte write bit */
+#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
+#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */
+#define __HAVE_ARCH_PTE_SPECIAL
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
+ _PAGE_DIRTY | _PAGE_YOUNG)
+
+/*
+ * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
+ * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
+ * is used to distinguish present from not-present ptes. It is changed only
+ * with the page table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte:
+ *
+ * 842100000000
+ * 000084210000
+ * 000000008421
+ * .IR...wrdytp
+ * empty .10...000000
+ * swap .10...xxxx10
+ * file .11...xxxxx0
+ * prot-none, clean, old .11...000001
+ * prot-none, clean, young .11...000101
+ * prot-none, dirty, old .10...001001
+ * prot-none, dirty, young .10...001101
+ * read-only, clean, old .11...010001
+ * read-only, clean, young .01...010101
+ * read-only, dirty, old .11...011001
+ * read-only, dirty, young .01...011101
+ * read-write, clean, old .11...110001
+ * read-write, clean, young .01...110101
+ * read-write, dirty, old .10...111001
+ * read-write, dirty, young .00...111101
+ *
+ * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
+ * pte_none is true for the bit pattern .10...000000, pte == 0x400
+ * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
+ * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
+ */
+
+#ifndef CONFIG_64BIT
+
+/* Bits in the segment table address-space-control-element */
+#define _ASCE_SPACE_SWITCH 0x80000000UL /* space switch event */
+#define _ASCE_ORIGIN_MASK 0x7ffff000UL /* segment table origin */
+#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */
+#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */
+#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
+
+/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
+#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
+#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
+
+#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
+
+/*
+ * Segment table entry encoding (I = invalid, R = read-only bit):
+ * ..R...I.....
+ * prot-none ..1...1.....
+ * read-only ..1...0.....
+ * read-write ..0...0.....
+ * empty ..0...1.....
+ */
+
+/* Page status table bits for virtualization */
+#define PGSTE_ACC_BITS 0xf0000000UL
+#define PGSTE_FP_BIT 0x08000000UL
+#define PGSTE_PCL_BIT 0x00800000UL
+#define PGSTE_HR_BIT 0x00400000UL
+#define PGSTE_HC_BIT 0x00200000UL
+#define PGSTE_GR_BIT 0x00040000UL
+#define PGSTE_GC_BIT 0x00020000UL
+#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
+
+#else /* CONFIG_64BIT */
+
+/* Bits in the segment/region table address-space-control-element */
+#define _ASCE_ORIGIN ~0xfffUL/* segment table origin */
+#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */
+#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */
+#define _ASCE_SPACE_SWITCH 0x40 /* space switch event */
+#define _ASCE_REAL_SPACE 0x20 /* real space control */
+#define _ASCE_TYPE_MASK 0x0c /* asce table type mask */
+#define _ASCE_TYPE_REGION1 0x0c /* region first table type */
+#define _ASCE_TYPE_REGION2 0x08 /* region second table type */
+#define _ASCE_TYPE_REGION3 0x04 /* region third table type */
+#define _ASCE_TYPE_SEGMENT 0x00 /* segment table type */
+#define _ASCE_TABLE_LENGTH 0x03 /* region table length */
+
+/* Bits in the region table entry */
+#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */
+#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
+#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
+#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
+#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
+#define _REGION_ENTRY_TYPE_R3 0x04 /* region third table type */
+#define _REGION_ENTRY_LENGTH 0x03 /* region third length */
+
+#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
+#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
+#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
+#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
+#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
+
+#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */
+#define _REGION3_ENTRY_RO 0x200 /* page protection bit */
+#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
+
+/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
+#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
+#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
+
+#define _SEGMENT_ENTRY (0)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
+
+#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
+#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
+#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
+#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+
+/*
+ * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ * ..R...I...y.
+ * prot-none, old ..0...1...1.
+ * prot-none, young ..1...1...1.
+ * read-only, old ..1...1...0.
+ * read-only, young ..1...0...1.
+ * read-write, old ..0...1...0.
+ * read-write, young ..0...0...1.
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ */
+
+#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
+
+/* Set of bits not changed in pmd_modify */
+#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
+ | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
+
+/* Page status table bits for virtualization */
+#define PGSTE_ACC_BITS 0xf000000000000000UL
+#define PGSTE_FP_BIT 0x0800000000000000UL
+#define PGSTE_PCL_BIT 0x0080000000000000UL
+#define PGSTE_HR_BIT 0x0040000000000000UL
+#define PGSTE_HC_BIT 0x0020000000000000UL
+#define PGSTE_GR_BIT 0x0004000000000000UL
+#define PGSTE_GC_BIT 0x0002000000000000UL
+#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
+
+#endif /* CONFIG_64BIT */
+
+/* Guest Page State used for virtualization */
+#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+
+/*
+ * A user page table pointer has the space-switch-event bit, the
+ * private-space-control bit and the storage-alteration-event-control
+ * bit set. A kernel page table pointer doesn't need them.
+ */
+#define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
+ _ASCE_ALT_EVENT)
+
+/*
+ * Page protection definitions.
+ */
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+ _PAGE_PROTECT)
+
+/*
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
+ */
+ /*xwr*/
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READ
+#define __P010 PAGE_READ
+#define __P011 PAGE_READ
+#define __P100 PAGE_READ
+#define __P101 PAGE_READ
+#define __P110 PAGE_READ
+#define __P111 PAGE_READ
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READ
+#define __S010 PAGE_WRITE
+#define __S011 PAGE_WRITE
+#define __S100 PAGE_READ
+#define __S101 PAGE_READ
+#define __S110 PAGE_WRITE
+#define __S111 PAGE_WRITE
+
+/*
+ * Segment entry (large page) protection definitions.
+ */
+#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_NONE)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
+
+static inline int mm_has_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(mm->context.has_pgste))
+ return 1;
+#endif
+ return 0;
+}
+
+static inline int mm_use_skey(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (mm->context.use_skey)
+ return 1;
+#endif
+ return 0;
+}
+
+/*
+ * pgd/pmd/pte query functions
+ */
+#ifndef CONFIG_64BIT
+
+static inline int pgd_present(pgd_t pgd) { return 1; }
+static inline int pgd_none(pgd_t pgd) { return 0; }
+static inline int pgd_bad(pgd_t pgd) { return 0; }
+
+static inline int pud_present(pud_t pud) { return 1; }
+static inline int pud_none(pud_t pud) { return 0; }
+static inline int pud_large(pud_t pud) { return 0; }
+static inline int pud_bad(pud_t pud) { return 0; }
+
+#else /* CONFIG_64BIT */
+
+static inline int pgd_present(pgd_t pgd)
+{
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+ return 1;
+ return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+ return 0;
+ return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ /*
+ * With dynamic page table levels the pgd can be a region table
+ * entry or a segment table entry. Check for the bits that are
+ * invalid for either table entry.
+ */
+ unsigned long mask =
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
+ ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
+ return (pgd_val(pgd) & mask) != 0;
+}
+
+static inline int pud_present(pud_t pud)
+{
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+ return 1;
+ return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pud_none(pud_t pud)
+{
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+ return 0;
+ return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
+}
+
+static inline int pud_large(pud_t pud)
+{
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
+ return 0;
+ return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
+}
+
+static inline int pud_bad(pud_t pud)
+{
+ /*
+ * With dynamic page table levels the pud can be a region table
+ * entry or a segment table entry. Check for the bits that are
+ * invalid for either table entry.
+ */
+ unsigned long mask =
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
+ ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
+ return (pud_val(pud) & mask) != 0;
+}
+
+#endif /* CONFIG_64BIT */
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+ return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+#else
+ return 0;
+#endif
+}
+
+static inline int pmd_prot_none(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_large(pmd))
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+#endif
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+ if (pmd_prot_none(pmd))
+ return 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ int young = 0;
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd))
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
+ else
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+#endif
+ return young;
+}
+
+static inline int pte_present(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x001) == 0x001 */
+ return (pte_val(pte) & _PAGE_PRESENT) != 0;
+}
+
+static inline int pte_none(pte_t pte)
+{
+ /* Bit pattern: pte == 0x400 */
+ return pte_val(pte) == _PAGE_INVALID;
+}
+
+static inline int pte_swap(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x603) == 0x402 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
+ _PAGE_TYPE | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_TYPE);
+}
+
+static inline int pte_file(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x601) == 0x600 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_PROTECT);
+}
+
+static inline int pte_special(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_SPECIAL);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t a, pte_t b)
+{
+ return pte_val(a) == pte_val(b);
+}
+
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+ unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long old;
+
+ preempt_disable();
+ asm(
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
+ " csg %0,%1,%2\n"
+ " jl 0b\n"
+ : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+ return __pgste(new);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ asm(
+ " nihh %1,0xff7f\n" /* clear PCL bit */
+ " stg %1,%0\n"
+ : "=Q" (ptep[PTRS_PER_PTE])
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
+ preempt_enable();
+#endif
+}
+
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
+}
+
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
+
+/*
+ * Fold the storage key of the page mapped by *@ptep into @pgste.
+ *
+ * Nothing is done if the mm does not use storage keys or if the pte is
+ * invalid. Otherwise the changed and referenced bits of the key are
+ * or-ed into the pgste (shifted left by 48) and the access key and
+ * fetch protection bits of the pgste are replaced by the ones of the
+ * key (shifted left by 56). The updated value is returned; the pgste
+ * in memory is not written here.
+ */
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long address, bits, skey;
+
+ if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
+ return pgste;
+ address = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ /* Transfer page changed & referenced bit to guest bits in pgste */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ /* Copy page access key and fetch protection bit to pgste */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+ return pgste;
+
+}
+
+/*
+ * Set the storage key of the page that @entry maps from the key
+ * information kept in @pgste.
+ *
+ * Nothing is done if the mm does not use storage keys or if @entry is
+ * invalid. The pte currently stored at @ptep is expected to be invalid
+ * (checked with VM_BUG_ON).
+ */
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long address;
+ unsigned long nkey;
+
+ if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+ return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+ address = pte_val(entry) & PAGE_MASK;
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ *
+ * NOTE(review): the second statement below also copies the
+ * PGSTE_GR_BIT / PGSTE_GC_BIT bits (shifted right by 48) into the
+ * new key, which does not match "set real key C/R to 0" - confirm
+ * which of the two is intended.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ page_set_storage_key(address, nkey, 0);
+#endif
+}
+
+/*
+ * Store @entry at @ptep and return @pgste, with the user-dirty bit set
+ * if the stored pte allows write access.
+ */
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+{
+	unsigned long val = pte_val(entry);
+	int writable_mapping = (val & _PAGE_PRESENT) &&
+			       (val & _PAGE_WRITE) &&
+			       !(val & _PAGE_INVALID);
+
+	if (writable_mapping && !MACHINE_HAS_ESOP) {
+		/*
+		 * Without enhanced suppression-on-protection force
+		 * the dirty bit on for all writable ptes.
+		 */
+		pte_val(entry) |= _PAGE_DIRTY;
+		pte_val(entry) &= ~_PAGE_PROTECT;
+	}
+	/* This pte allows write access, set user-dirty */
+	if (writable_mapping && !(pte_val(entry) & _PAGE_PROTECT))
+		pgste_val(pgste) |= PGSTE_UC_BIT;
+	*ptep = entry;
+	return pgste;
+}
+
+/**
+ * struct gmap - guest address space
+ * @list: list linkage of this gmap (NOTE(review): the list it is put on
+ *	  is not visible in this header - confirm)
+ * @mm: pointer to the parent mm_struct
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @private: opaque pointer, not interpreted in this header (presumably
+ *	     owned by the user of the gmap - confirm)
+ * @crst_list: list of all crst tables used in the guest address space
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ */
+struct gmap {
+ struct list_head list;
+ struct mm_struct *mm;
+ unsigned long *table;
+ unsigned long asce;
+ void *private;
+ struct list_head crst_list;
+ bool pfault_enabled;
+};
+
+/**
+ * struct gmap_rmap - reverse mapping for segment table entries
+ * @list: list linkage of this reverse mapping (NOTE(review): presumably
+ *	  chained on the mapper list of a struct gmap_pgtable - confirm)
+ * @gmap: pointer to the struct gmap
+ * @entry: pointer to a segment table entry
+ * @vmaddr: virtual address in the guest address space
+ */
+struct gmap_rmap {
+ struct list_head list;
+ struct gmap *gmap;
+ unsigned long *entry;
+ unsigned long vmaddr;
+};
+
+/**
+ * struct gmap_pgtable - gmap information attached to a page table
+ * @vmaddr: address of the 1MB segment in the process virtual memory
+ * @mapper: list of segment table entries mapping a page table
+ */
+struct gmap_pgtable {
+ unsigned long vmaddr;
+ struct list_head mapper;
+};
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @list: list linkage of the notifier (NOTE(review): presumably used by
+ *	  gmap_register_ipte_notifier() - confirm)
+ * @notifier_call: address of callback function, called with the gmap
+ *		   and an address
+ */
+struct gmap_notifier {
+ struct list_head list;
+ void (*notifier_call)(struct gmap *gmap, unsigned long address);
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(unsigned long address, struct gmap *);
+unsigned long gmap_translate(unsigned long address, struct gmap *);
+unsigned long __gmap_fault(unsigned long address, struct gmap *);
+unsigned long gmap_fault(unsigned long address, struct gmap *);
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
+void __gmap_zap(unsigned long address, struct gmap *);
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
+
+
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
+
+/*
+ * If the invalidation notification bit is set in @pgste, clear it in
+ * the returned value and call gmap_do_ipte_notify() for @ptep. The
+ * pgste in memory is not written; callers must use the return value.
+ */
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	if (!(pgste_val(pgste) & PGSTE_IN_BIT))
+		return pgste;
+	pgste_val(pgste) &= ~PGSTE_IN_BIT;
+	gmap_do_ipte_notify(mm, ptep);
+#endif
+	return pgste;
+}
+
+/*
+ * Hook for architectures that need to do special things when PTEs
+ * within a page table are directly modified.
+ *
+ * Without pgstes the entry is stored directly (with _PAGE_CO added for
+ * valid entries on machines with EDAT1). With pgstes the store is done
+ * under the pgste lock, together with the storage key update.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry)
+{
+	pgste_t pgste;
+
+	if (!mm_has_pgste(mm)) {
+		if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
+			pte_val(entry) |= _PAGE_CO;
+		*ptep = entry;
+		return;
+	}
+
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+	pgste_set_key(ptep, pgste, entry, mm);
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
+}
+
+/*
+ * query functions pte_write/pte_dirty/pte_young only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+static inline int pte_write(pte_t pte)
+{
+	/* 1 if _PAGE_WRITE is set, 0 otherwise */
+	return !!(pte_val(pte) & _PAGE_WRITE);
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+	/* 1 if _PAGE_DIRTY is set, 0 otherwise */
+	return !!(pte_val(pte) & _PAGE_DIRTY);
+}
+
+static inline int pte_young(pte_t pte)
+{
+	/* 1 if _PAGE_YOUNG is set, 0 otherwise */
+	return !!(pte_val(pte) & _PAGE_YOUNG);
+}
+
+#define __HAVE_ARCH_PTE_UNUSED
+static inline int pte_unused(pte_t pte)
+{
+	/* Mask out everything except _PAGE_UNUSED. */
+	unsigned long unused = pte_val(pte) & _PAGE_UNUSED;
+
+	return unused;
+}
+
+/*
+ * pgd/pmd/pte modification functions
+ */
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+#ifdef CONFIG_64BIT
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
+#endif
+}
+
+static inline void pud_clear(pud_t *pud)
+{
+#ifdef CONFIG_64BIT
+ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+#endif
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pte_val(*ptep) = _PAGE_INVALID;
+}
+
+/*
+ * The following pte modification functions only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+/*
+ * Apply @newprot to @pte and return the result.
+ *
+ * Only the bits in _PAGE_CHG_MASK are kept from the old pte, then the
+ * bits of @newprot are or-ed in. Afterwards the invalid bit is cleared
+ * again for young, readable ptes and the protect bit is cleared again
+ * for dirty, writable ptes.
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pte_val(pte) &= _PAGE_CHG_MASK;
+ pte_val(pte) |= pgprot_val(newprot);
+ /*
+ * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
+ * invalid bit set, clear it again for readable, young pages
+ */
+ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+ pte_val(pte) &= ~_PAGE_INVALID;
+ /*
+ * newprot for PAGE_READ and PAGE_WRITE has the page protection
+ * bit set, clear it again for writable, dirty pages
+ */
+ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) &= ~_PAGE_PROTECT;
+ return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	/* drop the write permission, turn on the protect bit */
+	unsigned long val = pte_val(pte);
+
+	val &= ~_PAGE_WRITE;
+	val |= _PAGE_PROTECT;
+	pte_val(pte) = val;
+	return pte;
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	unsigned long val = pte_val(pte) | _PAGE_WRITE;
+
+	/* the protect bit may only be dropped if the pte is dirty */
+	if (val & _PAGE_DIRTY)
+		val &= ~_PAGE_PROTECT;
+	pte_val(pte) = val;
+	return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	/* clear the dirty bit, turn on the protect bit */
+	unsigned long val = pte_val(pte);
+
+	val &= ~_PAGE_DIRTY;
+	val |= _PAGE_PROTECT;
+	pte_val(pte) = val;
+	return pte;
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	unsigned long val = pte_val(pte) | _PAGE_DIRTY;
+
+	/* the protect bit may only be dropped if the pte is writable */
+	if (val & _PAGE_WRITE)
+		val &= ~_PAGE_PROTECT;
+	pte_val(pte) = val;
+	return pte;
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	/* clear the young bit, turn on the invalid bit */
+	unsigned long val = pte_val(pte);
+
+	val &= ~_PAGE_YOUNG;
+	val |= _PAGE_INVALID;
+	pte_val(pte) = val;
+	return pte;
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	unsigned long val = pte_val(pte) | _PAGE_YOUNG;
+
+	/* the invalid bit may only be dropped if the pte is readable */
+	if (val & _PAGE_READ)
+		val &= ~_PAGE_INVALID;
+	pte_val(pte) = val;
+	return pte;
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SPECIAL;
+ return pte;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_LARGE;
+ return pte;
+}
+#endif
+
+/*
+ * Issue an IPTE for the pte at @ptep that maps @address: the pte is
+ * invalidated and the TLB entry is flushed globally (see the comment
+ * at the asm statement).
+ */
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+#ifndef CONFIG_64BIT
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
+#endif
+ /* Invalidation + global TLB flush for the pte */
+ asm volatile(
+ " ipte %2,%3"
+ : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+/*
+ * Same as __ptep_ipte() but with a local TLB flush only (see the
+ * comment at the asm statement). The instruction is emitted via .insn
+ * with the last operand set to 1 instead of the ipte mnemonic.
+ */
+static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+#ifndef CONFIG_64BIT
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
+#endif
+ /* Invalidation + local TLB flush for the pte */
+ asm volatile(
+ " .insn rrf,0xb2210000,%2,%3,0,1"
+ : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+/*
+ * Invalidate *@ptep and flush the TLB entry for @address right away.
+ *
+ * Returns early if the pte is already invalid. Otherwise 0x10000 is
+ * added to mm->context.attach_count for the duration of the flush.
+ * The cpu-local IPTE is used only if the machine has MACHINE_HAS_TLB_LC,
+ * the low 16 bits of the attach count do not exceed 1 for the current
+ * active mm (0 for any other mm) and the cpumask of the mm consists of
+ * just the current cpu. In all other cases the global IPTE is issued.
+ */
+static inline void ptep_flush_direct(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ int active, count;
+
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_ipte_local(address, ptep);
+ else
+ __ptep_ipte(address, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+/*
+ * Invalidate *@ptep, deferring the TLB flush if possible.
+ *
+ * Returns early if the pte is already invalid. Otherwise 0x10000 is
+ * added to mm->context.attach_count for the duration of the operation.
+ * If the low 16 bits of the attach count do not exceed 1 for the
+ * current active mm (0 for any other mm), the invalid bit is simply
+ * set in the pte and mm->context.flush_mm is set to 1 so the flush can
+ * be done later. In all other cases the global IPTE is issued.
+ */
+static inline void ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ int active, count;
+
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pte_val(*ptep) |= _PAGE_INVALID;
+ mm->context.flush_mm = 1;
+ } else
+ __ptep_ipte(address, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+/*
+ * Get (and clear) the user dirty bit for a pte.
+ *
+ * Returns 0 for an mm without pgstes. Otherwise the PGSTE_UC_BIT is
+ * read and cleared under the pgste lock and its old state (0 or 1) is
+ * returned. If the bit was set and the pte is present, the pte is
+ * flushed with IPTE and stored again either write protected or
+ * invalid, so that the next write access can be detected.
+ */
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t pte;
+ int dirty;
+
+ if (!mm_has_pgste(mm))
+ return 0;
+ pgste = pgste_get_lock(ptep);
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pte = *ptep;
+ if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ __ptep_ipte(addr, ptep);
+ /* protect if ESOP is available or the pte is read-only, else invalidate */
+ if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_INVALID;
+ *ptep = pte;
+ }
+ pgste_set_unlock(ptep, pgste);
+ return dirty;
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+/*
+ * Return the young state of *@ptep and store the pte back marked old.
+ *
+ * The pte is read, flushed with ptep_flush_direct() and then written
+ * back as pte_mkold() of the value read. For an mm with pgstes this
+ * happens under the pgste lock and after the ipte notification.
+ */
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t pte;
+ int young;
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ }
+
+ pte = *ptep;
+ ptep_flush_direct(vma->vm_mm, addr, ptep);
+ young = pte_young(pte);
+ pte = pte_mkold(pte);
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
+
+ return young;
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ return ptep_test_and_clear_young(vma, address, ptep);
+}
+
+/*
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ * 1) ptep_get_and_clear
+ * 2) set_pte_at
+ * 3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way to implement this is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
+ *
+ * Returns the pte value read before the flush; *ptep is left with the
+ * value _PAGE_INVALID.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t pte;
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ }
+
+ pte = *ptep;
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
+
+ if (mm_has_pgste(mm)) {
+ /* use the saved copy, *ptep has been overwritten above */
+ pgste = pgste_update_all(&pte, pgste, mm);
+ pgste_set_unlock(ptep, pgste);
+ }
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+/*
+ * Start a protection change transaction on *@ptep.
+ *
+ * The current pte is read, flushed with ptep_flush_lazy() and returned
+ * to the caller. For an mm with pgstes the pgste lock is taken here and
+ * is NOT released: the updated pgste is stored with pgste_set(), which
+ * keeps the PCL bit, and the lock is dropped by the pgste_set_unlock()
+ * in ptep_modify_prot_commit().
+ */
+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		/*
+		 * pgste_ipte_notify() clears the notification bit only in
+		 * the value it returns. Keep the result, otherwise the
+		 * stale bit is written back by pgste_set() below.
+		 */
+		pgste = pgste_ipte_notify(mm, ptep, pgste);
+	}
+
+	pte = *ptep;
+	ptep_flush_lazy(mm, address, ptep);
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste, mm);
+		pgste_set(ptep, pgste);
+	}
+	return pte;
+}
+
+/*
+ * Finish a protection change transaction started with
+ * ptep_modify_prot_start() by storing @pte at @ptep.
+ *
+ * For an mm with pgstes the pgste is read without locking (the lock
+ * was already taken in ptep_modify_prot_start()), the storage key and
+ * the pte are set and the pgste lock is released.
+ */
+static inline void ptep_modify_prot_commit(struct mm_struct *mm,
+ unsigned long address,
+ pte_t *ptep, pte_t pte)
+{
+ pgste_t pgste;
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get(ptep);
+ pgste_set_key(ptep, pgste, pte, mm);
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+/*
+ * Clear *@ptep with an immediate TLB flush and return the old pte.
+ *
+ * For an mm with pgstes the returned pte additionally gets the
+ * _PAGE_UNUSED bit if the usage state in the pgste is
+ * _PGSTE_GPS_USAGE_UNUSED, and the storage key information is folded
+ * into the pgste before the pgste lock is released.
+ */
+static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t pte;
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ }
+
+ pte = *ptep;
+ ptep_flush_direct(vma->vm_mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(pte) |= _PAGE_UNUSED;
+ pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
+ pgste_set_unlock(ptep, pgste);
+ }
+ return pte;
+}
+
+/*
+ * The batched pte unmap code uses ptep_get_and_clear_full to clear the
+ * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
+ * tlbs of an mm if it can guarantee that the ptes of the mm_struct
+ * cannot be accessed while the batched unmap is running. In this case
+ * full==1 and a simple pte_clear is enough. See tlb.h.
+ *
+ * With @full set neither the pgste lock is taken nor is the pte
+ * flushed; the pte is only read and overwritten with _PAGE_INVALID.
+ * Returns the pte value read.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+ unsigned long address,
+ pte_t *ptep, int full)
+{
+ pgste_t pgste;
+ pte_t pte;
+
+ if (!full && mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ }
+
+ pte = *ptep;
+ if (!full)
+ ptep_flush_lazy(mm, address, ptep);
+ pte_val(*ptep) = _PAGE_INVALID;
+
+ if (!full && mm_has_pgste(mm)) {
+ pgste = pgste_update_all(&pte, pgste, mm);
+ pgste_set_unlock(ptep, pgste);
+ }
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+/*
+ * Write protect *@ptep.
+ *
+ * Nothing is changed if the pte is not writable. Otherwise the pte is
+ * flushed with ptep_flush_lazy() and stored again write protected (for
+ * an mm with pgstes under the pgste lock). Returns the pte value that
+ * is in place afterwards.
+ */
+static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t pte = *ptep;
+
+ if (pte_write(pte)) {
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
+ }
+
+ ptep_flush_lazy(mm, address, ptep);
+ pte = pte_wrprotect(pte);
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = pte;
+ }
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+/*
+ * Replace *@ptep by @entry.
+ *
+ * Returns 0 without touching anything if the pte already equals
+ * @entry. Otherwise the old pte is flushed with ptep_flush_direct(),
+ * @entry is stored (for an mm with pgstes under the pgste lock) and 1
+ * is returned. The @dirty argument is not used.
+ */
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ pgste_t pgste;
+
+ if (pte_same(*ptep, entry))
+ return 0;
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ }
+
+ ptep_flush_direct(vma->vm_mm, address, ptep);
+
+ if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_set_pte(ptep, pgste, entry);
+ pgste_set_unlock(ptep, pgste);
+ } else
+ *ptep = entry;
+ return 1;
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	pte_t entry;
+
+	/* combine address and protection bits, then mark the pte young */
+	pte_val(entry) = physpage + pgprot_val(pgprot);
+	entry = pte_mkyoung(entry);
+	return entry;
+}
+
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+	pte_t entry = mk_pte_phys(page_to_phys(page), pgprot);
+
+	/* a writable pte for a page that is already dirty starts out dirty */
+	if (pte_write(entry) && PageDirty(page))
+		return pte_mkdirty(entry);
+	return entry;
+}
+
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
+
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#ifndef CONFIG_64BIT
+
+#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+#define pud_deref(pmd) ({ BUG(); 0UL; })
+#define pgd_deref(pmd) ({ BUG(); 0UL; })
+
+#define pud_offset(pgd, address) ((pud_t *) pgd)
+#define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address))
+
+#else /* CONFIG_64BIT */
+
+#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+	unsigned long type = pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK;
+	pud_t *base;
+
+	/* only a region-second entry is dereferenced, otherwise the level is folded */
+	if (type == _REGION_ENTRY_TYPE_R2)
+		base = (pud_t *) pgd_deref(*pgd);
+	else
+		base = (pud_t *) pgd;
+	return base + pud_index(address);
+}
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+	unsigned long type = pud_val(*pud) & _REGION_ENTRY_TYPE_MASK;
+	pmd_t *base;
+
+	/* only a region-third entry is dereferenced, otherwise the level is folded */
+	if (type == _REGION_ENTRY_TYPE_R3)
+		base = (pmd_t *) pud_deref(*pud);
+	else
+		base = (pmd_t *) pud;
+	return base + pmd_index(address);
+}
+
+#endif /* CONFIG_64BIT */
+
+#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+
+/* Find an entry in the lowest level page table.. */
+#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
+#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
+#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_unmap(pte) do { } while (0)
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+	/*
+	 * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
+	 * Convert to segment table entry format; everything that is neither
+	 * PAGE_NONE nor PAGE_READ is mapped to SEGMENT_WRITE.
+	 */
+	unsigned long segment_prot;
+
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
+		segment_prot = pgprot_val(SEGMENT_NONE);
+	else if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
+		segment_prot = pgprot_val(SEGMENT_READ);
+	else
+		segment_prot = pgprot_val(SEGMENT_WRITE);
+	return segment_prot;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ } else {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ }
+#endif
+ return pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	/* remember the young state, it is re-applied after the change */
+	int was_young = pmd_young(pmd);
+
+	pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+	if (!was_young)
+		return pmd;
+	return pmd_mkyoung(pmd);
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pmd_t __pmd;
+ pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ return pmd_mkyoung(__pmd);
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+
+/*
+ * Invalidate the segment table entry at @pmdp with the CSP instruction.
+ *
+ * Register 2 holds the current entry, register 3 the same value with
+ * _SEGMENT_ENTRY_INVALID set and register 4 the address of the entry
+ * plus 5.
+ * NOTE(review): the 5 added to the address presumably encodes the CSP
+ * purge controls in the low bits - confirm against the Principles of
+ * Operation.
+ */
+static inline void __pmdp_csp(pmd_t *pmdp)
+{
+ register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+ register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+ _SEGMENT_ENTRY_INVALID;
+ register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+ asm volatile(
+ " csp %1,%3"
+ : "=m" (*pmdp)
+ : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+}
+
+/*
+ * Issue the instruction with opcode 0xb98e (emitted via .insn, last
+ * operand 0) for the segment table entry at @pmdp.
+ *
+ * The first operand is @pmdp moved back by pmd_index(@address)
+ * entries, i.e. the start of the table if @pmdp is the entry for
+ * @address; the second operand is @address masked with HPAGE_MASK.
+ */
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
+{
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,0"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+ : "cc" );
+}
+
+/*
+ * Same as __pmdp_idte() except that the last operand of the
+ * instruction is 1 instead of 0.
+ * NOTE(review): judging by the name and by its use in
+ * pmdp_flush_direct() this selects a cpu-local flush - confirm.
+ */
+static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
+{
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,1"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+ : "cc" );
+}
+
+/*
+ * Invalidate the segment table entry at @pmdp and flush it right away.
+ *
+ * Returns early if the entry is already invalid. Machines without
+ * IDTE use __pmdp_csp(). Otherwise 0x10000 is added to
+ * mm->context.attach_count for the duration of the flush and the local
+ * IDTE variant is used only if the machine has MACHINE_HAS_TLB_LC, the
+ * low 16 bits of the attach count do not exceed 1 for the current
+ * active mm (0 for any other mm) and the cpumask of the mm consists of
+ * just the current cpu.
+ */
+static inline void pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ int active, count;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return;
+ if (!MACHINE_HAS_IDTE) {
+ __pmdp_csp(pmdp);
+ return;
+ }
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __pmdp_idte_local(address, pmdp);
+ else
+ __pmdp_idte(address, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+/*
+ * Invalidate the segment table entry at @pmdp, deferring the TLB flush
+ * if possible.
+ *
+ * Returns early if the entry is already invalid. Otherwise 0x10000 is
+ * added to mm->context.attach_count for the duration of the operation.
+ * If the low 16 bits of the attach count do not exceed 1 for the
+ * current active mm (0 for any other mm), only the invalid bit is set
+ * in the entry and mm->context.flush_mm is set to 1. In all other
+ * cases the entry is flushed with IDTE, or with CSP on machines
+ * without IDTE.
+ */
+static inline void pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ int active, count;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ mm->context.flush_mm = 1;
+ } else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(address, pmdp);
+ else
+ __pmdp_csp(pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t entry)
+{
+	int valid = !(pmd_val(entry) & _SEGMENT_ENTRY_INVALID);
+
+	/* valid entries get the CO bit on machines with EDAT1 */
+	if (valid && MACHINE_HAS_EDAT1)
+		pmd_val(entry) |= _SEGMENT_ENTRY_CO;
+	*pmdp = entry;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ /* Do not clobber PROT_NONE segments! */
+ if (!pmd_prot_none(pmd))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ /* No dirty bit in the segment table entry. */
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmd_t pmd;
+
+ pmd = *pmdp;
+ pmdp_flush_direct(vma->vm_mm, address, pmdp);
+ *pmdp = pmd_mkold(pmd);
+ return pmd_young(pmd);
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+
+ pmdp_flush_direct(mm, address, pmdp);
+ pmd_clear(pmdp);
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmdp_flush_direct(vma->vm_mm, address, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long address, pmd_t *pmdp)
+{
+	pmd_t old = *pmdp;
+
+	/* nothing to do for an entry that is not writable */
+	if (!pmd_write(old))
+		return;
+	pmdp_flush_direct(mm, address, pmdp);
+	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old));
+}
+
+#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	/* 1 if MACHINE_HAS_HPAGE is set, 0 otherwise */
+	if (MACHINE_HAS_HPAGE)
+		return 1;
+	return 0;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return pmd_val(pmd) >> PAGE_SHIFT;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * 31 bit swap entry format:
+ * A page-table entry has some bits we have to treat in a special way.
+ * Bits 0, 20 and bit 23 have to be zero, otherwise a specification
+ * exception will occur instead of a page translation exception. The
+ * specification exception has the bad habit not to store necessary
+ * information in the lowcore.
+ * Bits 21, 22, 30 and 31 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
+ * This leaves the bits 1-19 and bits 24-29 to store type and offset.
+ * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
+ * plus 24 for the offset.
+ * 0| offset |0110|o|type |00|
+ * 0 0000000001111111111 2222 2 22222 33
+ * 0 1234567890123456789 0123 4 56789 01
+ *
+ * 64 bit swap entry format:
+ * A page-table entry has some bits we have to treat in a special way.
+ * Bits 52 and bit 55 have to be zero, otherwise a specification
+ * exception will occur instead of a page translation exception. The
+ * specification exception has the bad habit not to store necessary
+ * information in the lowcore.
+ * Bits 53, 54, 62 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
+ * This leaves the bits 0-51 and bits 56-61 to store type and offset.
+ * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
+ * plus 56 for the offset.
+ * | offset |0110|o|type |00|
+ * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66
+ * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23
+ */
+#ifndef CONFIG_64BIT
+#define __SWP_OFFSET_MASK (~0UL >> 12)
+#else
+#define __SWP_OFFSET_MASK (~0UL >> 11)
+#endif
+static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+{
+	unsigned long off = offset & __SWP_OFFSET_MASK;
+	unsigned long type_bits = (type & 0x1f) << 2;	/* 5 bit swap type */
+	unsigned long off_low = (off & 1UL) << 7;	/* lowest offset bit */
+	unsigned long off_high = (off & ~1UL) << 11;	/* remaining offset bits */
+	pte_t pte;
+
+	pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | type_bits |
+		       off_low | off_high;
+	return pte;
+}
+
+#define __swp_type(entry) (((entry).val >> 2) & 0x1f)
+#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1))
+#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+#ifndef CONFIG_64BIT
+# define PTE_FILE_MAX_BITS 26
+#else /* CONFIG_64BIT */
+# define PTE_FILE_MAX_BITS 59
+#endif /* CONFIG_64BIT */
+
+#define pte_to_pgoff(__pte) \
+ ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f))
+
+#define pgoff_to_pte(__off) \
+ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
+ | _PAGE_INVALID | _PAGE_PROTECT })
+
+#endif /* !__ASSEMBLY__ */
+
+#define kern_addr_valid(addr) (1)
+
+extern int vmem_add_mapping(unsigned long start, unsigned long size);
+extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
+extern void s390_enable_skey(void);
+
+/*
+ * No page table caches to initialise
+ */
+static inline void pgtable_cache_init(void) { }
+static inline void check_pgt_cache(void) { }
+
+#include <asm-generic/pgtable.h>
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
new file mode 100644
index 00000000000..6f02d452bbe
--- /dev/null
+++ b/arch/s390/include/asm/processor.h
@@ -0,0 +1,419 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/processor.h"
+ * Copyright (C) 1994, Linus Torvalds
+ */
+
+#ifndef __ASM_S390_PROCESSOR_H
+#define __ASM_S390_PROCESSOR_H
+
+#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
+#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
+
+#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
+#define _CIF_ASCE (1<<CIF_ASCE)
+
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+#include <asm/cpu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/setup.h>
+#include <asm/runtime_instr.h>
+
+/*
+ * Set bit number @flag in S390_lowcore.cpu_flags.
+ * The mask is built as unsigned long so it is not limited to the width
+ * of unsigned int.
+ */
+static inline void set_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags |= (1UL << flag);
+}
+
+/*
+ * Clear bit number @flag in S390_lowcore.cpu_flags.
+ * The mask is unsigned long on purpose: ~(1U << flag) is an unsigned int
+ * that would be zero-extended against a 64-bit word and clear its upper
+ * 32 bits as a side effect.
+ * NOTE(review): the width of cpu_flags is declared with the lowcore
+ * layout, not in this header - confirm.
+ */
+static inline void clear_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags &= ~(1UL << flag);
+}
+
+/*
+ * Return 1 if bit number @flag is set in S390_lowcore.cpu_flags, else 0.
+ * The mask is built as unsigned long so it is not limited to the width
+ * of unsigned int.
+ */
+static inline int test_cpu_flag(int flag)
+{
+	return !!(S390_lowcore.cpu_flags & (1UL << flag));
+}
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
+
+static inline void get_cpu_id(struct cpuid *ptr)
+{
+ asm volatile("stidp %0" : "=Q" (*ptr));
+}
+
+extern void s390_adjust_jiffies(void);
+extern const struct seq_operations cpuinfo_op;
+extern int sysctl_ieee_emulation_warnings;
+extern void execve_tail(void);
+
+/*
+ * User space process size: 2GB for 31 bit, 4TB or 8PB for 64 bit.
+ */
+#ifndef CONFIG_64BIT
+
+#define TASK_SIZE (1UL << 31)
+#define TASK_MAX_SIZE (1UL << 31)
+#define TASK_UNMAPPED_BASE (1UL << 30)
+
+#else /* CONFIG_64BIT */
+
+#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit)
+#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
+ (1UL << 30) : (1UL << 41))
+#define TASK_SIZE TASK_SIZE_OF(current)
+#define TASK_MAX_SIZE (1UL << 53)
+
+#endif /* CONFIG_64BIT */
+
+#ifndef CONFIG_64BIT
+#define STACK_TOP (1UL << 31)
+#define STACK_TOP_MAX (1UL << 31)
+#else /* CONFIG_64BIT */
+#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
+#define STACK_TOP_MAX (1UL << 42)
+#endif /* CONFIG_64BIT */
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+typedef struct {
+ __u32 ar4;
+} mm_segment_t;
+
+/*
+ * Thread structure
+ */
+struct thread_struct {
+ s390_fp_regs fp_regs;
+ unsigned int acrs[NUM_ACRS];
+ unsigned long ksp; /* kernel stack pointer */
+ mm_segment_t mm_segment;
+ unsigned long gmap_addr; /* address of last gmap fault. */
+ unsigned int gmap_pfault; /* signal of a pending guest pfault */
+ struct per_regs per_user; /* User specified PER registers */
+ struct per_event per_event; /* Cause of the last PER trap */
+ unsigned long per_flags; /* Flags to control debug behavior */
+ /* pfault_wait is used to block the process on a pfault event */
+ unsigned long pfault_wait;
+ struct list_head list;
+ /* cpu runtime instrumentation */
+ struct runtime_instr_cb *ri_cb;
+ int ri_signum;
+#ifdef CONFIG_64BIT
+ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
+#endif
+};
+
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE 1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND 2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND 4UL
+
+typedef struct thread_struct thread_struct;
+
+/*
+ * Stack layout of a C stack frame.
+ */
+#ifndef __PACK_STACK
+struct stack_frame {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned int empty2[8];
+};
+#else
+struct stack_frame {
+ unsigned long empty1[5];
+ unsigned int empty2[8];
+ unsigned long gprs[10];
+ unsigned long back_chain;
+};
+#endif
+
+#define ARCH_MIN_TASKALIGN 8
+
+#define INIT_THREAD { \
+ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
+}
+
+/*
+ * Do necessary setup to start up a new thread.
+ *
+ * Sets the user PSW mask (with both PSW_MASK_EA and PSW_MASK_BA), the
+ * PSW address and gpr 15 in @regs, then calls execve_tail().
+ * The macro arguments are parenthesized so that arbitrary expressions
+ * can be passed; note that @regs is still expanded more than once.
+ */
+#define start_thread(regs, new_psw, new_stackp) do {			\
+	(regs)->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
+	(regs)->psw.addr = (new_psw) | PSW_ADDR_AMODE;			\
+	(regs)->gprs[15] = (new_stackp);				\
+	execve_tail();							\
+} while (0)
+
+/*
+ * Variant of start_thread() that sets only PSW_MASK_BA (no PSW_MASK_EA)
+ * in the PSW mask and additionally calls crst_table_downgrade() on
+ * current->mm with a limit of 1UL << 31 before execve_tail().
+ * The macro arguments are parenthesized so that arbitrary expressions
+ * can be passed; note that @regs is still expanded more than once.
+ */
+#define start_thread31(regs, new_psw, new_stackp) do {			\
+	(regs)->psw.mask = PSW_USER_BITS | PSW_MASK_BA;			\
+	(regs)->psw.addr = (new_psw) | PSW_ADDR_AMODE;			\
+	(regs)->gprs[15] = (new_stackp);				\
+	crst_table_downgrade(current->mm, 1UL << 31);			\
+	execve_tail();							\
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+struct seq_file;
+
+#ifdef CONFIG_64BIT
+extern void show_cacheinfo(struct seq_file *m);
+#else
+static inline void show_cacheinfo(struct seq_file *m) { }
+#endif
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+extern unsigned long thread_saved_pc(struct task_struct *t);
+
+unsigned long get_wchan(struct task_struct *p);
+#define task_pt_regs(tsk) ((struct pt_regs *) \
+ (task_stack_page(tsk) + THREAD_SIZE) - 1)
+#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
+#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15])
+
+/* Has task runtime instrumentation enabled ? */
+#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+
+static inline unsigned short stap(void)
+{
+ unsigned short cpu_address;
+
+ asm volatile("stap %0" : "=m" (cpu_address));
+ return cpu_address;
+}
+
+/*
+ * Give up the time slice of the virtual PU.
+ *
+ * When MACHINE_HAS_DIAG44 is true this issues "diag 0,0,68" (68 decimal
+ * is 0x44, matching the DIAG44 in the feature test).  In every case the
+ * function ends with a compiler barrier().
+ */
+static inline void cpu_relax(void)
+{
+ if (MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,68");
+ barrier();
+}
+
+#define arch_mutex_cpu_relax() barrier()
+
+/*
+ * Set the PSW key with SPKA.
+ *
+ * @key is used as the base register of the operand address "0(%0)", so
+ * it needs the "a" (address register) constraint.  With "d" the compiler
+ * is free to pick register 0, and a base field of 0 means "no base
+ * register", which would silently turn the operand into address 0.
+ */
+static inline void psw_set_key(unsigned int key)
+{
+	asm volatile("spka 0(%0)" : : "a" (key));
+}
+
+/*
+ * Load the given PSW: lpswe on 64 bit kernels, lpsw otherwise.
+ */
+static inline void __load_psw(psw_t psw)
+{
+#ifdef CONFIG_64BIT
+	asm volatile("lpswe %0" : : "Q" (psw) : "cc");
+#else
+	asm volatile("lpsw %0" : : "Q" (psw) : "cc");
+#endif
+}
+
+/*
+ * Set PSW mask to specified value, while leaving the
+ * PSW addr pointing to the next instruction.
+ *
+ * A local psw_t gets @mask as its mask.  The inline asm then computes
+ * the address of its own trailing label "1:" (basr + ahi on 31 bit,
+ * larl on 64 bit), stores that address at byte offset 4 (31 bit) or
+ * 8 (64 bit) of the psw operand - presumably the psw.addr member - and
+ * loads the result with lpsw/lpswe, so execution resumes directly
+ * behind the load.  The local "addr" is only a scratch register for
+ * the asm.
+ */
+static inline void __load_psw_mask (unsigned long mask)
+{
+ unsigned long addr;
+ psw_t psw;
+
+ psw.mask = mask;
+
+#ifndef CONFIG_64BIT
+ asm volatile(
+ " basr %0,0\n"
+ "0: ahi %0,1f-0b\n"
+ " st %0,%O1+4(%R1)\n"
+ " lpsw %1\n"
+ "1:"
+ : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
+#else /* CONFIG_64BIT */
+ asm volatile(
+ " larl %0,1f\n"
+ " stg %0,%O1+8(%R1)\n"
+ " lpswe %1\n"
+ "1:"
+ : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
+#endif /* CONFIG_64BIT */
+}
+
+/*
+ * Step the PSW instruction address back by @ilc bytes and wrap the
+ * result according to the addressing mode found in @psw.
+ */
+static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
+{
+	unsigned long rewound = psw.addr - ilc;
+
+#ifdef CONFIG_64BIT
+	/* 64 bit mode: nothing to mask off */
+	if (psw.mask & PSW_MASK_EA)
+		return rewound;
+	/* 31 bit mode */
+	if (psw.mask & PSW_MASK_BA)
+		return rewound & ((1UL << 31) - 1);
+	/* 24 bit mode */
+	return rewound & ((1UL << 24) - 1);
+#else
+	/* 31 bit mode keeps the addressing mode bit set */
+	if (psw.addr & PSW_ADDR_AMODE)
+		return rewound | PSW_ADDR_AMODE;
+	/* 24 bit mode */
+	return rewound & ((1UL << 24) - 1);
+#endif
+}
+
+/*
+ * Function to drop a processor into disabled wait state
+ *
+ * @code becomes the address part of the PSW that is loaded at the end;
+ * its mask has PSW_MASK_WAIT set.  Before loading that PSW the inline
+ * asm clears one bit in control register 0 (annotated "switch off
+ * protection" below) and stores timer, clock comparator, prefix,
+ * access, floating point, general and control registers at fixed
+ * addresses.  On 64 bit those addresses are relative to register 1,
+ * which is loaded with 0x1000, and registers 0 and 1 are listed as
+ * clobbered.
+ * The in-function comment states that the processor is dead after the
+ * PSW load; the trailing while (1) presumably only backs the
+ * __noreturn annotation.
+ */
+static inline void __noreturn disabled_wait(unsigned long code)
+{
+ unsigned long ctl_buf;
+ psw_t dw_psw;
+
+ dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+ dw_psw.addr = code;
+ /*
+ * Store status and then load disabled wait psw,
+ * the processor is dead afterwards
+ */
+#ifndef CONFIG_64BIT
+ asm volatile(
+ " stctl 0,0,0(%2)\n"
+ " ni 0(%2),0xef\n" /* switch off protection */
+ " lctl 0,0,0(%2)\n"
+ " stpt 0xd8\n" /* store timer */
+ " stckc 0xe0\n" /* store clock comparator */
+ " stpx 0x108\n" /* store prefix register */
+ " stam 0,15,0x120\n" /* store access registers */
+ " std 0,0x160\n" /* store f0 */
+ " std 2,0x168\n" /* store f2 */
+ " std 4,0x170\n" /* store f4 */
+ " std 6,0x178\n" /* store f6 */
+ " stm 0,15,0x180\n" /* store general registers */
+ " stctl 0,15,0x1c0\n" /* store control registers */
+ " oi 0x1c0,0x10\n" /* fake protection bit */
+ " lpsw 0(%1)"
+ : "=m" (ctl_buf)
+ : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc");
+#else /* CONFIG_64BIT */
+ asm volatile(
+ " stctg 0,0,0(%2)\n"
+ " ni 4(%2),0xef\n" /* switch off protection */
+ " lctlg 0,0,0(%2)\n"
+ " lghi 1,0x1000\n"
+ " stpt 0x328(1)\n" /* store timer */
+ " stckc 0x330(1)\n" /* store clock comparator */
+ " stpx 0x318(1)\n" /* store prefix register */
+ " stam 0,15,0x340(1)\n"/* store access registers */
+ " stfpc 0x31c(1)\n" /* store fpu control */
+ " std 0,0x200(1)\n" /* store f0 */
+ " std 1,0x208(1)\n" /* store f1 */
+ " std 2,0x210(1)\n" /* store f2 */
+ " std 3,0x218(1)\n" /* store f3 */
+ " std 4,0x220(1)\n" /* store f4 */
+ " std 5,0x228(1)\n" /* store f5 */
+ " std 6,0x230(1)\n" /* store f6 */
+ " std 7,0x238(1)\n" /* store f7 */
+ " std 8,0x240(1)\n" /* store f8 */
+ " std 9,0x248(1)\n" /* store f9 */
+ " std 10,0x250(1)\n" /* store f10 */
+ " std 11,0x258(1)\n" /* store f11 */
+ " std 12,0x260(1)\n" /* store f12 */
+ " std 13,0x268(1)\n" /* store f13 */
+ " std 14,0x270(1)\n" /* store f14 */
+ " std 15,0x278(1)\n" /* store f15 */
+ " stmg 0,15,0x280(1)\n"/* store general registers */
+ " stctg 0,15,0x380(1)\n"/* store control registers */
+ " oi 0x384(1),0x10\n"/* fake protection bit */
+ " lpswe 0(%1)"
+ : "=m" (ctl_buf)
+ : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
+#endif /* CONFIG_64BIT */
+ while (1);
+}
+
+/*
+ * Load a new PSW mask but keep the current value of its first byte:
+ * the top eight bits are taken from arch_local_save_flags() and OR-ed
+ * into @mask before it is loaded.
+ */
+static inline void
+__set_psw_mask(unsigned long mask)
+{
+	unsigned long first_byte = arch_local_save_flags() & ~(-1UL >> 8);
+
+	__load_psw_mask(mask | first_byte);
+}
+
+#define local_mcck_enable() \
+ __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK)
+#define local_mcck_disable() \
+ __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT)
+
+/*
+ * Basic Machine Check/Program Check Handler.
+ */
+
+extern void s390_base_mcck_handler(void);
+extern void s390_base_pgm_handler(void);
+extern void s390_base_ext_handler(void);
+
+extern void (*s390_base_mcck_handler_fn)(void);
+extern void (*s390_base_pgm_handler_fn)(void);
+extern void (*s390_base_ext_handler_fn)(void);
+
+#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
+
+extern int memcpy_real(void *, void *, size_t);
+extern void memcpy_absolute(void *, void *, size_t);
+
+#define mem_assign_absolute(dest, val) { \
+ __typeof__(dest) __tmp = (val); \
+ \
+ BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \
+ memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
+}
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".long (" #_fault ") - .\n" \
+ ".long (" #_target ") - .\n" \
+ ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target) \
+ .section __ex_table,"a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
new file mode 100644
index 00000000000..55d69dd7473
--- /dev/null
+++ b/arch/s390/include/asm/ptrace.h
@@ -0,0 +1,175 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+#ifndef _S390_PTRACE_H
+#define _S390_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+
+#define PIF_SYSCALL 0 /* inside a system call */
+#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
+
+#define _PIF_SYSCALL (1<<PIF_SYSCALL)
+#define _PIF_PER_TRAP (1<<PIF_PER_TRAP)
+
+#ifndef __ASSEMBLY__
+
+#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
+ PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+ PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
+ PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
+
+struct psw_bits {
+ unsigned long long : 1;
+ unsigned long long r : 1; /* PER-Mask */
+ unsigned long long : 3;
+ unsigned long long t : 1; /* DAT Mode */
+ unsigned long long i : 1; /* Input/Output Mask */
+ unsigned long long e : 1; /* External Mask */
+ unsigned long long key : 4; /* PSW Key */
+ unsigned long long : 1;
+ unsigned long long m : 1; /* Machine-Check Mask */
+ unsigned long long w : 1; /* Wait State */
+ unsigned long long p : 1; /* Problem State */
+ unsigned long long as : 2; /* Address Space Control */
+ unsigned long long cc : 2; /* Condition Code */
+ unsigned long long pm : 4; /* Program Mask */
+ unsigned long long ri : 1; /* Runtime Instrumentation */
+ unsigned long long : 6;
+ unsigned long long eaba : 2; /* Addressing Mode */
+#ifdef CONFIG_64BIT
+ unsigned long long : 31;
+ unsigned long long ia : 64;/* Instruction Address */
+#else
+ unsigned long long ia : 31;/* Instruction Address */
+#endif
+};
+
+enum {
+ PSW_AMODE_24BIT = 0,
+ PSW_AMODE_31BIT = 1,
+ PSW_AMODE_64BIT = 3
+};
+
+enum {
+ PSW_AS_PRIMARY = 0,
+ PSW_AS_ACCREG = 1,
+ PSW_AS_SECONDARY = 2,
+ PSW_AS_HOME = 3
+};
+
+#define psw_bits(__psw) (*({ \
+ typecheck(psw_t, __psw); \
+ &(*(struct psw_bits *)(&(__psw))); \
+}))
+
+/*
+ * The pt_regs struct defines the way the registers are stored on
+ * the stack during a system call.
+ */
+struct pt_regs
+{
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned long orig_gpr2;
+ unsigned int int_code;
+ unsigned int int_parm;
+ unsigned long int_parm_long;
+ unsigned long flags;
+};
+
+/*
+ * Program event recording (PER) register set.
+ */
+struct per_regs {
+ unsigned long control; /* PER control bits */
+ unsigned long start; /* PER starting address */
+ unsigned long end; /* PER ending address */
+};
+
+/*
+ * PER event contains information about the cause of the last PER exception.
+ */
+struct per_event {
+ unsigned short cause; /* PER code, ATMID and AI */
+ unsigned long address; /* PER address */
+ unsigned char paid; /* PER access identification */
+};
+
+/*
+ * Simplified per_info structure used to decode the ptrace user space ABI.
+ */
+struct per_struct_kernel {
+ unsigned long cr9; /* PER control bits */
+ unsigned long cr10; /* PER starting address */
+ unsigned long cr11; /* PER ending address */
+ unsigned long bits; /* Obsolete software bits */
+ unsigned long starting_addr; /* User specified start address */
+ unsigned long ending_addr; /* User specified end address */
+ unsigned short perc_atmid; /* PER trap ATMID */
+ unsigned long address; /* PER trap instruction address */
+ unsigned char access_id; /* PER trap access identification */
+};
+
+#define PER_EVENT_MASK 0xEB000000UL
+
+#define PER_EVENT_BRANCH 0x80000000UL
+#define PER_EVENT_IFETCH 0x40000000UL
+#define PER_EVENT_STORE 0x20000000UL
+#define PER_EVENT_STORE_REAL 0x08000000UL
+#define PER_EVENT_TRANSACTION_END 0x02000000UL
+#define PER_EVENT_NULLIFICATION 0x01000000UL
+
+#define PER_CONTROL_MASK 0x00e00000UL
+
+#define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL
+#define PER_CONTROL_SUSPENSION 0x00400000UL
+#define PER_CONTROL_ALTERATION 0x00200000UL
+
+/*
+ * Set bit number @flag in @regs->flags.
+ * flags is an unsigned long, so the mask is built with 1UL to match
+ * its width.
+ */
+static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags |= (1UL << flag);
+}
+
+/*
+ * Clear bit number @flag in @regs->flags.
+ * flags is an unsigned long; the mask must be one too, because
+ * ~(1U << flag) is an unsigned int that gets zero-extended on 64 bit
+ * and would clear bits 32-63 of flags along with the requested bit.
+ */
+static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags &= ~(1UL << flag);
+}
+
+/*
+ * Return 1 if bit number @flag is set in @regs->flags, else 0.
+ * flags is an unsigned long, so the mask is built with 1UL to match
+ * its width.
+ */
+static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	return !!(regs->flags & (1UL << flag));
+}
+
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step() (1)
+#define arch_has_block_step() (1)
+
+#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
+#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define user_stack_pointer(regs)((regs)->gprs[15])
+#define profile_pc(regs) instruction_pointer(regs)
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->gprs[2];
+}
+
+int regs_query_register_offset(const char *name);
+const char *regs_query_register_name(unsigned int offset);
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+/* Return gpr 15 from @regs, masked with PSW_ADDR_INSN. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	unsigned long sp = regs->gprs[15];
+
+	return sp & PSW_ADDR_INSN;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
new file mode 100644
index 00000000000..d786c634e05
--- /dev/null
+++ b/arch/s390/include/asm/qdio.h
@@ -0,0 +1,436 @@
+/*
+ * Copyright IBM Corp. 2000, 2008
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ */
+#ifndef __QDIO_H__
+#define __QDIO_H__
+
+#include <linux/interrupt.h>
+#include <asm/cio.h>
+#include <asm/ccwdev.h>
+
+/* only use 4 queues to save some cachelines */
+#define QDIO_MAX_QUEUES_PER_IRQ 4
+#define QDIO_MAX_BUFFERS_PER_Q 128
+#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1)
+#define QDIO_MAX_ELEMENTS_PER_BUFFER 16
+#define QDIO_SBAL_SIZE 256
+
+#define QDIO_QETH_QFMT 0
+#define QDIO_ZFCP_QFMT 1
+#define QDIO_IQDIO_QFMT 2
+
+/**
+ * struct qdesfmt0 - queue descriptor, format 0
+ * @sliba: storage list information block address
+ * @sla: storage list address
+ * @slsba: storage list state block address
+ * @akey: access key for DLIB
+ * @bkey: access key for SL
+ * @ckey: access key for SBALs
+ * @dkey: access key for SLSB
+ */
+struct qdesfmt0 {
+ u64 sliba;
+ u64 sla;
+ u64 slsba;
+ u32 : 32;
+ u32 akey : 4;
+ u32 bkey : 4;
+ u32 ckey : 4;
+ u32 dkey : 4;
+ u32 : 16;
+} __attribute__ ((packed));
+
+#define QDR_AC_MULTI_BUFFER_ENABLE 0x01
+
+/**
+ * struct qdr - queue description record (QDR)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @ac: adapter characteristics
+ * @iqdcnt: input queue descriptor count
+ * @oqdcnt: output queue descriptor count
+ * @iqdsz: input queue descriptor size
+ * @oqdsz: output queue descriptor size
+ * @qiba: queue information block address
+ * @qkey: queue information block key
+ * @qdf0: queue descriptions
+ */
+struct qdr {
+ u32 qfmt : 8;
+ u32 pfmt : 8;
+ u32 : 8;
+ u32 ac : 8;
+ u32 : 8;
+ u32 iqdcnt : 8;
+ u32 : 8;
+ u32 oqdcnt : 8;
+ u32 : 8;
+ u32 iqdsz : 8;
+ u32 : 8;
+ u32 oqdsz : 8;
+ /* private: */
+ u32 res[9];
+ /* public: */
+ u64 qiba;
+ u32 : 32;
+ u32 qkey : 4;
+ u32 : 28;
+ struct qdesfmt0 qdf0[126];
+} __attribute__ ((packed, aligned(4096)));
+
+#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40
+#define QIB_RFLAGS_ENABLE_QEBSM 0x80
+#define QIB_RFLAGS_ENABLE_DATA_DIV 0x02
+
+/**
+ * struct qib - queue information block (QIB)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @rflags: QEBSM
+ * @ac: adapter characteristics
+ * @isliba: absolute address of first input SLIB
+ * @osliba: absolute address of first output SLIB
+ * @ebcnam: adapter identifier in EBCDIC
+ * @parm: implementation dependent parameters
+ */
+struct qib {
+ u32 qfmt : 8;
+ u32 pfmt : 8;
+ u32 rflags : 8;
+ u32 ac : 8;
+ u32 : 32;
+ u64 isliba;
+ u64 osliba;
+ u32 : 32;
+ u32 : 32;
+ u8 ebcnam[8];
+ /* private: */
+ u8 res[88];
+ /* public: */
+ u8 parm[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slibe - storage list information block element (SLIBE)
+ * @parms: implementation dependent parameters
+ */
+struct slibe {
+ u64 parms;
+};
+
+/**
+ * struct qaob - queue asynchronous operation block
+ * @res0: reserved parameters
+ * @res1: reserved parameter
+ * @res2: reserved parameter
+ * @res3: reserved parameter
+ * @aorc: asynchronous operation return code
+ * @flags: internal flags
+ * @cbtbs: control block type
+ * @sb_count: number of storage blocks
+ * @sba: storage block element addresses
+ * @dcount: size of storage block elements
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
+ * @user2: user definable value
+ */
+struct qaob {
+ u64 res0[6];
+ u8 res1;
+ u8 res2;
+ u8 res3;
+ u8 aorc;
+ u8 flags;
+ u16 cbtbs;
+ u8 sb_count;
+ u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+ u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
+ u64 user0;
+ u64 res4[2];
+ u64 user1;
+ u64 user2;
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slib - storage list information block (SLIB)
+ * @nsliba: next SLIB address (if any)
+ * @sla: SL address
+ * @slsba: SLSB address
+ * @slibe: SLIB elements
+ */
+struct slib {
+ u64 nsliba;
+ u64 sla;
+ u64 slsba;
+ /* private: */
+ u8 res[1000];
+ /* public: */
+ struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(2048)));
+
+#define SBAL_EFLAGS_LAST_ENTRY 0x40
+#define SBAL_EFLAGS_CONTIGUOUS 0x20
+#define SBAL_EFLAGS_FIRST_FRAG 0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG 0x08
+#define SBAL_EFLAGS_LAST_FRAG 0x0c
+#define SBAL_EFLAGS_MASK 0x6f
+
+#define SBAL_SFLAGS0_PCI_REQ 0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20
+
+/* Awesome OpenFCP extensions */
+#define SBAL_SFLAGS0_TYPE_STATUS 0x00
+#define SBAL_SFLAGS0_TYPE_WRITE 0x08
+#define SBAL_SFLAGS0_TYPE_READ 0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18
+#define SBAL_SFLAGS0_MORE_SBALS 0x04
+#define SBAL_SFLAGS0_COMMAND 0x02
+#define SBAL_SFLAGS0_LAST_SBAL 0x00
+#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
+
+/**
+ * struct qdio_buffer_element - SBAL entry
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
+ * @length: length
+ * @addr: address
+*/
+struct qdio_buffer_element {
+ u8 eflags;
+ /* private: */
+ u8 res1;
+ /* public: */
+ u8 scount;
+ u8 sflags;
+ u32 length;
+#ifdef CONFIG_32BIT
+ /* private: */
+ void *res2;
+ /* public: */
+#endif
+ void *addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct qdio_buffer - storage block address list (SBAL)
+ * @element: SBAL entries
+ */
+struct qdio_buffer {
+ struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct sl_element - storage list entry
+ * @sbal: absolute SBAL address
+ */
+struct sl_element {
+#ifdef CONFIG_32BIT
+ /* private: */
+ unsigned long reserved;
+ /* public: */
+#endif
+ unsigned long sbal;
+} __attribute__ ((packed));
+
+/**
+ * struct sl - storage list (SL)
+ * @element: SL entries
+ */
+struct sl {
+ struct sl_element element[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(1024)));
+
+/**
+ * struct slsb - storage list state block (SLSB)
+ * @val: state per buffer
+ */
+struct slsb {
+ u8 val[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct qdio_outbuf_state - SBAL related asynchronous operation information
+ * (for communication with upper layer programs)
+ * (only required for use with completion queues)
+ * @flags: flags indicating state of buffer
+ * @aob: pointer to QAOB used for the particular SBAL
+ * @user: pointer to upper layer program's state information related to SBAL
+ * (stored in user1 data of QAOB)
+ */
+struct qdio_outbuf_state {
+ u8 flags;
+ struct qaob *aob;
+ void *user;
+};
+
+#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00
+#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01
+
+#define CHSC_AC1_INITIATE_INPUTQ 0x80
+
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */
+#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */
+
+#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080
+#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040
+#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010
+#define CHSC_AC2_DATA_DIV_ENABLED 0x0002
+
+#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000
+
+struct qdio_ssqd_desc {
+ u8 flags;
+ u8:8;
+ u16 sch;
+ u8 qfmt;
+ u8 parm;
+ u8 qdioac1;
+ u8 sch_class;
+ u8 pcnt;
+ u8 icnt;
+ u8:8;
+ u8 ocnt;
+ u8:8;
+ u8 mbccnt;
+ u16 qdioac2;
+ u64 sch_token;
+ u8 mro;
+ u8 mri;
+ u16 qdioac3;
+ u16:16;
+ u8:8;
+ u8 mmwc;
+} __attribute__ ((packed));
+
+/* params are: ccw_device, qdio_error, queue_number,
+ first element processed, number of elements processed, int_parm */
+typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
+ int, int, unsigned long);
+
+/* qdio errors reported to the upper-layer program */
+#define QDIO_ERROR_ACTIVATE 0x0001
+#define QDIO_ERROR_GET_BUF_STATE 0x0002
+#define QDIO_ERROR_SET_BUF_STATE 0x0004
+#define QDIO_ERROR_SLSB_STATE 0x0100
+
+#define QDIO_ERROR_FATAL 0x00ff
+#define QDIO_ERROR_TEMPORARY 0xff00
+
+/* for qdio_cleanup */
+#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01
+#define QDIO_FLAG_CLEANUP_USING_HALT 0x02
+
+/**
+ * struct qdio_initialize - qdio initialization data
+ * @cdev: associated ccw device
+ * @q_format: queue format
+ * @qdr_ac: NOTE(review): presumably the value for the QDR @ac field
+ *	(adapter characteristics, cf. QDR_AC_MULTI_BUFFER_ENABLE) - confirm
+ * @adapter_name: name for the adapter
+ * @qib_param_field_format: format for qib_param_field
+ * @qib_param_field: pointer to 128 bytes or NULL, if no param field
+ * @qib_rflags: rflags to set
+ * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL
+ * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL
+ * @no_input_qs: number of input queues
+ * @no_output_qs: number of output queues
+ * @input_handler: handler to be called for input queues
+ * @output_handler: handler to be called for output queues
+ * @queue_start_poll_array: polling handlers (one per input queue or NULL)
+ * @scan_threshold: NOTE(review): not described in this header; presumably
+ *	a buffer count that triggers queue scanning - confirm against the
+ *	qdio driver
+ * @int_parm: interruption parameter
+ * @input_sbal_addr_array: address of no_input_qs * 128 pointers
+ * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
+ */
+struct qdio_initialize {
+ struct ccw_device *cdev;
+ unsigned char q_format;
+ unsigned char qdr_ac;
+ unsigned char adapter_name[8];
+ unsigned int qib_param_field_format;
+ unsigned char *qib_param_field;
+ unsigned char qib_rflags;
+ unsigned long *input_slib_elements;
+ unsigned long *output_slib_elements;
+ unsigned int no_input_qs;
+ unsigned int no_output_qs;
+ qdio_handler_t *input_handler;
+ qdio_handler_t *output_handler;
+ void (**queue_start_poll_array) (struct ccw_device *, int,
+ unsigned long);
+ int scan_threshold;
+ unsigned long int_parm;
+ void **input_sbal_addr_array;
+ void **output_sbal_addr_array;
+ struct qdio_outbuf_state *output_sbal_state_array;
+};
+
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit: Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+ u64 nit;
+ struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+ u64 nit;
+ struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+ u64 nit;
+ struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
+#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
+#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
+#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */
+
+#define QDIO_FLAG_SYNC_INPUT 0x01
+#define QDIO_FLAG_SYNC_OUTPUT 0x02
+#define QDIO_FLAG_PCI_OUT 0x10
+
+extern int qdio_allocate(struct qdio_initialize *);
+extern int qdio_establish(struct qdio_initialize *);
+extern int qdio_activate(struct ccw_device *);
+extern void qdio_release_aob(struct qaob *);
+extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
+ unsigned int);
+extern int qdio_start_irq(struct ccw_device *, int);
+extern int qdio_stop_irq(struct ccw_device *, int);
+extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
+extern int qdio_shutdown(struct ccw_device *, int);
+extern int qdio_free(struct ccw_device *);
+extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+ int cnc, u16 *response,
+ void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+ void *entry),
+ void *priv);
+
+#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
new file mode 100644
index 00000000000..804578587a7
--- /dev/null
+++ b/arch/s390/include/asm/reset.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright IBM Corp. 2006
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_RESET_H
+#define _ASM_S390_RESET_H
+
+#include <linux/list.h>
+
+struct reset_call {
+ struct list_head list;
+ void (*fn)(void);
+};
+
+extern void register_reset_call(struct reset_call *reset);
+extern void unregister_reset_call(struct reset_call *reset);
+extern void s390_reset_system(void (*func)(void *), void *data);
+#endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
new file mode 100644
index 00000000000..830da737ff8
--- /dev/null
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -0,0 +1,98 @@
+#ifndef _RUNTIME_INSTR_H
+#define _RUNTIME_INSTR_H
+
+#define S390_RUNTIME_INSTR_START 0x1
+#define S390_RUNTIME_INSTR_STOP 0x2
+
+struct runtime_instr_cb {
+ __u64 buf_current;
+ __u64 buf_origin;
+ __u64 buf_limit;
+
+ __u32 valid : 1;
+ __u32 pstate : 1;
+ __u32 pstate_set_buf : 1;
+ __u32 home_space : 1;
+ __u32 altered : 1;
+ __u32 : 3;
+ __u32 pstate_sample : 1;
+ __u32 sstate_sample : 1;
+ __u32 pstate_collect : 1;
+ __u32 sstate_collect : 1;
+ __u32 : 1;
+ __u32 halted_int : 1;
+ __u32 int_requested : 1;
+ __u32 buffer_full_int : 1;
+ __u32 key : 4;
+ __u32 : 9;
+ __u32 rgs : 3;
+
+ __u32 mode : 4;
+ __u32 next : 1;
+ __u32 mae : 1;
+ __u32 : 2;
+ __u32 call_type_br : 1;
+ __u32 return_type_br : 1;
+ __u32 other_type_br : 1;
+ __u32 bc_other_type : 1;
+ __u32 emit : 1;
+ __u32 tx_abort : 1;
+ __u32 : 2;
+ __u32 bp_xn : 1;
+ __u32 bp_xt : 1;
+ __u32 bp_ti : 1;
+ __u32 bp_ni : 1;
+ __u32 suppr_y : 1;
+ __u32 suppr_z : 1;
+
+ __u32 dc_miss_extra : 1;
+ __u32 lat_lev_ignore : 1;
+ __u32 ic_lat_lev : 4;
+ __u32 dc_lat_lev : 4;
+
+ __u64 reserved1;
+ __u64 scaling_factor;
+ __u64 rsic;
+ __u64 reserved2;
+} __packed __aligned(8);
+
+extern struct runtime_instr_cb runtime_instr_empty_cb;
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
+ : : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
+ : "=Q" (*cb) : : "cc");
+}
+
+/*
+ * On 64 bit kernels, store the runtime instrumentation control block
+ * into @cb_prev when that pointer is non-NULL.  Empty otherwise.
+ */
+static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
+{
+#ifdef CONFIG_64BIT
+	if (!cb_prev)
+		return;
+	store_runtime_instr_cb(cb_prev);
+#endif
+}
+
+/*
+ * On 64 bit kernels, load @cb_next if it is non-NULL.  When @cb_next is
+ * NULL but @cb_prev is not, runtime_instr_empty_cb is loaded instead.
+ * With both NULL nothing is loaded.  Empty on 31 bit kernels.
+ */
+static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
+				 struct runtime_instr_cb *cb_prev)
+{
+#ifdef CONFIG_64BIT
+	struct runtime_instr_cb *cb = cb_next;
+
+	if (!cb && cb_prev)
+		cb = &runtime_instr_empty_cb;
+	if (cb)
+		load_runtime_instr_cb(cb);
+#endif
+}
+
+#ifdef CONFIG_64BIT
+extern void exit_thread_runtime_instr(void);
+#else
+static inline void exit_thread_runtime_instr(void) { }
+#endif
+
+#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
new file mode 100644
index 00000000000..487f9b64efb
--- /dev/null
+++ b/arch/s390/include/asm/rwsem.h
@@ -0,0 +1,318 @@
+#ifndef _S390_RWSEM_H
+#define _S390_RWSEM_H
+
+/*
+ * S390 version
+ * Copyright IBM Corp. 2002
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
+ */
+
+/*
+ *
+ * The MSW of the count is the negated number of active writers and waiting
+ * lockers, and the LSW is the total number of active locks
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
+ * uncontended lock. This can be determined because XADD returns the old value.
+ * Readers increment by 1 and see a positive value when uncontended, negative
+ * if there are writers (and maybe) readers waiting (in which case it goes to
+ * sleep).
+ *
+ * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+ * be extended to 65534 by manually checking the whole MSW rather than relying
+ * on the S flag.
+ *
+ * The value of ACTIVE_BIAS supports up to 65535 active processes.
+ *
+ * This should be totally fair - if anything is waiting, a process that wants a
+ * lock will go to the back of the queue. When the currently active lock is
+ * released, if there's a writer at the front of the queue, then that and only
+ * that will be woken up; if there's a bunch of consecutive readers at the
+ * front, then they'll all be woken up, but no other readers will be.
+ */
+
+#ifndef _LINUX_RWSEM_H
+#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
+#endif
+
+#ifndef CONFIG_64BIT
+#define RWSEM_UNLOCKED_VALUE 0x00000000
+#define RWSEM_ACTIVE_BIAS 0x00000001
+#define RWSEM_ACTIVE_MASK 0x0000ffff
+#define RWSEM_WAITING_BIAS (-0x00010000)
+#else /* CONFIG_64BIT */
+#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
+#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
+#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
+#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
+#endif /* CONFIG_64BIT */
+#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+/*
+ * lock for reading
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+ signed long old, new;
+
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " ahi %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " aghi %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS)
+ : "cc", "memory");
+ if (old < 0)
+ rwsem_down_read_failed(sem);
+}
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+ signed long old, new;
+
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: ltr %1,%0\n"
+ " jm 1f\n"
+ " ahi %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b\n"
+ "1:"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: ltgr %1,%0\n"
+ " jm 1f\n"
+ " aghi %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b\n"
+ "1:"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS)
+ : "cc", "memory");
+ return old >= 0 ? 1 : 0;
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+{
+ signed long old, new, tmp;
+
+ tmp = RWSEM_ACTIVE_WRITE_BIAS;
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " a %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " ag %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "m" (tmp)
+ : "cc", "memory");
+ if (old != 0)
+ rwsem_down_write_failed(sem);
+}
+
+static inline void __down_write(struct rw_semaphore *sem)
+{
+ __down_write_nested(sem, 0);
+}
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+ signed long old;
+
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%1\n"
+ "0: ltr %0,%0\n"
+ " jnz 1f\n"
+ " cs %0,%3,%1\n"
+ " jl 0b\n"
+#else /* CONFIG_64BIT */
+ " lg %0,%1\n"
+ "0: ltgr %0,%0\n"
+ " jnz 1f\n"
+ " csg %0,%3,%1\n"
+ " jl 0b\n"
+#endif /* CONFIG_64BIT */
+ "1:"
+ : "=&d" (old), "=Q" (sem->count)
+ : "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS)
+ : "cc", "memory");
+ return (old == RWSEM_UNLOCKED_VALUE) ? 1 : 0;
+}
+
+/*
+ * unlock after reading
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+ signed long old, new;
+
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " ahi %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " aghi %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS)
+ : "cc", "memory");
+ if (new < 0)
+ if ((new & RWSEM_ACTIVE_MASK) == 0)
+ rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+ signed long old, new, tmp;
+
+ tmp = -RWSEM_ACTIVE_WRITE_BIAS;
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " a %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " ag %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "m" (tmp)
+ : "cc", "memory");
+ if (new < 0)
+ if ((new & RWSEM_ACTIVE_MASK) == 0)
+ rwsem_wake(sem);
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+ signed long old, new, tmp;
+
+ tmp = -RWSEM_WAITING_BIAS;
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " a %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " ag %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "m" (tmp)
+ : "cc", "memory");
+ if (new > 1)
+ rwsem_downgrade_wake(sem);
+}
+
+/*
+ * implement atomic add functionality
+ */
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
+{
+ signed long old, new;
+
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " ar %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " agr %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "d" (delta)
+ : "cc", "memory");
+}
+
+/*
+ * implement exchange and add functionality
+ */
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
+{
+ signed long old, new;
+
+ asm volatile(
+#ifndef CONFIG_64BIT
+ " l %0,%2\n"
+ "0: lr %1,%0\n"
+ " ar %1,%4\n"
+ " cs %0,%1,%2\n"
+ " jl 0b"
+#else /* CONFIG_64BIT */
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " agr %1,%4\n"
+ " csg %0,%1,%2\n"
+ " jl 0b"
+#endif /* CONFIG_64BIT */
+ : "=&d" (old), "=&d" (new), "=Q" (sem->count)
+ : "Q" (sem->count), "d" (delta)
+ : "cc", "memory");
+ return new;
+}
+
+#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
new file mode 100644
index 00000000000..6d45ef6c12a
--- /dev/null
+++ b/arch/s390/include/asm/scatterlist.h
@@ -0,0 +1,3 @@
+#include <asm-generic/scatterlist.h>
+
+#define ARCH_HAS_SG_CHAIN
diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h
new file mode 100644
index 00000000000..40b47dfa9d6
--- /dev/null
+++ b/arch/s390/include/asm/schid.h
@@ -0,0 +1,21 @@
+#ifndef ASM_SCHID_H
+#define ASM_SCHID_H
+
+#include <linux/string.h>
+#include <uapi/asm/schid.h>
+
+/* Helper function for sane state of pre-allocated subchannel_id. */
+static inline void
+init_subchannel_id(struct subchannel_id *schid)
+{
+ memset(schid, 0, sizeof(struct subchannel_id));
+ schid->one = 1;
+}
+
+static inline int
+schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2)
+{
+ return !memcmp(schid1, schid2, sizeof(struct subchannel_id));
+}
+
+#endif /* ASM_SCHID_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
new file mode 100644
index 00000000000..1aba89b53cb
--- /dev/null
+++ b/arch/s390/include/asm/sclp.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright IBM Corp. 2007
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCLP_H
+#define _ASM_S390_SCLP_H
+
+#include <linux/types.h>
+#include <asm/chpid.h>
+#include <asm/cpu.h>
+
+#define SCLP_CHP_INFO_MASK_SIZE 32
+
+struct sclp_chp_info {
+ u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
+ u8 standby[SCLP_CHP_INFO_MASK_SIZE];
+ u8 configured[SCLP_CHP_INFO_MASK_SIZE];
+};
+
+#define LOADPARM_LEN 8
+
+struct sclp_ipl_info {
+ int is_valid;
+ int has_dump;
+ char loadparm[LOADPARM_LEN];
+};
+
+struct sclp_cpu_entry {
+ u8 address;
+ u8 reserved0[2];
+ u8 : 3;
+ u8 siif : 1;
+ u8 : 4;
+ u8 reserved2[10];
+ u8 type;
+ u8 reserved1;
+} __attribute__((packed));
+
+struct sclp_cpu_info {
+ unsigned int configured;
+ unsigned int standby;
+ unsigned int combined;
+ int has_cpu_type;
+ struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
+};
+
+int sclp_get_cpu_info(struct sclp_cpu_info *info);
+int sclp_cpu_configure(u8 cpu);
+int sclp_cpu_deconfigure(u8 cpu);
+unsigned long long sclp_get_rnmax(void);
+unsigned long long sclp_get_rzm(void);
+unsigned int sclp_get_max_cpu(void);
+int sclp_sdias_blk_count(void);
+int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
+int sclp_chp_configure(struct chp_id chpid);
+int sclp_chp_deconfigure(struct chp_id chpid);
+int sclp_chp_read_info(struct sclp_chp_info *info);
+void sclp_get_ipl_info(struct sclp_ipl_info *info);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
+bool sclp_has_sprp(void);
+int sclp_pci_configure(u32 fid);
+int sclp_pci_deconfigure(u32 fid);
+int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
+unsigned long sclp_get_hsa_size(void);
+void sclp_early_detect(void);
+int sclp_has_siif(void);
+unsigned int sclp_get_ibc(void);
+
+#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
new file mode 100644
index 00000000000..4af99cdaddf
--- /dev/null
+++ b/arch/s390/include/asm/scsw.h
@@ -0,0 +1,988 @@
+/*
+ * Helper functions for scsw access.
+ *
+ * Copyright IBM Corp. 2008, 2012
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCSW_H_
+#define _ASM_S390_SCSW_H_
+
+#include <linux/types.h>
+#include <asm/css_chars.h>
+#include <asm/cio.h>
+
+/**
+ * struct cmd_scsw - command-mode subchannel status word
+ * @key: subchannel key
+ * @sctl: suspend control
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @pfch: prefetch
+ * @isic: initial-status interruption control
+ * @alcc: address-limit checking control
+ * @ssi: suppress-suspended interruption
+ * @zcc: zero condition code
+ * @ectl: extended control
+ * @pno: path not operational
+ * @res: reserved
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @cpa: channel program address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @count: residual count
+ */
+struct cmd_scsw {
+ __u32 key : 4;
+ __u32 sctl : 1;
+ __u32 eswf : 1;
+ __u32 cc : 2;
+ __u32 fmt : 1;
+ __u32 pfch : 1;
+ __u32 isic : 1;
+ __u32 alcc : 1;
+ __u32 ssi : 1;
+ __u32 zcc : 1;
+ __u32 ectl : 1;
+ __u32 pno : 1;
+ __u32 res : 1;
+ __u32 fctl : 3;
+ __u32 actl : 7;
+ __u32 stctl : 5;
+ __u32 cpa;
+ __u32 dstat : 8;
+ __u32 cstat : 8;
+ __u32 count : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct tm_scsw - transport-mode subchannel status word
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @x: IRB-format control
+ * @q: interrogate-complete
+ * @ectl: extended control
+ * @pno: path not operational
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @tcw: TCW address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @fcxs: FCX status
+ * @schxs: subchannel-extended status
+ */
+struct tm_scsw {
+ u32 key:4;
+ u32 :1;
+ u32 eswf:1;
+ u32 cc:2;
+ u32 fmt:3;
+ u32 x:1;
+ u32 q:1;
+ u32 :1;
+ u32 ectl:1;
+ u32 pno:1;
+ u32 :1;
+ u32 fctl:3;
+ u32 actl:7;
+ u32 stctl:5;
+ u32 tcw;
+ u32 dstat:8;
+ u32 cstat:8;
+ u32 fcxs:8;
+ u32 schxs:8;
+} __attribute__ ((packed));
+
+/**
+ * struct eadm_scsw - subchannel status word for eadm subchannels
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @ectl: extended control
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @aob: AOB address
+ * @dstat: device status
+ * @cstat: subchannel status
+ */
+struct eadm_scsw {
+ u32 key:4;
+ u32:1;
+ u32 eswf:1;
+ u32 cc:2;
+ u32:6;
+ u32 ectl:1;
+ u32:2;
+ u32 fctl:3;
+ u32 actl:7;
+ u32 stctl:5;
+ u32 aob;
+ u32 dstat:8;
+ u32 cstat:8;
+ u32:16;
+} __packed;
+
+/**
+ * union scsw - subchannel status word
+ * @cmd: command-mode SCSW
+ * @tm: transport-mode SCSW
+ * @eadm: eadm SCSW
+ */
+union scsw {
+ struct cmd_scsw cmd;
+ struct tm_scsw tm;
+ struct eadm_scsw eadm;
+} __packed;
+
+#define SCSW_FCTL_CLEAR_FUNC 0x1
+#define SCSW_FCTL_HALT_FUNC 0x2
+#define SCSW_FCTL_START_FUNC 0x4
+
+#define SCSW_ACTL_SUSPENDED 0x1
+#define SCSW_ACTL_DEVACT 0x2
+#define SCSW_ACTL_SCHACT 0x4
+#define SCSW_ACTL_CLEAR_PEND 0x8
+#define SCSW_ACTL_HALT_PEND 0x10
+#define SCSW_ACTL_START_PEND 0x20
+#define SCSW_ACTL_RESUME_PEND 0x40
+
+#define SCSW_STCTL_STATUS_PEND 0x1
+#define SCSW_STCTL_SEC_STATUS 0x2
+#define SCSW_STCTL_PRIM_STATUS 0x4
+#define SCSW_STCTL_INTER_STATUS 0x8
+#define SCSW_STCTL_ALERT_STATUS 0x10
+
+#define DEV_STAT_ATTENTION 0x80
+#define DEV_STAT_STAT_MOD 0x40
+#define DEV_STAT_CU_END 0x20
+#define DEV_STAT_BUSY 0x10
+#define DEV_STAT_CHN_END 0x08
+#define DEV_STAT_DEV_END 0x04
+#define DEV_STAT_UNIT_CHECK 0x02
+#define DEV_STAT_UNIT_EXCEP 0x01
+
+#define SCHN_STAT_PCI 0x80
+#define SCHN_STAT_INCORR_LEN 0x40
+#define SCHN_STAT_PROG_CHECK 0x20
+#define SCHN_STAT_PROT_CHECK 0x10
+#define SCHN_STAT_CHN_DATA_CHK 0x08
+#define SCHN_STAT_CHN_CTRL_CHK 0x04
+#define SCHN_STAT_INTF_CTRL_CHK 0x02
+#define SCHN_STAT_CHAIN_CHECK 0x01
+
+/*
+ * architectured values for first sense byte
+ */
+#define SNS0_CMD_REJECT 0x80
+#define SNS_CMD_REJECT SNS0_CMD_REJECT /* alias of SNS0_CMD_REJECT */
+#define SNS0_INTERVENTION_REQ 0x40
+#define SNS0_BUS_OUT_CHECK 0x20
+#define SNS0_EQUIPMENT_CHECK 0x10
+#define SNS0_DATA_CHECK 0x08
+#define SNS0_OVERRUN 0x04
+#define SNS0_INCOMPL_DOMAIN 0x01
+
+/*
+ * architectured values for second sense byte
+ */
+#define SNS1_PERM_ERR 0x80
+#define SNS1_INV_TRACK_FORMAT 0x40
+#define SNS1_EOC 0x20
+#define SNS1_MESSAGE_TO_OPER 0x10
+#define SNS1_NO_REC_FOUND 0x08
+#define SNS1_FILE_PROTECTED 0x04
+#define SNS1_WRITE_INHIBITED 0x02
+#define SNS1_INPRECISE_END 0x01
+
+/*
+ * architectured values for third sense byte
+ */
+#define SNS2_REQ_INH_WRITE 0x80
+#define SNS2_CORRECTABLE 0x40
+#define SNS2_FIRST_LOG_ERR 0x20
+#define SNS2_ENV_DATA_PRESENT 0x10
+#define SNS2_INPRECISE_END 0x04
+
+/**
+ * scsw_is_tm - check for transport mode scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the specified scsw is a transport mode scsw, zero
+ * otherwise.
+ */
+static inline int scsw_is_tm(union scsw *scsw)
+{
+ return css_general_characteristics.fcx && (scsw->tm.x == 1);
+}
+
+/**
+ * scsw_key - return scsw key field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the key field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_key(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.key;
+ else
+ return scsw->cmd.key;
+}
+
+/**
+ * scsw_eswf - return scsw eswf field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the eswf field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_eswf(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.eswf;
+ else
+ return scsw->cmd.eswf;
+}
+
+/**
+ * scsw_cc - return scsw cc field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cc field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cc(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.cc;
+ else
+ return scsw->cmd.cc;
+}
+
+/**
+ * scsw_ectl - return scsw ectl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the ectl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_ectl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.ectl;
+ else
+ return scsw->cmd.ectl;
+}
+
+/**
+ * scsw_pno - return scsw pno field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the pno field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_pno(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.pno;
+ else
+ return scsw->cmd.pno;
+}
+
+/**
+ * scsw_fctl - return scsw fctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the fctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_fctl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.fctl;
+ else
+ return scsw->cmd.fctl;
+}
+
+/**
+ * scsw_actl - return scsw actl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the actl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_actl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.actl;
+ else
+ return scsw->cmd.actl;
+}
+
+/**
+ * scsw_stctl - return scsw stctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the stctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_stctl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.stctl;
+ else
+ return scsw->cmd.stctl;
+}
+
+/**
+ * scsw_dstat - return scsw dstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the dstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_dstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.dstat;
+ else
+ return scsw->cmd.dstat;
+}
+
+/**
+ * scsw_cstat - return scsw cstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.cstat;
+ else
+ return scsw->cmd.cstat;
+}
+
+/**
+ * scsw_cmd_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_key(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_sctl - check sctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the sctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_eswf(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cc(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+ (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fmt(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_pfch - check pfch field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pfch field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pfch(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_isic - check isic field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the isic field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_isic(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_alcc - check alcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the alcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_alcc(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_ssi - check ssi field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ssi field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ssi(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_zcc - check zcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the zcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+ (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
+{
+ return (scsw->cmd.fctl != 0) &&
+ (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) ||
+ ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)));
+}
+
+/**
+ * scsw_cmd_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_actl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_stctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_dstat(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_cmd_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cstat(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_key(union scsw *scsw)
+{
+ return (scsw->tm.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_tm_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_eswf(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cc(union scsw *scsw)
+{
+ return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) &&
+ (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fmt(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_x - check x field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the x field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_x(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_q - check q field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the q field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_q(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_tm_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_pno(union scsw *scsw)
+{
+ return (scsw->tm.fctl != 0) &&
+ (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) ||
+ ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
+}
+
+/**
+ * scsw_tm_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_actl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_stctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_dstat(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cstat(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_fcxs - check fcxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fcxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fcxs(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_schxs - check schxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the schxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_schxs(union scsw *scsw)
+{
+ return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK |
+ SCHN_STAT_INTF_CTRL_CHK |
+ SCHN_STAT_PROT_CHECK |
+ SCHN_STAT_CHN_DATA_CHK));
+}
+
+/**
+ * scsw_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_actl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_actl(scsw);
+ else
+ return scsw_cmd_is_valid_actl(scsw);
+}
+
+/**
+ * scsw_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cc(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_cc(scsw);
+ else
+ return scsw_cmd_is_valid_cc(scsw);
+}
+
+/**
+ * scsw_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_cstat(scsw);
+ else
+ return scsw_cmd_is_valid_cstat(scsw);
+}
+
+/**
+ * scsw_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_dstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_dstat(scsw);
+ else
+ return scsw_cmd_is_valid_dstat(scsw);
+}
+
+/**
+ * scsw_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent check: forwards to the transport mode variant when
+ * scsw_is_tm() reports a transport mode scsw, and to the command mode
+ * variant otherwise. Return non-zero if the ectl field is valid, zero if
+ * it does not contain a valid value.
+ */
+static inline int scsw_is_valid_ectl(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_valid_ectl(scsw) :
+				  scsw_cmd_is_valid_ectl(scsw);
+}
+
+/**
+ * scsw_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent check: forwards to the transport mode variant when
+ * scsw_is_tm() reports a transport mode scsw, and to the command mode
+ * variant otherwise. Return non-zero if the eswf field is valid, zero if
+ * it does not contain a valid value.
+ */
+static inline int scsw_is_valid_eswf(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_valid_eswf(scsw) :
+				  scsw_cmd_is_valid_eswf(scsw);
+}
+
+/**
+ * scsw_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent check: forwards to the transport mode variant when
+ * scsw_is_tm() reports a transport mode scsw, and to the command mode
+ * variant otherwise. Return non-zero if the fctl field is valid, zero if
+ * it does not contain a valid value.
+ */
+static inline int scsw_is_valid_fctl(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_valid_fctl(scsw) :
+				  scsw_cmd_is_valid_fctl(scsw);
+}
+
+/**
+ * scsw_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent check: forwards to the transport mode variant when
+ * scsw_is_tm() reports a transport mode scsw, and to the command mode
+ * variant otherwise. Return non-zero if the key field is valid, zero if
+ * it does not contain a valid value.
+ */
+static inline int scsw_is_valid_key(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_valid_key(scsw) :
+				  scsw_cmd_is_valid_key(scsw);
+}
+
+/**
+ * scsw_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent check: forwards to the transport mode variant when
+ * scsw_is_tm() reports a transport mode scsw, and to the command mode
+ * variant otherwise. Return non-zero if the pno field is valid, zero if
+ * it does not contain a valid value.
+ */
+static inline int scsw_is_valid_pno(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_valid_pno(scsw) :
+				  scsw_cmd_is_valid_pno(scsw);
+}
+
+/**
+ * scsw_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent check: forwards to the transport mode variant when
+ * scsw_is_tm() reports a transport mode scsw, and to the command mode
+ * variant otherwise. Return non-zero if the stctl field is valid, zero if
+ * it does not contain a valid value.
+ */
+static inline int scsw_is_valid_stctl(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_valid_stctl(scsw) :
+				  scsw_cmd_is_valid_stctl(scsw);
+}
+
+/**
+ * scsw_cmd_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * A command mode scsw counts as unsolicited only when its cc is zero and
+ * its stctl equals exactly status-pending plus alert-status. Return
+ * non-zero if the associated status condition is solicited, zero if it is
+ * unsolicited.
+ */
+static inline int scsw_cmd_is_solicited(union scsw *scsw)
+{
+	if (scsw->cmd.cc != 0)
+		return 1;
+	return scsw->cmd.stctl !=
+		(SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_tm_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * A transport mode scsw counts as unsolicited only when its cc is zero and
+ * its stctl equals exactly status-pending plus alert-status. Return
+ * non-zero if the associated status condition is solicited, zero if it is
+ * unsolicited.
+ */
+static inline int scsw_tm_is_solicited(union scsw *scsw)
+{
+	if (scsw->tm.cc != 0)
+		return 1;
+	return scsw->tm.stctl !=
+		(SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Mode-independent wrapper around scsw_tm_is_solicited() and
+ * scsw_cmd_is_solicited(). Return non-zero if the scsw indicates that the
+ * associated status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_is_solicited(union scsw *scsw)
+{
+	return scsw_is_tm(scsw) ? scsw_tm_is_solicited(scsw) :
+				  scsw_cmd_is_solicited(scsw);
+}
+
+#endif /* _ASM_S390_SCSW_H_ */
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
new file mode 100644
index 00000000000..781a9cf9b00
--- /dev/null
+++ b/arch/s390/include/asm/seccomp.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_S390_SECCOMP_H
+#define _ASM_S390_SECCOMP_H
+
+#include <linux/unistd.h>
+
+/*
+ * Syscall number aliases (presumably consumed by the generic seccomp
+ * code - confirm against its users).
+ */
+#define __NR_seccomp_read __NR_read
+#define __NR_seccomp_write __NR_write
+#define __NR_seccomp_exit __NR_exit
+#define __NR_seccomp_sigreturn __NR_sigreturn
+
+/* The _32 aliases expand to the same __NR_* macros as the ones above. */
+#define __NR_seccomp_read_32 __NR_read
+#define __NR_seccomp_write_32 __NR_write
+#define __NR_seccomp_exit_32 __NR_exit
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#endif /* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
new file mode 100644
index 00000000000..fbd9116eb17
--- /dev/null
+++ b/arch/s390/include/asm/sections.h
@@ -0,0 +1,8 @@
+#ifndef _S390_SECTIONS_H
+#define _S390_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char _eshared[], _ehead[];
+
+#endif
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
new file mode 100644
index 00000000000..8bfce3475b1
--- /dev/null
+++ b/arch/s390/include/asm/segment.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#endif
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 00000000000..5b3e48ef534
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
new file mode 100644
index 00000000000..089a49814c5
--- /dev/null
+++ b/arch/s390/include/asm/setup.h
@@ -0,0 +1,151 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2010
+ */
+#ifndef _ASM_S390_SETUP_H
+#define _ASM_S390_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+
+#define PARMAREA 0x10400
+
+#ifndef __ASSEMBLY__
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+
+/*
+ * C accessors for fixed-address fields: each macro dereferences an
+ * absolute address as an unsigned long lvalue. In the !CONFIG_64BIT
+ * layout every field address is four bytes higher than in the
+ * CONFIG_64BIT layout. COMMAND_LINE is a char pointer to the same
+ * address in both configurations.
+ */
+#ifndef CONFIG_64BIT
+#define IPL_DEVICE (*(unsigned long *) (0x10404))
+#define INITRD_START (*(unsigned long *) (0x1040C))
+#define INITRD_SIZE (*(unsigned long *) (0x10414))
+#define OLDMEM_BASE (*(unsigned long *) (0x1041C))
+#define OLDMEM_SIZE (*(unsigned long *) (0x10424))
+#else /* CONFIG_64BIT */
+#define IPL_DEVICE (*(unsigned long *) (0x10400))
+#define INITRD_START (*(unsigned long *) (0x10408))
+#define INITRD_SIZE (*(unsigned long *) (0x10410))
+#define OLDMEM_BASE (*(unsigned long *) (0x10418))
+#define OLDMEM_SIZE (*(unsigned long *) (0x10420))
+#endif /* CONFIG_64BIT */
+#define COMMAND_LINE ((char *) (0x10480))
+
+extern int memory_end_set;
+extern unsigned long memory_end;
+extern unsigned long max_physmem_end;
+
+extern void detect_memory_memblock(void);
+
+/*
+ * Machine features detected in head.S
+ *
+ * Each flag is a single bit; the MACHINE_IS_xxx and MACHINE_HAS_xxx macros
+ * test these bits in S390_lowcore.machine_flags. Bit 7 is not assigned in
+ * this list.
+ */
+
+#define MACHINE_FLAG_VM (1UL << 0)
+#define MACHINE_FLAG_IEEE (1UL << 1)
+#define MACHINE_FLAG_CSP (1UL << 2)
+#define MACHINE_FLAG_MVPG (1UL << 3)
+#define MACHINE_FLAG_DIAG44 (1UL << 4)
+#define MACHINE_FLAG_IDTE (1UL << 5)
+#define MACHINE_FLAG_DIAG9C (1UL << 6)
+#define MACHINE_FLAG_KVM (1UL << 8)
+#define MACHINE_FLAG_ESOP (1UL << 9)
+#define MACHINE_FLAG_EDAT1 (1UL << 10)
+#define MACHINE_FLAG_EDAT2 (1UL << 11)
+#define MACHINE_FLAG_LPAR (1UL << 12)
+#define MACHINE_FLAG_LPP (1UL << 13)
+#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
+#define MACHINE_FLAG_TE (1UL << 15)
+#define MACHINE_FLAG_RRBM (1UL << 16)
+#define MACHINE_FLAG_TLB_LC (1UL << 17)
+
+/*
+ * Run-time tests on S390_lowcore.machine_flags. The results are non-zero
+ * bit masks rather than 0/1, so use them as truth values only.
+ */
+#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
+
+#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
+/* PFMF and HPAGE are both aliases for the EDAT1 test. */
+#define MACHINE_HAS_PFMF MACHINE_HAS_EDAT1
+#define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1
+
+/*
+ * Without CONFIG_64BIT, IEEE, CSP and MVPG are tested at run time, DIAG44
+ * is constant true and the remaining facilities are constant false. With
+ * CONFIG_64BIT, IEEE, CSP and MVPG are constant true and the remaining
+ * facilities are tested at run time.
+ */
+#ifndef CONFIG_64BIT
+#define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE)
+#define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP)
+#define MACHINE_HAS_IDTE (0)
+#define MACHINE_HAS_DIAG44 (1)
+#define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG)
+#define MACHINE_HAS_EDAT1 (0)
+#define MACHINE_HAS_EDAT2 (0)
+#define MACHINE_HAS_LPP (0)
+#define MACHINE_HAS_TOPOLOGY (0)
+#define MACHINE_HAS_TE (0)
+#define MACHINE_HAS_RRBM (0)
+#define MACHINE_HAS_TLB_LC (0)
+#else /* CONFIG_64BIT */
+#define MACHINE_HAS_IEEE (1)
+#define MACHINE_HAS_CSP (1)
+#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
+#define MACHINE_HAS_MVPG (1)
+#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
+#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
+#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
+#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
+#endif /* CONFIG_64BIT */
+
+/*
+ * Console mode. Override with conmode=
+ */
+extern unsigned int console_mode;
+extern unsigned int console_devno;
+extern unsigned int console_irq;
+
+extern char vmhalt_cmd[];
+extern char vmpoff_cmd[];
+
+/*
+ * console_mode encoding: 0 = undefined, 1 = SCLP, 2 = 3215, 3 = 3270.
+ * The SET_CONSOLE_xxx macros assign console_mode and are usable as single
+ * statements.
+ */
+#define CONSOLE_IS_UNDEFINED (console_mode == 0)
+#define CONSOLE_IS_SCLP (console_mode == 1)
+#define CONSOLE_IS_3215 (console_mode == 2)
+#define CONSOLE_IS_3270 (console_mode == 3)
+#define SET_CONSOLE_SCLP do { console_mode = 1; } while (0)
+#define SET_CONSOLE_3215 do { console_mode = 2; } while (0)
+#define SET_CONSOLE_3270 do { console_mode = 3; } while (0)
+
+#define NSS_NAME_SIZE 8
+extern char kernel_nss_name[];
+
+/*
+ * Without CONFIG_PFAULT the interface collapses to stubs: pfault_init()
+ * evaluates to -1 and pfault_fini() is an empty statement.
+ */
+#ifdef CONFIG_PFAULT
+extern int pfault_init(void);
+extern void pfault_fini(void);
+#else /* CONFIG_PFAULT */
+#define pfault_init() ({-1;})
+#define pfault_fini() do { } while (0)
+#endif /* CONFIG_PFAULT */
+
+extern void cmma_init(void);
+
+extern void (*_machine_restart)(char *command);
+extern void (*_machine_halt)(void);
+extern void (*_machine_power_off)(void);
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * Assembly view: the same names expand to bare addresses instead of C
+ * lvalues. The numeric values match the addresses used by the C accessors
+ * in the !__ASSEMBLY__ branch of this header.
+ */
+#ifndef CONFIG_64BIT
+#define IPL_DEVICE 0x10404
+#define INITRD_START 0x1040C
+#define INITRD_SIZE 0x10414
+#define OLDMEM_BASE 0x1041C
+#define OLDMEM_SIZE 0x10424
+#else /* CONFIG_64BIT */
+#define IPL_DEVICE 0x10400
+#define INITRD_START 0x10408
+#define INITRD_SIZE 0x10410
+#define OLDMEM_BASE 0x10418
+#define OLDMEM_SIZE 0x10420
+#endif /* CONFIG_64BIT */
+#define COMMAND_LINE 0x10480
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h
new file mode 100644
index 00000000000..4e16aede4b0
--- /dev/null
+++ b/arch/s390/include/asm/sfp-machine.h
@@ -0,0 +1,142 @@
+/* Machine-dependent software floating-point definitions.
+ S/390 kernel version.
+ Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, write to the Free Software Foundation, Inc.,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _SFP_MACHINE_H
+#define _SFP_MACHINE_H
+
+
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned int
+#define _FP_WS_TYPE signed int
+#define _FP_I_TYPE int
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+
+/*
+ * If one NaN is signaling and the other is not,
+ * we choose that one, otherwise we choose X.
+ *
+ * Concretely: Y's sign and fraction are copied into R only when X has the
+ * quiet-NaN bit set and Y does not; in every other case X's sign and
+ * fraction are copied. R's class is set to FP_CLS_NAN either way. The OP
+ * argument is not used by this implementation.
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+/* Some assembly to speed things up. */
+/*
+ * Three-word addition: (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0), with word 0
+ * the least significant.
+ *
+ * The high words are added in C. The first asm adds y0 to the low word
+ * and, on carry, propagates a 1 into the middle word and from there into
+ * the high word; general register 0 holds the constant 1 for that, hence
+ * the "0" clobber. The second asm adds y1 to the middle word and, on
+ * carry, increments the high word.
+ *
+ * The x and y arguments are each evaluated exactly once; the results are
+ * assigned to r2, r1, r0 at the end.
+ */
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \
+ unsigned int __r2 = (x2) + (y2); \
+ unsigned int __r1 = (x1); \
+ unsigned int __r0 = (x0); \
+ asm volatile( \
+ " alr %2,%3\n" \
+ " brc 12,0f\n" \
+ " lhi 0,1\n" \
+ " alr %1,0\n" \
+ " brc 12,0f\n" \
+ " alr %0,0\n" \
+ "0:" \
+ : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \
+ : "d" (y0) : "cc", "0" ); \
+ asm volatile( \
+ " alr %1,%2\n" \
+ " brc 12,0f\n" \
+ " ahi %0,1\n" \
+ "0:" \
+ : "+&d" (__r2), "+&d" (__r1) \
+ : "d" (y1) : "cc"); \
+ (r2) = __r2; \
+ (r1) = __r1; \
+ (r0) = __r0; \
+})
+
+/*
+ * Three-word subtraction: (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0), with word
+ * 0 the least significant.
+ *
+ * The high words are subtracted in C. The first asm subtracts y0 from the
+ * low word and, on borrow, propagates a 1 out of the middle word and from
+ * there out of the high word; general register 0 holds the constant 1 for
+ * that, hence the "0" clobber. The second asm subtracts y1 from the middle
+ * word and, on borrow, decrements the high word.
+ */
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \
+ unsigned int __r2 = (x2) - (y2); \
+ unsigned int __r1 = (x1); \
+ unsigned int __r0 = (x0); \
+ asm volatile( \
+ " slr %2,%3\n" \
+ " brc 3,0f\n" \
+ " lhi 0,1\n" \
+ " slr %1,0\n" \
+ " brc 3,0f\n" \
+ " slr %0,0\n" \
+ "0:" \
+ : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \
+ : "d" (y0) : "cc", "0"); \
+ asm volatile( \
+ " slr %1,%2\n" \
+ " brc 3,0f\n" \
+ " ahi %0,-1\n" \
+ "0:" \
+ : "+&d" (__r2), "+&d" (__r1) \
+ : "d" (y1) : "cc"); \
+ (r2) = __r2; \
+ (r1) = __r1; \
+ (r0) = __r0; \
+})
+
+/* In-place decrement: x -= y, expressed through __FP_FRAC_SUB_3. */
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE mode
+
+/* Exception flags. */
+#define FP_EX_INVALID 0x800000
+#define FP_EX_DIVZERO 0x400000
+#define FP_EX_OVERFLOW 0x200000
+#define FP_EX_UNDERFLOW 0x100000
+#define FP_EX_INEXACT 0x080000
+
+/* We write the results always */
+#define FP_INHIBIT_RESULTS 0
+
+#endif
diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h
new file mode 100644
index 00000000000..5959bfb3b69
--- /dev/null
+++ b/arch/s390/include/asm/sfp-util.h
@@ -0,0 +1,77 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/*
+ * Two-word addition: (sh,sl) = (ah,al) + (bh,bl).
+ *
+ * The low words are added first; if that add carries, the high word is
+ * incremented before bh is added to it. Each argument is evaluated once.
+ */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \
+ unsigned int __sh = (ah); \
+ unsigned int __sl = (al); \
+ asm volatile( \
+ " alr %1,%3\n" \
+ " brc 12,0f\n" \
+ " ahi %0,1\n" \
+ "0: alr %0,%2" \
+ : "+&d" (__sh), "+d" (__sl) \
+ : "d" (bh), "d" (bl) : "cc"); \
+ (sh) = __sh; \
+ (sl) = __sl; \
+})
+
+/*
+ * Two-word subtraction: (sh,sl) = (ah,al) - (bh,bl).
+ *
+ * The low words are subtracted first; if that subtract borrows, the high
+ * word is decremented before bh is subtracted from it. Each argument is
+ * evaluated once.
+ */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \
+ unsigned int __sh = (ah); \
+ unsigned int __sl = (al); \
+ asm volatile( \
+ " slr %1,%3\n" \
+ " brc 3,0f\n" \
+ " ahi %0,-1\n" \
+ "0: slr %0,%2" \
+ : "+&d" (__sh), "+d" (__sl) \
+ : "d" (bh), "d" (bl) : "cc"); \
+ (sh) = __sh; \
+ (sl) = __sl; \
+})
+
+/* a umul b = a mul b + ((a >= 1<<31) ? b<<32 : 0) + ((b >= 1<<31) ? a<<32 : 0) */
+/*
+ * Unsigned 32x32 -> 64 bit multiply: (wh,wl) = u * v.
+ *
+ * The product is formed with the signed multiply instruction in the
+ * register pair 0/1 and then corrected: v is added to the high word when
+ * u has its top bit set, and u is added to the high word when v has its
+ * top bit set. Registers 0 and 1 are clobbered. All macro arguments are
+ * parenthesized, matching add_ssaaaa() and sub_ddmmss().
+ */
+#define umul_ppmm(wh, wl, u, v) ({ \
+ unsigned int __wh = (u); \
+ unsigned int __wl = (v); \
+ asm volatile( \
+ " ltr 1,%0\n" \
+ " mr 0,%1\n" \
+ " jnm 0f\n" \
+ " alr 0,%1\n" \
+ "0: ltr %1,%1\n" \
+ " jnm 1f\n" \
+ " alr 0,%0\n" \
+ "1: lr %0,0\n" \
+ " lr %1,1\n" \
+ : "+d" (__wh), "+d" (__wl) \
+ : : "0", "1", "cc"); \
+ (wh) = __wh; \
+ (wl) = __wl; \
+})
+
+#ifdef CONFIG_64BIT
+/*
+ * Divide the two-word value (n1,n0) by d using native 64-bit arithmetic:
+ * q receives the quotient and r the remainder. n0 is parenthesized so
+ * that an argument containing a lower-precedence operator (for example
+ * "a | b") is added as a whole, and no unused local is declared that
+ * could shadow a caller's variable.
+ */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { unsigned long __n; \
+ unsigned int __d; \
+ __n = ((unsigned long)(n1) << 32) + (n0); \
+ __d = (d); \
+ (q) = __n / __d; \
+ (r) = __n % __d; \
+ } while (0)
+#else
+/*
+ * !CONFIG_64BIT variant: the division is delegated to the out-of-line
+ * helper __udiv_qrnnd(), which returns the quotient and stores the
+ * remainder through its first argument.
+ */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { unsigned int __r; \
+ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int,
+ unsigned int , unsigned int);
+#endif
+
+#define UDIV_NEEDS_NORMALIZATION 0
+
+#define abort() BUG()
+
+#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h
new file mode 100644
index 00000000000..e985182738f
--- /dev/null
+++ b/arch/s390/include/asm/shmparam.h
@@ -0,0 +1,11 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/shmparam.h"
+ */
+#ifndef _ASM_S390_SHMPARAM_H
+#define _ASM_S390_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
+
+#endif /* _ASM_S390_SHMPARAM_H */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
new file mode 100644
index 00000000000..abf9e573594
--- /dev/null
+++ b/arch/s390/include/asm/signal.h
@@ -0,0 +1,25 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/signal.h"
+ */
+#ifndef _ASMS390_SIGNAL_H
+#define _ASMS390_SIGNAL_H
+
+#include <uapi/asm/signal.h>
+
+/* Most things should be clean enough to redefine this at will, if care
+ is taken to make libc match. */
+#include <asm/sigcontext.h>
+#define _NSIG _SIGCONTEXT_NSIG
+#define _NSIG_BPW _SIGCONTEXT_NSIG_BPW
+#define _NSIG_WORDS _SIGCONTEXT_NSIG_WORDS
+
+typedef unsigned long old_sigset_t; /* at least 32 bits */
+
+/* Signal set stored as an array of _NSIG_WORDS unsigned longs. */
+typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#define __ARCH_HAS_SA_RESTORER
+#endif
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
new file mode 100644
index 00000000000..bf9c823d402
--- /dev/null
+++ b/arch/s390/include/asm/sigp.h
@@ -0,0 +1,53 @@
+#ifndef __S390_ASM_SIGP_H
+#define __S390_ASM_SIGP_H
+
+/* SIGP order codes */
+#define SIGP_SENSE 1
+#define SIGP_EXTERNAL_CALL 2
+#define SIGP_EMERGENCY_SIGNAL 3
+#define SIGP_START 4
+#define SIGP_STOP 5
+#define SIGP_RESTART 6
+#define SIGP_STOP_AND_STORE_STATUS 9
+#define SIGP_INITIAL_CPU_RESET 11
+#define SIGP_SET_PREFIX 13
+#define SIGP_STORE_STATUS_AT_ADDRESS 14
+#define SIGP_SET_ARCHITECTURE 18
+#define SIGP_COND_EMERGENCY_SIGNAL 19
+#define SIGP_SENSE_RUNNING 21
+
+/* SIGP condition codes */
+#define SIGP_CC_ORDER_CODE_ACCEPTED 0
+#define SIGP_CC_STATUS_STORED 1
+#define SIGP_CC_BUSY 2
+#define SIGP_CC_NOT_OPERATIONAL 3
+
+/* SIGP cpu status bits */
+
+#define SIGP_STATUS_CHECK_STOP 0x00000010UL
+#define SIGP_STATUS_STOPPED 0x00000040UL
+#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL
+#define SIGP_STATUS_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
+#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Issue a SIGP instruction.
+ * @addr:   address of the target CPU
+ * @order:  SIGP order code (one of the SIGP_xxx order codes)
+ * @parm:   parameter, passed to the instruction in general register 1
+ * @status: optional; receives the content of register 1 after the
+ *          instruction when the condition code is 1
+ *          (SIGP_CC_STATUS_STORED)
+ *
+ * Returns the condition code of the instruction (0..3), extracted with
+ * ipm/srl.
+ */
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+ register unsigned int reg1 asm ("1") = parm;
+ int cc;
+
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ if (status && cc == 1)
+ *status = reg1;
+ return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
new file mode 100644
index 00000000000..4f1307962a9
--- /dev/null
+++ b/arch/s390/include/asm/smp.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <asm/sigp.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/lowcore.h>
+
+#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
+
+extern struct mutex smp_cpu_state_mutex;
+
+extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern void smp_call_online_cpu(void (*func)(void *), void *);
+extern void smp_call_ipl_cpu(void (*func)(void *), void *);
+
+extern int smp_find_processor_id(u16 address);
+extern int smp_store_status(int cpu);
+extern int smp_vcpu_scheduled(int cpu);
+extern void smp_yield_cpu(int cpu);
+extern void smp_yield(void);
+extern void smp_cpu_set_polarization(int cpu, int val);
+extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
+
+#else /* CONFIG_SMP */
+
+/* !CONFIG_SMP variant: simply call @func with @data in the caller's context. */
+static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+	(*func)(data);
+}
+
+/* !CONFIG_SMP variant: simply call @func with @data in the caller's context. */
+static inline void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+	(*func)(data);
+}
+
+/*
+ * !CONFIG_SMP stubs: lookups report fixed values and the yield/fill
+ * operations do nothing.
+ */
+static inline int smp_find_processor_id(u16 address)
+{
+	return 0;
+}
+
+static inline int smp_store_status(int cpu)
+{
+	return 0;
+}
+
+static inline int smp_vcpu_scheduled(int cpu)
+{
+	return 1;
+}
+
+static inline void smp_yield_cpu(int cpu)
+{
+}
+
+static inline void smp_yield(void)
+{
+}
+
+static inline void smp_fill_possible_mask(void)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Read a CPU address once with stap() (presumably the calling CPU's own
+ * address - confirm) and keep sending SIGP_STOP to it, relaxing between
+ * attempts. The loop has no exit condition.
+ */
+static inline void smp_stop_cpu(void)
+{
+	const u16 self = stap();
+
+	while (1) {
+		__pcpu_sigp(self, SIGP_STOP, 0, NULL);
+		cpu_relax();
+	}
+}
+
+/*
+ * CPU hotplug interface. Without CONFIG_HOTPLUG_CPU only two stubs
+ * remain: smp_rescan_cpus() returns 0 and cpu_die() does nothing.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+extern int smp_rescan_cpus(void);
+extern void __noreturn cpu_die(void);
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+#else
+static inline int smp_rescan_cpus(void) { return 0; }
+static inline void cpu_die(void) { }
+#endif
+
+#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
new file mode 100644
index 00000000000..a60d085ddb4
--- /dev/null
+++ b/arch/s390/include/asm/sparsemem.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_S390_SPARSEMEM_H
+#define _ASM_S390_SPARSEMEM_H
+
+#ifdef CONFIG_64BIT
+
+#define SECTION_SIZE_BITS 28
+#define MAX_PHYSMEM_BITS 46
+
+#else
+
+#define SECTION_SIZE_BITS 25
+#define MAX_PHYSMEM_BITS 31
+
+#endif /* CONFIG_64BIT */
+
+#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
new file mode 100644
index 00000000000..96879f7ad6d
--- /dev/null
+++ b/arch/s390/include/asm/spinlock.h
@@ -0,0 +1,202 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/spinlock.h"
+ */
+
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <linux/smp.h>
+
+#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+
+extern int spin_retry;
+
+/*
+ * Compare-and-swap on a 32-bit lock word using the cs instruction: if
+ * *lock equals @old, @new is stored. Returns non-zero when the value left
+ * in the "old" register after the instruction still equals the expected
+ * value, i.e. when the swap took place. The "memory" clobber keeps the
+ * compiler from caching memory contents across the operation.
+ */
+static inline int
+_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
+{
+ unsigned int old_expected = old;
+
+ asm volatile(
+ " cs %0,%3,%1"
+ : "=d" (old), "=Q" (*lock)
+ : "0" (old), "d" (new), "Q" (*lock)
+ : "cc", "memory" );
+ return old == old_expected;
+}
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+void arch_spin_lock_wait(arch_spinlock_t *);
+int arch_spin_trylock_retry(arch_spinlock_t *);
+void arch_spin_relax(arch_spinlock_t *);
+void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
+
+/*
+ * Lock word value for @cpu: the bitwise complement of the cpu number, so
+ * a non-negative cpu never yields 0 (the unlocked value).
+ */
+static inline u32 arch_spin_lockval(int cpu)
+{
+	u32 lockval = ~cpu;
+
+	return lockval;
+}
+
+/* A lock value (passed by value) is unlocked when its lock word is zero. */
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return !lock.lock;
+}
+
+/* Take one snapshot of the lock word; any non-zero value means locked. */
+static inline int arch_spin_is_locked(arch_spinlock_t *lp)
+{
+	unsigned int snapshot = ACCESS_ONCE(lp->lock);
+
+	return snapshot != 0;
+}
+
+/*
+ * Single lock attempt: after a barrier(), check that the lock word reads
+ * as unlocked and only then try to swap 0 -> SPINLOCK_LOCKVAL. Returns
+ * non-zero when the lock was taken; success is hinted as the likely case.
+ */
+static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
+{
+ barrier();
+ return likely(arch_spin_value_unlocked(*lp) &&
+ _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
+}
+
+/*
+ * Single release attempt: swap SPINLOCK_LOCKVAL -> 0. Returns non-zero
+ * when the lock word held this CPU's lock value and was cleared.
+ */
+static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
+{
+	int released = _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
+
+	return released;
+}
+
+/* Acquire @lp: inline fast path first, out-of-line wait loop otherwise. */
+static inline void arch_spin_lock(arch_spinlock_t *lp)
+{
+	if (arch_spin_trylock_once(lp))
+		return;
+	arch_spin_lock_wait(lp);
+}
+
+/*
+ * Acquire @lp: inline fast path first; on contention hand over to the
+ * out-of-line wait loop, passing @flags along.
+ */
+static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
+					unsigned long flags)
+{
+	if (arch_spin_trylock_once(lp))
+		return;
+	arch_spin_lock_wait_flags(lp, flags);
+}
+
+/*
+ * Try to acquire @lp without waiting indefinitely. Returns 1 when the
+ * inline attempt succeeds, otherwise the result of the out-of-line retry
+ * helper.
+ */
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
+{
+	if (arch_spin_trylock_once(lp))
+		return 1;
+	return arch_spin_trylock_retry(lp);
+}
+
+/* Release @lp; the result of the release attempt is deliberately ignored. */
+static inline void arch_spin_unlock(arch_spinlock_t *lp)
+{
+	(void) arch_spin_tryrelease_once(lp);
+}
+
+/* Spin, relaxing between checks, until @lock is observed unlocked. */
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	for (;;) {
+		if (!arch_spin_is_locked(lock))
+			break;
+		arch_spin_relax(lock);
+	}
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * arch_read_can_lock - would read_trylock() succeed?
+ * @x: the rwlock in question.
+ *
+ * True while the lock word, interpreted as a signed int, is non-negative.
+ */
+#define arch_read_can_lock(x) ((int)(x)->lock >= 0)
+
+/**
+ * arch_write_can_lock - would write_trylock() succeed?
+ * @x: the rwlock in question.
+ *
+ * True only while the lock word is zero.
+ */
+#define arch_write_can_lock(x) ((x)->lock == 0)
+
+extern void _raw_read_lock_wait(arch_rwlock_t *lp);
+extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
+extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
+extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
+
+/*
+ * Single read-lock attempt: snapshot the lock word and, if it is
+ * non-negative as a signed int, try to swap in the snapshot plus one.
+ * Returns non-zero on success.
+ */
+static inline int arch_read_trylock_once(arch_rwlock_t *rw)
+{
+ unsigned int old = ACCESS_ONCE(rw->lock);
+ return likely((int) old >= 0 &&
+ _raw_compare_and_swap(&rw->lock, old, old + 1));
+}
+
+/*
+ * Single write-lock attempt: if the lock word snapshot is zero, try to
+ * swap 0 -> 0x80000000 (the top bit of the lock word). Returns non-zero
+ * on success.
+ */
+static inline int arch_write_trylock_once(arch_rwlock_t *rw)
+{
+ unsigned int old = ACCESS_ONCE(rw->lock);
+ return likely(old == 0 &&
+ _raw_compare_and_swap(&rw->lock, 0, 0x80000000));
+}
+
+/* Take @rw for reading: inline fast path, out-of-line wait otherwise. */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	if (arch_read_trylock_once(rw))
+		return;
+	_raw_read_lock_wait(rw);
+}
+
+/*
+ * Take @rw for reading: inline fast path; on contention hand over to the
+ * out-of-line wait helper, passing @flags along.
+ */
+static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
+{
+	if (arch_read_trylock_once(rw))
+		return;
+	_raw_read_lock_wait_flags(rw, flags);
+}
+
+/*
+ * Drop one reader: re-read the lock word and retry the compare-and-swap
+ * to "value - 1" until it succeeds.
+ */
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	for (;;) {
+		unsigned int cur = ACCESS_ONCE(rw->lock);
+
+		if (_raw_compare_and_swap(&rw->lock, cur, cur - 1))
+			break;
+	}
+}
+
+/* Take @rw for writing: inline fast path, out-of-line wait otherwise. */
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	if (arch_write_trylock_once(rw))
+		return;
+	_raw_write_lock_wait(rw);
+}
+
+/*
+ * Take @rw for writing: inline fast path; on contention hand over to the
+ * out-of-line wait helper, passing @flags along.
+ */
+static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
+{
+	if (arch_write_trylock_once(rw))
+		return;
+	_raw_write_lock_wait_flags(rw, flags);
+}
+
+/*
+ * Release the write lock by swapping 0x80000000 -> 0; the result of the
+ * compare-and-swap is deliberately ignored.
+ */
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	(void) _raw_compare_and_swap(&rw->lock, 0x80000000, 0);
+}
+
+/*
+ * Try to take @rw for reading. Returns 1 when the inline attempt
+ * succeeds, otherwise the result of the out-of-line retry helper.
+ */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	if (arch_read_trylock_once(rw))
+		return 1;
+	return _raw_read_trylock_retry(rw);
+}
+
+/*
+ * Try to take @rw for writing. Returns 1 when the inline attempt
+ * succeeds, otherwise the result of the out-of-line retry helper.
+ */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	if (arch_write_trylock_once(rw))
+		return 1;
+	return _raw_write_trylock_retry(rw);
+}
+
+#define arch_read_relax(lock) cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
new file mode 100644
index 00000000000..b2cd6ff7c2c
--- /dev/null
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_SPINLOCK_TYPES_H
+#define __ASM_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+/* Spinlock: a single 4-byte aligned lock word; 0 is the unlocked value. */
+typedef struct {
+ unsigned int lock;
+} __attribute__ ((aligned (4))) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
+
+/* Read-write lock: a single lock word; 0 is the unlocked value. */
+typedef struct {
+ unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+
+#endif
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
new file mode 100644
index 00000000000..7e2dcd7c57e
--- /dev/null
+++ b/arch/s390/include/asm/string.h
@@ -0,0 +1,143 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef _S390_STRING_H_
+#define _S390_STRING_H_
+
+#ifndef _LINUX_TYPES_H
+#include <linux/types.h>
+#endif
+
+#define __HAVE_ARCH_MEMCHR /* inline & arch function */
+#define __HAVE_ARCH_MEMCMP /* arch function */
+#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
+#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
+#define __HAVE_ARCH_STRCAT /* inline & arch function */
+#define __HAVE_ARCH_STRCMP /* arch function */
+#define __HAVE_ARCH_STRCPY /* inline & arch function */
+#define __HAVE_ARCH_STRLCAT /* arch function */
+#define __HAVE_ARCH_STRLCPY /* arch function */
+#define __HAVE_ARCH_STRLEN /* inline & arch function */
+#define __HAVE_ARCH_STRNCAT /* arch function */
+#define __HAVE_ARCH_STRNCPY /* arch function */
+#define __HAVE_ARCH_STRNLEN /* inline & arch function */
+#define __HAVE_ARCH_STRRCHR /* arch function */
+#define __HAVE_ARCH_STRSTR /* arch function */
+
+/* Prototypes for non-inlined arch strings functions. */
+extern int memcmp(const void *, const void *, size_t);
+extern void *memcpy(void *, const void *, size_t);
+extern void *memset(void *, int, size_t);
+extern int strcmp(const char *,const char *);
+extern size_t strlcat(char *, const char *, size_t);
+extern size_t strlcpy(char *, const char *, size_t);
+extern char *strncat(char *, const char *, size_t);
+extern char *strncpy(char *, const char *, size_t);
+extern char *strrchr(const char *, int);
+extern char *strstr(const char *, const char *);
+
+#undef __HAVE_ARCH_MEMMOVE
+#undef __HAVE_ARCH_STRCHR
+#undef __HAVE_ARCH_STRNCHR
+#undef __HAVE_ARCH_STRNCMP
+#undef __HAVE_ARCH_STRNICMP
+#undef __HAVE_ARCH_STRPBRK
+#undef __HAVE_ARCH_STRSEP
+#undef __HAVE_ARCH_STRSPN
+
+#if !defined(IN_ARCH_STRING_C)
+
+/*
+ * Find the first byte equal to (char) c in the n bytes starting at s.
+ *
+ * The search is done by srst with the byte to look for in register 0 and
+ * s + n as the end address; "jo 0b" re-issues the instruction while it
+ * reports an incomplete search. If the byte was found ("jl"), the updated
+ * first operand is returned; otherwise the result is set to NULL.
+ */
+static inline void *memchr(const void * s, int c, size_t n)
+{
+ register int r0 asm("0") = (char) c;
+ const void *ret = s + n;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b\n"
+ " jl 1f\n"
+ " la %0,0\n"
+ "1:"
+ : "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
+ return (void *) ret;
+}
+
+/*
+ * Like memchr(), but without the not-found special case: the result
+ * starts out as s + n and is returned as left by srst, so when the byte
+ * does not occur the end address s + n is returned instead of NULL
+ * (assuming srst leaves its first operand unchanged on a miss - see the
+ * z/Architecture description of SRST).
+ */
+static inline void *memscan(void *s, int c, size_t n)
+{
+ register int r0 asm("0") = (char) c;
+ const void *ret = s + n;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b\n"
+ : "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
+ return (void *) ret;
+}
+
+/*
+ * Append src to dst and return the original dst.
+ *
+ * Step 1 (srst): search from dst for the byte in register 0 (zero), with
+ * the end-address operand "dummy" preset to 0; this locates dst's
+ * terminator. Step 2 (mvst): copy src to that position. Both instructions
+ * are re-issued via "jo" while they report incomplete processing.
+ */
+static inline char *strcat(char *dst, const char *src)
+{
+ register int r0 asm("0") = 0;
+ unsigned long dummy;
+ char *ret = dst;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b\n"
+ "1: mvst %0,%2\n"
+ " jo 1b"
+ : "=&a" (dummy), "+a" (dst), "+a" (src)
+ : "d" (r0), "0" (0) : "cc", "memory" );
+ return ret;
+}
+
+/*
+ * Copy the string src to dst with mvst, using the byte in register 0
+ * (zero) as the terminator, and return the original dst. "jo 0b"
+ * re-issues the instruction while it reports an incomplete move.
+ */
+static inline char *strcpy(char *dst, const char *src)
+{
+ register int r0 asm("0") = 0;
+ char *ret = dst;
+
+ asm volatile(
+ "0: mvst %0,%1\n"
+ " jo 0b"
+ : "+&a" (dst), "+&a" (src) : "d" (r0)
+ : "cc", "memory");
+ return ret;
+}
+
+/*
+ * Length of the string s.
+ *
+ * Register 0 plays two roles for srst here: it supplies the byte to
+ * search for (zero) and it is also the first operand (end address 0).
+ * After the search it holds the address of the terminator, so the length
+ * is that address minus s.
+ */
+static inline size_t strlen(const char *s)
+{
+ register unsigned long r0 asm("0") = 0;
+ const char *tmp = s;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b"
+ : "+d" (r0), "+a" (tmp) : : "cc");
+ return r0 - (unsigned long) s;
+}
+
+/*
+ * Length of the string s, looking at no more than n bytes.
+ *
+ * srst searches for a zero byte between s and s + n. "end" starts as
+ * s + n and is replaced by the terminator's address when one is found, so
+ * end - s is either the string length or n.
+ */
+static inline size_t strnlen(const char * s, size_t n)
+{
+ register int r0 asm("0") = 0;
+ const char *tmp = s;
+ const char *end = s + n;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b"
+ : "+a" (end), "+a" (tmp) : "d" (r0) : "cc");
+ return end - s;
+}
+#else /* IN_ARCH_STRING_C */
+void *memchr(const void * s, int c, size_t n);
+void *memscan(void *s, int c, size_t n);
+char *strcat(char *dst, const char *src);
+char *strcpy(char *dst, const char *src);
+size_t strlen(const char *s);
+size_t strnlen(const char * s, size_t n);
+#endif /* !IN_ARCH_STRING_C */
+
+#endif /* _S390_STRING_H_ */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
new file mode 100644
index 00000000000..18ea9e3f814
--- /dev/null
+++ b/arch/s390/include/asm/switch_to.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_SWITCH_TO_H
+#define __ASM_SWITCH_TO_H
+
+#include <linux/thread_info.h>
+#include <asm/ptrace.h>
+
+extern struct task_struct *__switch_to(void *, void *);
+extern void update_cr_regs(struct task_struct *task);
+/*
+ * test_fp_ctl - check whether fpc can be loaded as floating point control.
+ * Returns 0 right away when MACHINE_HAS_IEEE is false. Otherwise rc is
+ * preloaded with -EINVAL (the "0" (-EINVAL) input) and is only cleared by
+ * the "la %0,0" at the end of the sequence; if the exception table entry
+ * (0b -> 1b) is used, that "la" is skipped and -EINVAL is returned.
+ * The sequence is efpc into orig_fpc, sfpc with fpc, sfpc with orig_fpc -
+ * presumably save, try, restore.
+ */
+static inline int test_fp_ctl(u32 fpc)
+{
+ u32 orig_fpc;
+ int rc;
+
+ if (!MACHINE_HAS_IEEE)
+ return 0;
+
+ asm volatile(
+ " efpc %1\n"
+ " sfpc %2\n"
+ "0: sfpc %1\n"
+ " la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc), "=d" (orig_fpc)
+ : "d" (fpc), "0" (-EINVAL));
+ return rc;
+}
+/*
+ * save_fp_ctl - store the floating point control register to *fpc using
+ * stfpc. Does nothing when MACHINE_HAS_IEEE is false.
+ */
+static inline void save_fp_ctl(u32 *fpc)
+{
+ if (!MACHINE_HAS_IEEE)
+ return;
+
+ asm volatile(
+ " stfpc %0\n"
+ : "+Q" (*fpc));
+}
+/*
+ * restore_fp_ctl - load the floating point control register from *fpc
+ * using lfpc. Returns 0 when MACHINE_HAS_IEEE is false. Otherwise rc
+ * starts as -EINVAL and is cleared by the "la %0,0" at label 0; the
+ * exception table entry (0b -> 1b) skips that "la", leaving -EINVAL.
+ */
+static inline int restore_fp_ctl(u32 *fpc)
+{
+ int rc;
+
+ if (!MACHINE_HAS_IEEE)
+ return 0;
+
+ asm volatile(
+ " lfpc %1\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
+ return rc;
+}
+/*
+ * save_fp_regs - store floating point registers into fprs[] with std.
+ * Registers 0, 2, 4 and 6 are always stored; the remaining twelve
+ * (1, 3, 5, 7 and 8-15) only when MACHINE_HAS_IEEE is true. Register k
+ * goes to fprs[k].
+ */
+static inline void save_fp_regs(freg_t *fprs)
+{
+ asm volatile("std 0,%0" : "=Q" (fprs[0]));
+ asm volatile("std 2,%0" : "=Q" (fprs[2]));
+ asm volatile("std 4,%0" : "=Q" (fprs[4]));
+ asm volatile("std 6,%0" : "=Q" (fprs[6]));
+ if (!MACHINE_HAS_IEEE)
+ return;
+ asm volatile("std 1,%0" : "=Q" (fprs[1]));
+ asm volatile("std 3,%0" : "=Q" (fprs[3]));
+ asm volatile("std 5,%0" : "=Q" (fprs[5]));
+ asm volatile("std 7,%0" : "=Q" (fprs[7]));
+ asm volatile("std 8,%0" : "=Q" (fprs[8]));
+ asm volatile("std 9,%0" : "=Q" (fprs[9]));
+ asm volatile("std 10,%0" : "=Q" (fprs[10]));
+ asm volatile("std 11,%0" : "=Q" (fprs[11]));
+ asm volatile("std 12,%0" : "=Q" (fprs[12]));
+ asm volatile("std 13,%0" : "=Q" (fprs[13]));
+ asm volatile("std 14,%0" : "=Q" (fprs[14]));
+ asm volatile("std 15,%0" : "=Q" (fprs[15]));
+}
+/*
+ * restore_fp_regs - load floating point registers from fprs[] with ld.
+ * Mirror image of save_fp_regs(): registers 0, 2, 4 and 6 are always
+ * loaded, the other twelve only when MACHINE_HAS_IEEE is true.
+ */
+static inline void restore_fp_regs(freg_t *fprs)
+{
+ asm volatile("ld 0,%0" : : "Q" (fprs[0]));
+ asm volatile("ld 2,%0" : : "Q" (fprs[2]));
+ asm volatile("ld 4,%0" : : "Q" (fprs[4]));
+ asm volatile("ld 6,%0" : : "Q" (fprs[6]));
+ if (!MACHINE_HAS_IEEE)
+ return;
+ asm volatile("ld 1,%0" : : "Q" (fprs[1]));
+ asm volatile("ld 3,%0" : : "Q" (fprs[3]));
+ asm volatile("ld 5,%0" : : "Q" (fprs[5]));
+ asm volatile("ld 7,%0" : : "Q" (fprs[7]));
+ asm volatile("ld 8,%0" : : "Q" (fprs[8]));
+ asm volatile("ld 9,%0" : : "Q" (fprs[9]));
+ asm volatile("ld 10,%0" : : "Q" (fprs[10]));
+ asm volatile("ld 11,%0" : : "Q" (fprs[11]));
+ asm volatile("ld 12,%0" : : "Q" (fprs[12]));
+ asm volatile("ld 13,%0" : : "Q" (fprs[13]));
+ asm volatile("ld 14,%0" : : "Q" (fprs[14]));
+ asm volatile("ld 15,%0" : : "Q" (fprs[15]));
+}
+/*
+ * save_access_regs - store access registers 0-15 to acrs[] with one stam.
+ * The local acrstype struct exists only so the "=Q" operand describes all
+ * NUM_ACRS ints as written, not just the first one.
+ */
+static inline void save_access_regs(unsigned int *acrs)
+{
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
+}
+/*
+ * restore_access_regs - load access registers 0-15 from acrs[] with one
+ * lam. The local acrstype struct makes the "Q" operand cover all NUM_ACRS
+ * ints as input.
+ */
+static inline void restore_access_regs(unsigned int *acrs)
+{
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
+}
+/*
+ * switch_to - switch from task prev to task next.
+ * If prev has an mm, its fp control, fp registers, access registers and
+ * ri_cb are saved into prev->thread. If next has an mm, the same state is
+ * restored from next->thread and update_cr_regs(next) is called. Finally
+ * __switch_to() runs and its return value is assigned to prev. The "last"
+ * argument is not used by this macro. prev and next are expanded several
+ * times without parentheses, so pass plain identifiers.
+ */
+#define switch_to(prev,next,last) do { \
+ if (prev->mm) { \
+ save_fp_ctl(&prev->thread.fp_regs.fpc); \
+ save_fp_regs(prev->thread.fp_regs.fprs); \
+ save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
+ } \
+ if (next->mm) { \
+ restore_fp_ctl(&next->thread.fp_regs.fpc); \
+ restore_fp_regs(next->thread.fp_regs.fprs); \
+ restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ update_cr_regs(next); \
+ } \
+ prev = __switch_to(prev,next); \
+} while (0)
+
+#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
new file mode 100644
index 00000000000..abad78d5b10
--- /dev/null
+++ b/arch/s390/include/asm/syscall.h
@@ -0,0 +1,100 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H 1
+
+#include <uapi/linux/audit.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB. So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+extern const unsigned int sys_call_table_emu[];
+/*
+ * syscall_get_nr - system call number recorded in regs.
+ * Yields the low 16 bits of regs->int_code when the PIF_SYSCALL flag is
+ * set in regs, and the -1 arm of the conditional otherwise. task is unused.
+ * NOTE(review): both arms go through the usual arithmetic conversions, so
+ * the value seen for -1 depends on the type of int_code (declared
+ * elsewhere) - confirm callers only compare it as an int.
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return test_pt_regs_flag(regs, PIF_SYSCALL) ?
+ (regs->int_code & 0xffff) : -1;
+}
+/*
+ * syscall_rollback - copy regs->orig_gpr2 back into regs->gprs[2].
+ * task is unused.
+ */
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ regs->gprs[2] = regs->orig_gpr2;
+}
+
+/*
+ * syscall_get_error - error code of a finished system call.
+ * Returns regs->gprs[2] when IS_ERR_VALUE() classifies it as an error
+ * value, and 0 in every other case. task is unused.
+ */
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if (!IS_ERR_VALUE(regs->gprs[2]))
+ return 0;
+ return regs->gprs[2];
+}
+/*
+ * syscall_get_return_value - raw content of regs->gprs[2], returned as a
+ * long. task is unused.
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->gprs[2];
+}
+
+/*
+ * syscall_set_return_value - store the result of a system call in gprs[2].
+ * @task:  unused
+ * @regs:  register set to update
+ * @error: 0 on success, otherwise a negative errno value
+ * @val:   value to store when @error is 0
+ *
+ * The error code is stored unchanged. syscall_get_error() detects errors
+ * with IS_ERR_VALUE(regs->gprs[2]), i.e. it expects the negative errno
+ * itself; negating @error here would leave a positive value in gprs[2]
+ * that is no longer recognized as an error.
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+ struct pt_regs *regs,
+ int error, long val)
+{
+ regs->gprs[2] = error ? error : val;
+}
+
+/*
+ * syscall_get_arguments - copy n system call arguments, starting with
+ * argument number i, from regs into args[0..n-1].
+ * Argument 0 is taken from regs->orig_gpr2, argument k > 0 from
+ * regs->gprs[2 + k]. With CONFIG_COMPAT, values of a TIF_31BIT task are
+ * masked to their low 32 bits. i + n must not exceed 6 (BUG_ON). As in
+ * the loop-free tail below, args[0] is written whenever i is 0.
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ unsigned long *args)
+{
+ unsigned long mask = -1UL;
+ unsigned int idx;
+
+ BUG_ON(i + n > 6);
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ mask = 0xffffffff;
+#endif
+ /* walk the slots from the last one down to the first */
+ for (idx = n; idx > 0; idx--) {
+ const unsigned int pos = idx - 1;
+
+ /* argument 0 does not live in gprs[]; it is handled below */
+ if (i + pos == 0)
+ continue;
+ args[pos] = regs->gprs[2 + i + pos] & mask;
+ }
+ if (i == 0)
+ args[0] = regs->orig_gpr2 & mask;
+}
+
+/*
+ * syscall_set_arguments - write n system call arguments, starting with
+ * argument number i, from args[0..n-1] into regs.
+ * Argument 0 is stored in regs->orig_gpr2, argument k > 0 in
+ * regs->gprs[2 + k]. i + n must not exceed 6 (BUG_ON). task is unused.
+ * orig_gpr2 is written from args[0] whenever i is 0.
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ const unsigned long *args)
+{
+ unsigned int idx;
+
+ BUG_ON(i + n > 6);
+ /* walk the slots from the last one down to the first */
+ for (idx = n; idx > 0; idx--) {
+ const unsigned int pos = idx - 1;
+
+ /* argument 0 does not live in gprs[]; it is handled below */
+ if (i + pos == 0)
+ continue;
+ regs->gprs[2 + i + pos] = args[pos];
+ }
+ if (i == 0)
+ regs->orig_gpr2 = args[0];
+}
+
+/*
+ * syscall_get_arch - audit architecture of the current task.
+ * With CONFIG_COMPAT a TIF_31BIT task reports AUDIT_ARCH_S390. Otherwise
+ * the answer follows the size of long in this build: AUDIT_ARCH_S390X
+ * for 8 bytes, AUDIT_ARCH_S390 for anything else.
+ */
+static inline int syscall_get_arch(void)
+{
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(current, TIF_31BIT))
+ return AUDIT_ARCH_S390;
+#endif
+ if (sizeof(long) == 8)
+ return AUDIT_ARCH_S390X;
+ return AUDIT_ARCH_S390;
+}
+#endif /* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
new file mode 100644
index 00000000000..f92428e459f
--- /dev/null
+++ b/arch/s390/include/asm/sysinfo.h
@@ -0,0 +1,169 @@
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001, 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __ASM_S390_SYSINFO_H
+#define __ASM_S390_SYSINFO_H
+
+#include <asm/bitsperlong.h>
+/*
+ * NOTE(review): presumably the STSI response block for function code 1,
+ * selectors 1/1 (the name suggests so) - confirm against the
+ * Principles of Operation.
+ */
+struct sysinfo_1_1_1 {
+ unsigned char p:1;
+ unsigned char :6;
+ unsigned char t:1;
+ unsigned char :8;
+ unsigned char ccr;
+ unsigned char cai;
+ char reserved_0[28];
+ char manufacturer[16];
+ char type[4];
+ char reserved_1[12];
+ char model_capacity[16];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+ char model_perm_cap[16];
+ char model_temp_cap[16];
+ unsigned int model_cap_rating;
+ unsigned int model_perm_cap_rating;
+ unsigned int model_temp_cap_rating;
+ unsigned char typepct[5];
+ unsigned char reserved_2[3];
+ unsigned int ncr;
+ unsigned int npr;
+ unsigned int ntr;
+};
+/*
+ * NOTE(review): presumably the STSI response block for function code 1,
+ * selectors 2/1 - confirm.
+ */
+struct sysinfo_1_2_1 {
+ char reserved_0[80];
+ char sequence[16];
+ char plant[4];
+ char reserved_1[2];
+ unsigned short cpu_address;
+};
+/*
+ * NOTE(review): presumably the STSI response block for function code 1,
+ * selectors 2/2 - confirm. Ends in a zero-length adjustment[] array, so
+ * the entries follow the fixed part directly in memory.
+ */
+struct sysinfo_1_2_2 {
+ char format;
+ char reserved_0[1];
+ unsigned short acc_offset;
+ char reserved_1[20];
+ unsigned int nominal_cap;
+ unsigned int secondary_cap;
+ unsigned int capability;
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ unsigned short adjustment[0];
+};
+/*
+ * Additional data belonging to struct sysinfo_1_2_2.
+ * NOTE(review): presumably located via sysinfo_1_2_2.acc_offset - confirm
+ * against the users of this struct.
+ */
+struct sysinfo_1_2_2_extension {
+ unsigned int alt_capability;
+ unsigned short alt_adjustment[0];
+};
+/*
+ * NOTE(review): presumably the STSI response block for function code 2,
+ * selectors 2/1 - confirm.
+ */
+struct sysinfo_2_2_1 {
+ char reserved_0[80];
+ char sequence[16];
+ char plant[4];
+ unsigned short cpu_id;
+ unsigned short cpu_address;
+};
+/*
+ * NOTE(review): presumably the STSI response block for function code 2,
+ * selectors 2/2 (LPAR level) - confirm. The characteristics byte is
+ * presumably tested with the LPAR_CHAR_* masks defined right below.
+ */
+struct sysinfo_2_2_2 {
+ char reserved_0[32];
+ unsigned short lpar_number;
+ char reserved_1;
+ unsigned char characteristics;
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ char name[8];
+ unsigned int caf;
+ char reserved_2[16];
+ unsigned short cpus_dedicated;
+ unsigned short cpus_shared;
+};
+
+#define LPAR_CHAR_DEDICATED (1 << 7)
+#define LPAR_CHAR_SHARED (1 << 6)
+#define LPAR_CHAR_LIMITED (1 << 5)
+/*
+ * NOTE(review): presumably the STSI response block for function code 3,
+ * selectors 2/2 - confirm. Holds a 4-bit count followed by eight vm[]
+ * slots. With no padding each slot is 4 + 4*2 + 8 + 4 + 16 + 24 = 64
+ * bytes, so the slots end at 32 + 8*64 = 544, which matches the name of
+ * the trailing reserved_544 field.
+ */
+struct sysinfo_3_2_2 {
+ char reserved_0[31];
+ unsigned char :4;
+ unsigned char count:4;
+ struct {
+ char reserved_0[4];
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ char name[8];
+ unsigned int caf;
+ char cpi[16];
+ char reserved_1[24];
+
+ } vm[8];
+ char reserved_544[3552];
+};
+
+extern int topology_max_mnest;
+
+#define TOPOLOGY_CPU_BITS 64
+#define TOPOLOGY_NR_MAG 6
+/*
+ * CPU variant of a topology entry (see union topology_entry).
+ * mask[] provides TOPOLOGY_CPU_BITS bits in total.
+ * NOTE(review): presumably origin is the CPU number of the first mask bit
+ * and pp the polarization - confirm.
+ */
+struct topology_cpu {
+ unsigned char reserved0[4];
+ unsigned char :6;
+ unsigned char pp:2;
+ unsigned char reserved1;
+ unsigned short origin;
+ unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG];
+};
+/*
+ * Container variant of a topology entry (see union topology_entry):
+ * seven reserved bytes followed by a one-byte id.
+ */
+struct topology_container {
+ unsigned char reserved[7];
+ unsigned char id;
+};
+/*
+ * One topology list entry. nl overlays the first byte of both variants.
+ * NOTE(review): presumably nl is the nesting level that tells the cpu and
+ * container variants apart - confirm.
+ */
+union topology_entry {
+ unsigned char nl;
+ struct topology_cpu cpu;
+ struct topology_container container;
+};
+/*
+ * NOTE(review): presumably the STSI response block for function code 15,
+ * selector 1 (topology information) - confirm. A 16-byte header with
+ * TOPOLOGY_NR_MAG mag[] bytes, followed by a zero-length tle[] array of
+ * topology entries.
+ */
+struct sysinfo_15_1_x {
+ unsigned char reserved0[2];
+ unsigned short length;
+ unsigned char mag[TOPOLOGY_NR_MAG];
+ unsigned char reserved1;
+ unsigned char mnest;
+ unsigned char reserved2[4];
+ union topology_entry tle[0];
+};
+
+int stsi(void *sysinfo, int fc, int sel1, int sel2);
+
+/*
+ * Service level reporting interface.
+ *
+ * A struct service_level carries a list head and a seq_print callback
+ * that receives the seq_file to print to and the service_level itself.
+ * Instances are handed to register_service_level() and
+ * unregister_service_level().
+ */
+struct service_level {
+ struct list_head list;
+ void (*seq_print)(struct seq_file *, struct service_level *);
+};
+
+int register_service_level(struct service_level *);
+int unregister_service_level(struct service_level *);
+
+#endif /* __ASM_S390_SYSINFO_H */
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
new file mode 100644
index 00000000000..db028d17f06
--- /dev/null
+++ b/arch/s390/include/asm/termios.h
@@ -0,0 +1,25 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/termios.h"
+ */
+#ifndef _S390_TERMIOS_H
+#define _S390_TERMIOS_H
+
+#include <uapi/asm/termios.h>
+
+
+/* intr=^C quit=^\ erase=del kill=^U
+ eof=^D vtime=\0 vmin=\1 sxtc=\0
+ start=^Q stop=^S susp=^Z eol=\0
+ reprint=^R discard=^O werase=^W lnext=^V
+ eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+/*
+ * Copy a termios between user space (u) and kernel space (k). Both
+ * directions move sizeof(struct termios2) bytes and evaluate to the
+ * result of copy_from_user() / copy_to_user().
+ */
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
+
+#include <asm-generic/termios-base.h>
+
+#endif /* _S390_TERMIOS_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
new file mode 100644
index 00000000000..b833e9c0bfb
--- /dev/null
+++ b/arch/s390/include/asm/thread_info.h
@@ -0,0 +1,109 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 2002, 2006
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+/*
+ * Size of kernel stack for each process
+ */
+#ifndef CONFIG_64BIT
+#define THREAD_ORDER 1
+#define ASYNC_ORDER 1
+#else /* CONFIG_64BIT */
+#define THREAD_ORDER 2
+#define ASYNC_ORDER 2
+#endif /* CONFIG_64BIT */
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)
+
+#ifndef __ASSEMBLY__
+#include <asm/lowcore.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/*
+ * Low level task data that entry.S needs immediate access to.
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly
+ *   constants must be changed as well
+ */
+struct thread_info {
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+ unsigned long flags; /* low level flags */
+ unsigned long sys_call_table; /* System call table address */
+ unsigned int cpu; /* current CPU */
+ int preempt_count; /* 0 => preemptable, <0 => BUG */
+ struct restart_block restart_block;
+ unsigned int system_call;
+ __u64 user_timer;
+ __u64 system_timer;
+ unsigned long last_break; /* last breaking-event-address. */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ *
+ * INIT_THREAD_INFO(tsk) is the static initializer for a struct
+ * thread_info: it points .task at tsk, uses default_exec_domain, starts
+ * with no flags on CPU 0, sets preempt_count to INIT_PREEMPT_COUNT and
+ * installs do_no_restart_syscall as restart handler. Fields that are not
+ * listed are zero-initialized.
+ */
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
+}
+
+#define init_thread_info (init_thread_union.thread_info)
+#define init_stack (init_thread_union.stack)
+
+/*
+ * how to get the thread information struct from C:
+ * the pointer is read from S390_lowcore.thread_info and cast to
+ * struct thread_info *.
+ */
+static inline struct thread_info *current_thread_info(void)
+{
+ return (struct thread_info *) S390_lowcore.thread_info;
+}
+
+#define THREAD_SIZE_ORDER THREAD_ORDER
+
+#endif
+
+/*
+ * thread information flags bit numbers
+ */
+#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
+#define TIF_SIGPENDING 1 /* signal pending */
+#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+#define TIF_SECCOMP 5 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+#define TIF_31BIT 16 /* 32bit process */
+#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
+#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
+#define TIF_SINGLE_STEP 19 /* This task is single stepped */
+#define TIF_BLOCK_STEP 20 /* This task is block stepped */
+
+#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP (1<<TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_31BIT (1<<TIF_31BIT)
+#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP)
+/*
+ * is_32bit_task() - on CONFIG_64BIT kernels true when the current thread
+ * has TIF_31BIT set; on all other kernels always 1.
+ */
+#ifdef CONFIG_64BIT
+#define is_32bit_task() (test_thread_flag(TIF_31BIT))
+#else
+#define is_32bit_task() (1)
+#endif
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
new file mode 100644
index 00000000000..8beee1cceba
--- /dev/null
+++ b/arch/s390/include/asm/timex.h
@@ -0,0 +1,161 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ *
+ * Derived from "include/asm-i386/timex.h"
+ * Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef _ASM_S390_TIMEX_H
+#define _ASM_S390_TIMEX_H
+
+#include <asm/lowcore.h>
+
+/* The value of the TOD clock for 1.1.1970. */
+#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
+
+/*
+ * Inline functions for clock register access.
+ *
+ * set_tod_clock - issue sck with the 64-bit value time as operand and
+ * return the resulting condition code (ipm followed by a right shift by
+ * 28 moves it into the low bits of cc).
+ */
+static inline int set_tod_clock(__u64 time)
+{
+ int cc;
+
+ asm volatile(
+ " sck %1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc) : "Q" (time) : "cc");
+ return cc;
+}
+/*
+ * store_tod_clock - issue stck with *time as output operand and return
+ * the resulting condition code (extracted with ipm and a right shift by
+ * 28).
+ */
+static inline int store_tod_clock(__u64 *time)
+{
+ int cc;
+
+ asm volatile(
+ " stck %1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "=Q" (*time) : : "cc");
+ return cc;
+}
+/*
+ * set_clock_comparator - issue sckc with the 64-bit value time as input
+ * operand.
+ */
+static inline void set_clock_comparator(__u64 time)
+{
+ asm volatile("sckc %0" : : "Q" (time));
+}
+/*
+ * store_clock_comparator - issue stckc with *time as output operand.
+ */
+static inline void store_clock_comparator(__u64 *time)
+{
+ asm volatile("stckc %0" : "=Q" (*time));
+}
+
+void clock_comparator_work(void);
+/*
+ * local_tick_disable - set S390_lowcore.clock_comparator to -1ULL, load
+ * that value with set_clock_comparator() and return the previous lowcore
+ * value so it can be handed to local_tick_enable() later.
+ */
+static inline unsigned long long local_tick_disable(void)
+{
+ unsigned long long old;
+
+ old = S390_lowcore.clock_comparator;
+ S390_lowcore.clock_comparator = -1ULL;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ return old;
+}
+/*
+ * local_tick_enable - store comp in S390_lowcore.clock_comparator and load
+ * it with set_clock_comparator().
+ */
+static inline void local_tick_enable(unsigned long long comp)
+{
+ S390_lowcore.clock_comparator = comp;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
+
+typedef unsigned long long cycles_t;
+
+/*
+ * get_tod_clock_ext - store the extended TOD clock into clk with stcke.
+ * @clk: buffer of 16 bytes that receives the value
+ *
+ * An array parameter is really a pointer, so sizeof(clk) would be the
+ * size of a pointer and the "=Q" operand would describe only that many
+ * bytes as written. The helper type therefore spells out the full 16
+ * bytes of the buffer so the compiler knows all of it is modified.
+ */
+static inline void get_tod_clock_ext(char clk[16])
+{
+ typedef struct { char _[16]; } addrtype;
+
+ asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc");
+}
+/*
+ * get_tod_clock - read the clock through get_tod_clock_ext() and return
+ * the 8 bytes that start at byte 1 of the 16-byte result.
+ */
+static inline unsigned long long get_tod_clock(void)
+{
+ unsigned char clk[16];
+ get_tod_clock_ext(clk);
+ return *((unsigned long long *)&clk[1]);
+}
+/*
+ * get_tod_clock_fast - with CONFIG_HAVE_MARCH_Z9_109_FEATURES the clock is
+ * read with a single stckf; without it this is the same as
+ * get_tod_clock().
+ */
+static inline unsigned long long get_tod_clock_fast(void)
+{
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ unsigned long long clk;
+
+ asm volatile("stckf %0" : "=Q" (clk) : : "cc");
+ return clk;
+#else
+ return get_tod_clock();
+#endif
+}
+
+/* get_cycles - current get_tod_clock() value shifted right by two bits. */
+static inline cycles_t get_cycles(void)
+{
+ cycles_t now = get_tod_clock();
+
+ return now >> 2;
+}
+
+int get_sync_clock(unsigned long long *clock);
+void init_cpu_timer(void);
+unsigned long long monotonic_clock(void);
+
+void tod_to_timeval(__u64, struct timespec *);
+/*
+ * stck_to_timespec - subtract TOD_UNIX_EPOCH from the clock value stck and
+ * let tod_to_timeval() fill in *ts from the difference.
+ */
+static inline
+void stck_to_timespec(unsigned long long stck, struct timespec *ts)
+{
+ tod_to_timeval(stck - TOD_UNIX_EPOCH, ts);
+}
+
+extern u64 sched_clock_base_cc;
+
+/**
+ * get_tod_clock_monotonic - returns current time in clock rate units
+ *
+ * Returns get_tod_clock() minus sched_clock_base_cc.
+ *
+ * The caller must ensure that preemption is disabled.
+ * The clock and sched_clock_base_cc get changed via stop_machine.
+ * Therefore preemption must be disabled when calling this
+ * function, otherwise the returned value is not guaranteed to
+ * be monotonic.
+ */
+static inline unsigned long long get_tod_clock_monotonic(void)
+{
+ return get_tod_clock() - sched_clock_base_cc;
+}
+
+/**
+ * tod_to_ns - convert a TOD format value to nanoseconds
+ * @todval: to be converted TOD format value
+ * Returns: number of nanoseconds that correspond to the TOD format value
+ *
+ * One nanosecond is 4.096 TOD units, so the conversion is a division by
+ * 4.096, i.e. a multiplication by 125 followed by a division by 512:
+ *
+ * ns = (todval * 125) >> 9;
+ *
+ * Multiplying the full 64 bit value could overflow. The value is therefore
+ * split into its upper and lower 32 bits (todval == 2^32 * th + tl) and
+ * the two halves are scaled separately:
+ *
+ * ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ */
+static inline unsigned long long tod_to_ns(unsigned long long todval)
+{
+ const unsigned long long th = todval >> 32;
+ const unsigned long long tl = todval & 0xffffffff;
+
+ return (th << 23) * 125 + ((tl * 125) >> 9);
+}
+
+#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
new file mode 100644
index 00000000000..a25f09fbaf3
--- /dev/null
+++ b/arch/s390/include/asm/tlb.h
@@ -0,0 +1,151 @@
+#ifndef _S390_TLB_H
+#define _S390_TLB_H
+
+/*
+ * TLB flushing on s390 is complicated. The following requirement
+ * from the principles of operation is the most arduous:
+ *
+ * "A valid table entry must not be changed while it is attached
+ * to any CPU and may be used for translation by that CPU except to
+ * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
+ * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
+ * table entry, or (3) make a change by means of a COMPARE AND SWAP
+ * AND PURGE instruction that purges the TLB."
+ *
+ * The modification of a pte of an active mm struct therefore is
+ * a two step process: i) invalidate the pte, ii) store the new pte.
+ * This is true for the page protection bit as well.
+ * The only possible optimization is to flush at the beginning of
+ * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
+ *
+ * Pages used for the page tables is a different story. FIXME: more
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+/*
+ * State of one TLB gather cycle as set up by tlb_gather_mmu(): the mm
+ * being worked on, the pending table batch (NULL initially), the
+ * start/end of the range and the fullmm flag derived from that range.
+ */
+struct mmu_gather {
+ struct mm_struct *mm;
+ struct mmu_table_batch *batch;
+ unsigned int fullmm;
+ unsigned long start, end;
+};
+/*
+ * A batch of table pointers: an rcu_head, the number of entries nr and a
+ * zero-length tables[] array that follows the header in memory.
+ * NOTE(review): presumably the batch occupies one page (see
+ * MAX_TABLE_BATCH) and is freed via RCU - confirm in the implementation.
+ */
+struct mmu_table_batch {
+ struct rcu_head rcu;
+ unsigned int nr;
+ void *tables[0];
+};
+/*
+ * Number of void * slots that fit into PAGE_SIZE bytes after a
+ * struct mmu_table_batch header.
+ */
+#define MAX_TABLE_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+/*
+ * tlb_gather_mmu - initialize tlb for a gather cycle on mm covering
+ * start..end. fullmm is set only when start is 0 and end + 1 wraps to 0;
+ * no table batch is attached yet.
+ */
+static inline void tlb_gather_mmu(struct mmu_gather *tlb,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ const unsigned int whole_mm = (start == 0 && end + 1 == 0);
+
+ tlb->batch = NULL;
+ tlb->fullmm = whole_mm;
+ tlb->end = end;
+ tlb->start = start;
+ tlb->mm = mm;
+}
+/*
+ * tlb_flush_mmu_tlbonly - flush the TLB for tlb->mm via
+ * __tlb_flush_mm_lazy(); tables queued in the batch are not touched.
+ */
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+ __tlb_flush_mm_lazy(tlb->mm);
+}
+/*
+ * tlb_flush_mmu_free - hand the gather structure to tlb_table_flush().
+ */
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+ tlb_table_flush(tlb);
+}
+
+/*
+ * tlb_flush_mmu - run tlb_flush_mmu_tlbonly() followed by
+ * tlb_flush_mmu_free(), in that order.
+ */
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ tlb_flush_mmu_tlbonly(tlb);
+ tlb_flush_mmu_free(tlb);
+}
+/*
+ * tlb_finish_mmu - end a gather cycle by calling tlb_flush_mmu(). The
+ * start and end arguments are not used here.
+ */
+static inline void tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end)
+{
+ tlb_flush_mmu(tlb);
+}
+
+/*
+ * Release the page cache reference for a pte removed by
+ * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
+ * has already been freed, so just do free_page_and_swap_cache.
+ * Always returns 1; the tlb argument is not used.
+ */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+ free_page_and_swap_cache(page);
+ return 1; /* avoid calling tlb_flush_mmu */
+}
+/*
+ * tlb_remove_page - release page with free_page_and_swap_cache(); the tlb
+ * argument is not used.
+ */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+ free_page_and_swap_cache(page);
+}
+
+/*
+ * pte_free_tlb frees a pte table and clears the CRSTE for the
+ * page table from the tlb.
+ * The work is delegated to page_table_free_rcu(); the address argument is
+ * not used.
+ */
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+ unsigned long address)
+{
+ page_table_free_rcu(tlb, (unsigned long *) pte);
+}
+
+/*
+ * pmd_free_tlb queues a pmd table for removal and clears the CRSTE for
+ * the segment table entry from the tlb.
+ * An mm with a two level page table has its single pmd freed as the pgd,
+ * so the pmd is only passed to tlb_remove_table() when asce_limit is
+ * above 2GB; this avoids the double free. Without CONFIG_64BIT the
+ * function does nothing.
+ */
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long address)
+{
+#ifdef CONFIG_64BIT
+ if (tlb->mm->context.asce_limit > (1UL << 31))
+ tlb_remove_table(tlb, pmd);
+#endif
+}
+
+/*
+ * pud_free_tlb queues a pud table for removal and clears the CRSTE for
+ * the region third table entry from the tlb.
+ * An mm with a three level page table has its single pud freed as the
+ * pgd, so the pud is only passed to tlb_remove_table() when asce_limit is
+ * above 4TB; this avoids the double free. Without CONFIG_64BIT the
+ * function does nothing.
+ */
+static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long address)
+{
+#ifdef CONFIG_64BIT
+ if (tlb->mm->context.asce_limit > (1UL << 42))
+ tlb_remove_table(tlb, pud);
+#endif
+}
+/* The following hooks are empty statements in this header. */
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
+#define tlb_migrate_finish(mm) do { } while (0)
+
+#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
new file mode 100644
index 00000000000..16c9c88658c
--- /dev/null
+++ b/arch/s390/include/asm/tlbflush.h
@@ -0,0 +1,211 @@
+#ifndef _S390_TLBFLUSH_H
+#define _S390_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Flush all TLB entries on the local CPU.
+ * Issues a single ptlb; the "memory" clobber keeps the compiler from
+ * moving memory accesses across it.
+ */
+static inline void __tlb_flush_local(void)
+{
+ asm volatile("ptlb" : : : "memory");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ * The instruction is emitted with .insn (opcode 0xb98e) and gets the
+ * constant 2048 and the asce as register operands; the last field is 0
+ * here and 1 in the local variant.
+ */
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+ /* Global TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,0"
+ : : "a" (2048), "a" (asce) : "cc");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on the local CPU.
+ * Same .insn encoding as __tlb_flush_idte() except that the last field
+ * is 1 instead of 0.
+ */
+static inline void __tlb_flush_idte_local(unsigned long asce)
+{
+ /* Local TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,1"
+ : : "a" (2048), "a" (asce) : "cc");
+}
+
+#ifdef CONFIG_SMP
+void smp_ptlb_all(void);
+
+/*
+ * Flush all TLB entries on all CPUs.
+ * Kernels built without CONFIG_64BIT fall back to smp_ptlb_all() when
+ * MACHINE_HAS_CSP is false. Otherwise a csp is issued with registers 2
+ * and 3 set to 0 and register 4 holding the address of a local dummy
+ * variable plus 1.
+ * NOTE(review): the "+ 1" presumably sets a control bit in the operand
+ * address that requests the purge - confirm against the Principles of
+ * Operation.
+ */
+static inline void __tlb_flush_global(void)
+{
+ register unsigned long reg2 asm("2");
+ register unsigned long reg3 asm("3");
+ register unsigned long reg4 asm("4");
+ long dummy;
+
+#ifndef CONFIG_64BIT
+ if (!MACHINE_HAS_CSP) {
+ smp_ptlb_all();
+ return;
+ }
+#endif /* CONFIG_64BIT */
+
+ dummy = 0;
+ reg2 = reg3 = 0;
+ reg4 = ((unsigned long) &dummy) + 1;
+ asm volatile(
+ " csp %0,%2"
+ : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
+}
+
+/*
+ * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+ * this implicates multiple ASCEs!).
+ * Runs with preemption disabled and with 0x10000 added to
+ * mm->context.attach_count for the duration of the flush. If the mm's
+ * cpumask contains only the current CPU a local flush is enough;
+ * otherwise a global flush is done and, with MACHINE_HAS_TLB_LC, the
+ * cpumask is reset to cpu_attach_mask.
+ */
+static inline void __tlb_flush_full(struct mm_struct *mm)
+{
+ preempt_disable();
+ atomic_add(0x10000, &mm->context.attach_count);
+ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+ /* Local TLB flush */
+ __tlb_flush_local();
+ } else {
+ /* Global TLB flush */
+ __tlb_flush_global();
+ /* Reset TLB flush mask */
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_copy(mm_cpumask(mm),
+ &mm->context.cpu_attach_mask);
+ }
+ atomic_sub(0x10000, &mm->context.attach_count);
+ preempt_enable();
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ * Runs with preemption disabled and with 0x10000 added to
+ * mm->context.attach_count for the duration of the flush. A local IDTE
+ * flush is used when MACHINE_HAS_TLB_LC is true, the low 16 bits of the
+ * attach count do not exceed "active" (1 if mm is the current active_mm,
+ * else 0) and the mm's cpumask contains only the current CPU. In all
+ * other cases the flush is global (IDTE if available, else
+ * __tlb_flush_global()) and, with MACHINE_HAS_TLB_LC, the cpumask is
+ * reset to cpu_attach_mask.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+ int active, count;
+
+ preempt_disable();
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+ __tlb_flush_idte_local(asce);
+ } else {
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte(asce);
+ else
+ __tlb_flush_global();
+ /* Reset TLB flush mask */
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_copy(mm_cpumask(mm),
+ &mm->context.cpu_attach_mask);
+ }
+ atomic_sub(0x10000, &mm->context.attach_count);
+ preempt_enable();
+}
+/*
+ * SMP variant: flush the kernel address space. With MACHINE_HAS_IDTE the
+ * flush is an IDTE on init_mm's ASCE (pgd address ORed with asce_bits);
+ * without it everything is flushed with __tlb_flush_global().
+ */
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte((unsigned long) init_mm.pgd |
+ init_mm.context.asce_bits);
+ else
+ __tlb_flush_global();
+}
+#else
+#define __tlb_flush_global() __tlb_flush_local()
+#define __tlb_flush_full(mm) __tlb_flush_local()
+
+/*
+ * Flush TLB entries for a specific ASCE (non-SMP variant, so only the
+ * local CPU is involved). Uses the local IDTE flush when
+ * MACHINE_HAS_TLB_LC is true and a full local flush otherwise. The mm
+ * argument is not used.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+ if (MACHINE_HAS_TLB_LC)
+ __tlb_flush_idte_local(asce);
+ else
+ __tlb_flush_local();
+}
+/*
+ * Non-SMP variant: flush the kernel address space. With
+ * MACHINE_HAS_TLB_LC the flush is a local IDTE on init_mm's ASCE (pgd
+ * address ORed with asce_bits); otherwise a full local flush.
+ */
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_TLB_LC)
+ __tlb_flush_idte_local((unsigned long) init_mm.pgd |
+ init_mm.context.asce_bits);
+ else
+ __tlb_flush_local();
+}
+#endif
+
+/*
+ * __tlb_flush_mm - flush the TLB entries of mm.
+ * The per-ASCE flush is only possible when the machine has IDTE and the
+ * mm has no entries on its gmap_list; every other case needs the full
+ * flush.
+ */
+static inline void __tlb_flush_mm(struct mm_struct * mm)
+{
+ unsigned long asce;
+
+ if (!MACHINE_HAS_IDTE || !list_empty(&mm->context.gmap_list)) {
+ __tlb_flush_full(mm);
+ return;
+ }
+ /*
+ * If the machine has IDTE we prefer to do a per mm flush
+ * on all cpus instead of doing a local flush if the mm
+ * only ran on the local cpu.
+ */
+ asce = (unsigned long) mm->pgd | mm->context.asce_bits;
+ __tlb_flush_asce(mm, asce);
+}
+
+/*
+ * __tlb_flush_mm_lazy - flush mm only if a flush is pending, i.e. if
+ * mm->context.flush_mm is set; the flag is cleared after the flush.
+ */
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
+{
+ if (!mm->context.flush_mm)
+ return;
+ __tlb_flush_mm(mm);
+ mm->context.flush_mm = 0;
+}
+
+/*
+ * TLB flushing:
+ * flush_tlb() - flushes the current mm struct TLBs
+ * flush_tlb_all() - flushes all processes TLBs
+ * flush_tlb_mm(mm) - flushes the specified mm context TLB's
+ * flush_tlb_page(vma, vmaddr) - flushes one page
+ * flush_tlb_range(vma, start, end) - flushes a range of pages
+ * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
+ */
+
+/*
+ * flush_tlb_mm goes together with ptep_set_wrprotect for the
+ * copy_page_range operation and flush_tlb_range is related to
+ * ptep_get_and_clear for change_protection. ptep_set_wrprotect and
+ * ptep_get_and_clear do not flush the TLBs directly if the mm has
+ * only one user. At the end of the update the flush_tlb_mm and
+ * flush_tlb_range functions need to do the flush.
+ */
+#define flush_tlb() do { } while (0)
+#define flush_tlb_all() do { } while (0)
+#define flush_tlb_page(vma, addr) do { } while (0)
+/*
+ * flush_tlb_mm - flush the TLB of mm through __tlb_flush_mm_lazy(), i.e.
+ * only if a flush is pending for it.
+ */
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ __tlb_flush_mm_lazy(mm);
+}
+/*
+ * flush_tlb_range - lazily flush the whole mm that vma belongs to; the
+ * start and end arguments are not used.
+ */
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ __tlb_flush_mm_lazy(vma->vm_mm);
+}
+/*
+ * flush_tlb_kernel_range - flush the kernel address space with
+ * __tlb_flush_kernel(); the start and end arguments are not used.
+ */
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ __tlb_flush_kernel();
+}
+
+#endif /* _S390_TLBFLUSH_H */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
new file mode 100644
index 00000000000..56af53093d2
--- /dev/null
+++ b/arch/s390/include/asm/topology.h
@@ -0,0 +1,60 @@
+#ifndef _ASM_S390_TOPOLOGY_H
+#define _ASM_S390_TOPOLOGY_H
+
+#include <linux/cpumask.h>
+
+struct sysinfo_15_1_x;
+struct cpu;
+
+#ifdef CONFIG_SCHED_BOOK
+/*
+ * Per-CPU topology data, kept in the cpu_topology[] array and read
+ * through the topology_*() macros below: core, socket and book ids plus
+ * the cpumasks of the core and book groups.
+ */
+struct cpu_topology_s390 {
+ unsigned short core_id;
+ unsigned short socket_id;
+ unsigned short book_id;
+ cpumask_t core_mask;
+ cpumask_t book_mask;
+};
+
+extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask)
+#define topology_book_id(cpu) (cpu_topology[cpu].book_id)
+#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask)
+
+#define mc_capable() 1
+
+int topology_cpu_init(struct cpu *);
+int topology_set_cpu_management(int fc);
+void topology_schedule_update(void);
+void store_topology(struct sysinfo_15_1_x *info);
+void topology_expect_change(void);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else /* CONFIG_SCHED_BOOK */
+/* Stubs for kernels built without CONFIG_SCHED_BOOK: all do nothing. */
+static inline void topology_schedule_update(void) { }
+static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
+static inline void topology_expect_change(void) { }
+
+#endif /* CONFIG_SCHED_BOOK */
+
+#define POLARIZATION_UNKNOWN (-1)
+#define POLARIZATION_HRZ (0)
+#define POLARIZATION_VL (1)
+#define POLARIZATION_VM (2)
+#define POLARIZATION_VH (3)
+
+#ifdef CONFIG_SCHED_BOOK
+void s390_init_cpu_topology(void);
+#else
+/* Empty stub for kernels built without CONFIG_SCHED_BOOK. */
+static inline void s390_init_cpu_topology(void)
+{
+}
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
new file mode 100644
index 00000000000..dccef3ca91f
--- /dev/null
+++ b/arch/s390/include/asm/types.h
@@ -0,0 +1,28 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/types.h"
+ */
+#ifndef _S390_TYPES_H
+#define _S390_TYPES_H
+
+#include <uapi/asm/types.h>
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+
+#ifndef __ASSEMBLY__
+
+#ifndef CONFIG_64BIT
+typedef union { /* one 64-bit value that can also be accessed as two longs (only built without CONFIG_64BIT) */
+ unsigned long long pair; /* the value as a whole */
+ struct {
+ unsigned long even; /* first half in memory */
+ unsigned long odd; /* second half in memory */
+ } subreg;
+} register_pair;
+
+#endif /* ! CONFIG_64BIT */
+#endif /* __ASSEMBLY__ */
+#endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
new file mode 100644
index 00000000000..cd4c68e0398
--- /dev/null
+++ b/arch/s390/include/asm/uaccess.h
@@ -0,0 +1,376 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/uaccess.h"
+ */
+#ifndef __S390_UACCESS_H
+#define __S390_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <asm/ctl_reg.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) })
+
+
+#define KERNEL_DS MAKE_MM_SEG(0)
+#define USER_DS MAKE_MM_SEG(1)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->thread.mm_segment)
+
+#define set_fs(x) \
+({ \
+ unsigned long __pto; \
+ current->thread.mm_segment = (x); \
+ __pto = current->thread.mm_segment.ar4 ? \
+ S390_lowcore.user_asce : S390_lowcore.kernel_asce; \
+ __ctl_load(__pto, 7, 7); \
+})
+
+#define segment_eq(a,b) ((a).ar4 == (b).ar4)
+
+static inline int __range_ok(unsigned long addr, unsigned long size)
+{
+ return 1; /* unconditionally OK: addr and size are not inspected */
+}
+
+#define __access_ok(addr, size) \
+({ \
+ __chk_user_ptr(addr); \
+ __range_ok((unsigned long)(addr), (size)); \
+})
+
+#define access_ok(type, addr, size) __access_ok(addr, size)
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ int insn, fixup; /* self-relative offsets; decode with extable_insn()/extable_fixup() */
+};
+
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->insn + x->insn; /* address of the field + offset stored in it */
+}
+
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->fixup + x->fixup; /* address of the field + offset stored in it */
+}
+
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
+/**
+ * __copy_from_user: - Copy a block of data from user space, with less checking.
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to kernel space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long __must_check __copy_from_user(void *to, const void __user *from,
+ unsigned long n);
+
+/**
+ * __copy_to_user: - Copy a block of data into user space, with less checking.
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from kernel space to user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long __must_check __copy_to_user(void __user *to, const void *from,
+ unsigned long n);
+
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
+#define __put_get_user_asm(to, from, size, spec) \
+({ \
+ register unsigned long __reg0 asm("0") = spec; \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: mvcos %1,%3,%2\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%5\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "=Q" (*(to)) \
+ : "d" (size), "Q" (*(from)), \
+ "d" (__reg0), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL)
+#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL)
+
+#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
+static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+{
+ size = __copy_to_user(ptr, x, size); /* size is reused for the count of bytes left uncopied */
+ return size ? -EFAULT : 0; /* any shortfall is reported as -EFAULT */
+}
+
+static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+{
+ size = __copy_from_user(x, ptr, size); /* size is reused for the count of bytes left uncopied */
+ return size ? -EFAULT : 0; /* any shortfall is reported as -EFAULT */
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ */
+#define __put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __x = (x); \
+ int __pu_err = -EFAULT; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof (*(ptr))) { \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ __pu_err = __put_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ break; \
+ default: \
+ __put_user_bad(); \
+ break; \
+ } \
+ __pu_err; \
+})
+
+#define put_user(x, ptr) \
+({ \
+ might_fault(); \
+ __put_user(x, ptr); \
+})
+
+
+int __put_user_bad(void) __attribute__((noreturn));
+
+#define __get_user(x, ptr) \
+({ \
+ int __gu_err = -EFAULT; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: { \
+ unsigned char __x; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 2: { \
+ unsigned short __x; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 4: { \
+ unsigned int __x; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 8: { \
+ unsigned long long __x; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ default: \
+ __get_user_bad(); \
+ break; \
+ } \
+ __gu_err; \
+})
+
+#define get_user(x, ptr) \
+({ \
+ might_fault(); \
+ __get_user(x, ptr); \
+})
+
+int __get_user_bad(void) __attribute__((noreturn));
+
+#define __put_user_unaligned __put_user
+#define __get_user_unaligned __get_user
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ might_fault();
+ return __copy_to_user(to, from, n);
+}
+
+void copy_from_user_overflow(void)
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+__compiletime_warning("copy_from_user() buffer size is not provably correct")
+#endif
+;
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+static inline unsigned long __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ unsigned int sz = __compiletime_object_size(to);
+
+ might_fault();
+ if (unlikely(sz != -1 && sz < n)) { /* sz == -1 bypasses the check (presumably "size unknown" - confirm) */
+ copy_from_user_overflow();
+ return n; /* nothing was copied: all n bytes are reported as not copied */
+ }
+ return __copy_from_user(to, from, n);
+}
+
+unsigned long __must_check
+__copy_in_user(void __user *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ might_fault();
+ return __copy_in_user(to, from, n);
+}
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+long __strncpy_from_user(char *dst, const char __user *src, long count);
+
+static inline long __must_check
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+ might_fault();
+ return __strncpy_from_user(dst, src, count);
+}
+
+unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count);
+
+static inline unsigned long strnlen_user(const char __user *src, unsigned long n)
+{
+ might_fault();
+ return __strnlen_user(src, n);
+}
+
+/**
+ * strlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ *
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+#define strlen_user(str) strnlen_user(str, ~0UL)
+
+/*
+ * Zero Userspace
+ */
+unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+
+static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+{
+ might_fault();
+ return __clear_user(to, n);
+}
+
+int copy_to_user_real(void __user *dest, void *src, unsigned long count);
+
+#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h
new file mode 100644
index 00000000000..da9627afe5d
--- /dev/null
+++ b/arch/s390/include/asm/unaligned.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_S390_UNALIGNED_H
+#define _ASM_S390_UNALIGNED_H
+
+/*
+ * The S390 can do unaligned accesses itself.
+ */
+#include <linux/unaligned/access_ok.h>
+#include <linux/unaligned/generic.h>
+
+#define get_unaligned __get_unaligned_be
+#define put_unaligned __put_unaligned_be
+
+#endif /* _ASM_S390_UNALIGNED_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
new file mode 100644
index 00000000000..65188635355
--- /dev/null
+++ b/arch/s390/include/asm/unistd.h
@@ -0,0 +1,57 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/unistd.h"
+ */
+#ifndef _ASM_S390_UNISTD_H_
+#define _ASM_S390_UNISTD_H_
+
+#include <uapi/asm/unistd.h>
+
+
+#ifndef CONFIG_64BIT
+#define __IGNORE_select
+#else
+#define __IGNORE_time
+#endif
+
+/* Ignore NUMA system calls. Not wired up on s390. */
+#define __IGNORE_mbind
+#define __IGNORE_get_mempolicy
+#define __IGNORE_set_mempolicy
+#define __IGNORE_migrate_pages
+#define __IGNORE_move_pages
+
+/* Ignore system calls that are also reachable via sys_socketcall */
+#define __IGNORE_recvmmsg
+#define __IGNORE_sendmmsg
+
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_IPC
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+# ifndef CONFIG_64BIT
+# define __ARCH_WANT_STAT64
+# define __ARCH_WANT_SYS_TIME
+# endif
+# ifdef CONFIG_COMPAT
+# define __ARCH_WANT_COMPAT_SYS_TIME
+# endif
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
+
+#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h
new file mode 100644
index 00000000000..6ed1d188633
--- /dev/null
+++ b/arch/s390/include/asm/user.h
@@ -0,0 +1,74 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/user.h"
+ */
+
+#ifndef _S390_USER_H
+#define _S390_USER_H
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+/* Core file format: The core file is written in such a way that gdb
+ can understand it and provide useful information to the user (under
+ linux we use the 'trad-core' bfd). There are quite a number of
+ obstacles to being able to view the contents of the floating point
+ registers, and until these are solved you will not be able to view the
+ contents of them. Actually, you can read in the core file and look at
+ the contents of the user struct to find out what the floating point
+ registers contain.
+ The actual file contents are as follows:
+ UPAGE: 1 page consisting of a user struct that tells gdb what is present
+ in the file. Directly after this is a copy of the task_struct, which
+ is currently not used by gdb, but it may come in useful at some point.
+ All of the registers are stored as part of the upage. The upage should
+ always be only one page.
+ DATA: The data area is stored. We use current->end_text to
+ current->brk to pick up all of the user variables, plus any memory
+ that may have been malloced. No attempt is made to determine if a page
+ is demand-zero or if a page is totally unused, we just cover the entire
+ range. All of the addresses are rounded in such a way that an integral
+ number of pages is written.
+ STACK: We need the stack information in order to get a meaningful
+ backtrace. We need to write the data from (esp) to
+ current->start_stack, so we round each of these off in order to be able
+ to write an integer number of pages.
+ The minimum core file size is 3 pages, or 12288 bytes.
+*/
+
+
+/*
+ * This is the old layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all registers as the kernel
+ * doesn't use the extra segment registers)
+ */
+
+/* When the kernel dumps core, it starts by dumping the user struct -
+ this will be used by gdb to figure out where the data and stack segments
+ are within the file, and what virtual addresses to use. */
+struct user {
+/* We start with the registers, to mimic the way that "memory" is returned
+ from the ptrace(3,...) function. */
+ struct user_regs_struct regs; /* Where the registers are actually stored */
+/* The rest of this junk is to help gdb figure out what goes where */
+ unsigned long int u_tsize; /* Text segment size (pages). */
+ unsigned long int u_dsize; /* Data segment size (pages). */
+ unsigned long int u_ssize; /* Stack segment size (pages). */
+ unsigned long start_code; /* Starting virtual address of text. */
+ unsigned long start_stack; /* Starting virtual address of stack area.
+ This is actually the bottom of the stack,
+ the top of the stack is always found in the
+ esp register. */
+ long int signal; /* Signal that caused the core dump. */
+ unsigned long u_ar0; /* Used by gdb to help find the values for */
+ /* the registers. */
+ unsigned long magic; /* To uniquely identify a core file */
+ char u_comm[32]; /* User command that was responsible */
+};
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR (u.start_code)
+#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _S390_USER_H */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
new file mode 100644
index 00000000000..bc9746a7d47
--- /dev/null
+++ b/arch/s390/include/asm/vdso.h
@@ -0,0 +1,47 @@
+#ifndef __S390_VDSO_H__
+#define __S390_VDSO_H__
+
+/* Default link addresses for the vDSOs */
+#define VDSO32_LBASE 0
+#define VDSO64_LBASE 0
+
+#define VDSO_VERSION_STRING LINUX_2.6.29
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Note about the vdso_data and vdso_per_cpu_data structures:
+ *
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
+ * structure is supposed to be known only to the function in the vdso
+ * itself and may change without notice.
+ */
+
+struct vdso_data {
+ __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */
+ __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */
+ __u64 xtime_clock_sec; /* Kernel time 0x10 */
+ __u64 xtime_clock_nsec; /* 0x18 */
+ __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */
+ __u64 wtom_clock_nsec; /* 0x28 */
+ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
+ __u32 tz_dsttime; /* Type of dst correction 0x34 */
+ __u32 ectg_available; /* ECTG instruction present 0x38 */
+ __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */
+ __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */
+};
+
+struct vdso_per_cpu_data {
+ __u64 ectg_timer_base;
+ __u64 ectg_user_time;
+};
+
+extern struct vdso_data *vdso_data;
+
+#ifdef CONFIG_64BIT
+int vdso_alloc_per_cpu(struct _lowcore *lowcore);
+void vdso_free_per_cpu(struct _lowcore *lowcore);
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __S390_VDSO_H__ */
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
new file mode 100644
index 00000000000..d375526c261
--- /dev/null
+++ b/arch/s390/include/asm/vga.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_VGA_H
+#define _ASM_S390_VGA_H
+
+/* Avoid compile errors due to missing asm/vga.h */
+
+#endif /* _ASM_S390_VGA_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 00000000000..af9896c53eb
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
new file mode 100644
index 00000000000..bfe25d513ad
--- /dev/null
+++ b/arch/s390/include/asm/vtimer.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright IBM Corp. 2003, 2012
+ * Virtual CPU timer
+ *
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_TIMER_H
+#define _ASM_S390_TIMER_H
+
+#define VTIMER_MAX_SLICE (0x7fffffffffffffffULL)
+
+struct vtimer_list {
+ struct list_head entry;
+ u64 expires;
+ u64 interval;
+ void (*function)(unsigned long);
+ unsigned long data;
+};
+
+extern void init_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer_periodic(struct vtimer_list *timer);
+extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
+extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
+extern int del_virt_timer(struct vtimer_list *timer);
+
+extern void init_cpu_vtimer(void);
+extern void vtime_init(void);
+
+extern void vtime_stop_cpu(void);
+
+#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
new file mode 100644
index 00000000000..c82eb12a5b1
--- /dev/null
+++ b/arch/s390/include/asm/xor.h
@@ -0,0 +1 @@
+#include <asm-generic/xor.h>
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..736637363d3
--- /dev/null
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -0,0 +1,51 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += chpid.h
+header-y += chsc.h
+header-y += cmb.h
+header-y += dasd.h
+header-y += debug.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += kvm.h
+header-y += kvm_para.h
+header-y += kvm_virtio.h
+header-y += mman.h
+header-y += monwriter.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += qeth.h
+header-y += resource.h
+header-y += schid.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += sclp_ctl.h
+header-y += sie.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += tape390.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += ucontext.h
+header-y += unistd.h
+header-y += virtio-ccw.h
+header-y += vtoc.h
+header-y += zcrypt.h
diff --git a/arch/s390/include/uapi/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h
new file mode 100644
index 00000000000..a1f153e8913
--- /dev/null
+++ b/arch/s390/include/uapi/asm/auxvec.h
@@ -0,0 +1,6 @@
+#ifndef __ASMS390_AUXVEC_H
+#define __ASMS390_AUXVEC_H
+
+#define AT_SYSINFO_EHDR 33
+
+#endif
diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h
new file mode 100644
index 00000000000..6b235aea9c6
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
+
diff --git a/arch/s390/include/uapi/asm/byteorder.h b/arch/s390/include/uapi/asm/byteorder.h
new file mode 100644
index 00000000000..a332e59e26f
--- /dev/null
+++ b/arch/s390/include/uapi/asm/byteorder.h
@@ -0,0 +1,6 @@
+#ifndef _S390_BYTEORDER_H
+#define _S390_BYTEORDER_H
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _S390_BYTEORDER_H */
diff --git a/arch/s390/include/uapi/asm/chpid.h b/arch/s390/include/uapi/asm/chpid.h
new file mode 100644
index 00000000000..6b4fb29cc19
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chpid.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright IBM Corp. 2007, 2012
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _UAPI_ASM_S390_CHPID_H
+#define _UAPI_ASM_S390_CHPID_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define __MAX_CHPID 255
+
+struct chp_id {
+ __u8 reserved1;
+ __u8 cssid;
+ __u8 reserved2;
+ __u8 id;
+} __attribute__((packed));
+
+
+#endif /* _UAPI_ASM_S390_CHPID_H */
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
new file mode 100644
index 00000000000..65dc694725a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -0,0 +1,143 @@
+/*
+ * ioctl interface for /dev/chsc
+ *
+ * Copyright IBM Corp. 2008, 2012
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+
+#ifndef _ASM_CHSC_H
+#define _ASM_CHSC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/chpid.h>
+#include <asm/schid.h>
+
+#define CHSC_SIZE 0x1000
+
+struct chsc_async_header {
+ __u16 length;
+ __u16 code;
+ __u32 cmd_dependend;
+ __u32 key : 4;
+ __u32 : 28;
+ struct subchannel_id sid;
+} __attribute__ ((packed));
+
+struct chsc_async_area {
+ struct chsc_async_header header;
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
+} __attribute__ ((packed));
+
+struct chsc_header {
+ __u16 length;
+ __u16 code;
+} __attribute__ ((packed));
+
+struct chsc_sync_area {
+ struct chsc_header header;
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+} __attribute__ ((packed));
+
+struct chsc_response_struct {
+ __u16 length;
+ __u16 code;
+ __u32 parms;
+ __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
+} __attribute__ ((packed));
+
+struct chsc_chp_cd {
+ struct chp_id chpid;
+ int m;
+ int fmt;
+ struct chsc_response_struct cpcb;
+};
+
+struct chsc_cu_cd {
+ __u16 cun;
+ __u8 cssid;
+ int m;
+ int fmt;
+ struct chsc_response_struct cucb;
+};
+
+struct chsc_sch_cud {
+ struct subchannel_id schid;
+ int fmt;
+ struct chsc_response_struct scub;
+};
+
+struct conf_id {
+ int m;
+ __u8 cssid;
+ __u8 ssid;
+};
+
+struct chsc_conf_info {
+ struct conf_id id;
+ int fmt;
+ struct chsc_response_struct scid;
+};
+
+struct ccl_parm_chpid {
+ int m;
+ struct chp_id chp;
+};
+
+struct ccl_parm_cssids {
+ __u8 f_cssid;
+ __u8 l_cssid;
+};
+
+struct chsc_comp_list {
+ struct {
+ enum {
+ CCL_CU_ON_CHP = 1,
+ CCL_CHP_TYPE_CAP = 2,
+ CCL_CSS_IMG = 4,
+ CCL_CSS_IMG_CONF_CHAR = 5,
+ CCL_IOP_CHP = 6,
+ } ctype;
+ int fmt;
+ struct ccl_parm_chpid chpid;
+ struct ccl_parm_cssids cssids;
+ } req;
+ struct chsc_response_struct sccl;
+};
+
+struct chsc_dcal {
+ struct {
+ enum {
+ DCAL_CSS_IID_PN = 4,
+ } atype;
+ __u32 list_parm[2];
+ int fmt;
+ } req;
+ struct chsc_response_struct sdcal;
+};
+
+struct chsc_cpd_info {
+ struct chp_id chpid;
+ int m;
+ int fmt;
+ int rfmt;
+ int c;
+ struct chsc_response_struct chpdb;
+};
+
+#define CHSC_IOCTL_MAGIC 'c'
+
+#define CHSC_START _IOWR(CHSC_IOCTL_MAGIC, 0x81, struct chsc_async_area)
+#define CHSC_INFO_CHANNEL_PATH _IOWR(CHSC_IOCTL_MAGIC, 0x82, \
+ struct chsc_chp_cd)
+#define CHSC_INFO_CU _IOWR(CHSC_IOCTL_MAGIC, 0x83, struct chsc_cu_cd)
+#define CHSC_INFO_SCH_CU _IOWR(CHSC_IOCTL_MAGIC, 0x84, struct chsc_sch_cud)
+#define CHSC_INFO_CI _IOWR(CHSC_IOCTL_MAGIC, 0x85, struct chsc_conf_info)
+#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
+#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
+#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h
new file mode 100644
index 00000000000..0c086d00d89
--- /dev/null
+++ b/arch/s390/include/uapi/asm/cmb.h
@@ -0,0 +1,53 @@
+#ifndef _UAPIS390_CMB_H
+#define _UAPIS390_CMB_H
+
+#include <linux/types.h>
+
+/**
+ * struct cmbdata - channel measurement block data for user space
+ * @size: size of the stored data
+ * @elapsed_time: time since last sampling
+ * @ssch_rsch_count: number of ssch and rsch
+ * @sample_count: number of samples
+ * @device_connect_time: time of device connect
+ * @function_pending_time: time of function pending
+ * @device_disconnect_time: time of device disconnect
+ * @control_unit_queuing_time: time of control unit queuing
+ * @device_active_only_time: time of device active only
+ * @device_busy_time: time of device busy (ext. format)
+ * @initial_command_response_time: initial command response time (ext. format)
+ *
+ * All values are stored as 64 bit for simplicity, especially
+ * in 32 bit emulation mode. All time values are normalized to
+ * nanoseconds.
+ * Currently, two formats are known, which differ by the size of
+ * this structure, i.e. the last two members are only set when
+ * the extended channel measurement facility (first shipped in
+ * z990 machines) is activated.
+ * Potentially, more fields could be added, which would result in a
+ * new ioctl number.
+ */
+struct cmbdata {
+ __u64 size;
+ __u64 elapsed_time;
+ /* basic and extended format: */
+ __u64 ssch_rsch_count;
+ __u64 sample_count;
+ __u64 device_connect_time;
+ __u64 function_pending_time;
+ __u64 device_disconnect_time;
+ __u64 control_unit_queuing_time;
+ __u64 device_active_only_time;
+ /* extended format only: */
+ __u64 device_busy_time;
+ __u64 initial_command_response_time;
+};
+
+/* enable channel measurement */
+#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER, 32)
+/* disable channel measurement */
+#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER, 33)
+/* read channel measurement data */
+#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata)
+
+#endif /* _UAPIS390_CMB_H */
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
new file mode 100644
index 00000000000..5812a3b2df9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -0,0 +1,295 @@
+/*
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 1999, 2000
+ * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
+ * Author.........: Nigel Hislop <hislop_nigel@emc.com>
+ *
+ * This file is the interface of the DASD device driver, which is exported to user space.
+ * Any future changes wrt the API will result in a change of the APIVERSION reported
+ * to userspace by the DASDAPIVER-ioctl.
+ *
+ */
+
+#ifndef DASD_H
+#define DASD_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DASD_IOCTL_LETTER 'D'
+
+#define DASD_API_VERSION 6
+
+/*
+ * struct dasd_information2_t
+ * represents any data about the device, which is visible to userspace.
+ * including format and features.
+ */
+typedef struct dasd_information2_t {
+ unsigned int devno; /* S/390 devno */
+ unsigned int real_devno; /* for aliases */
+ unsigned int schid; /* S/390 subchannel identifier */
+ unsigned int cu_type : 16; /* from SenseID */
+ unsigned int cu_model : 8; /* from SenseID */
+ unsigned int dev_type : 16; /* from SenseID */
+ unsigned int dev_model : 8; /* from SenseID */
+ unsigned int open_count;
+ unsigned int req_queue_len;
+ unsigned int chanq_len; /* length of chanq */
+ char type[4]; /* from discipline.name, 'none' for unknown */
+ unsigned int status; /* current device level */
+ unsigned int label_block; /* where to find the VOLSER */
+ unsigned int FBA_layout; /* fixed block size (like AIXVOL) */
+ unsigned int characteristics_size;
+ unsigned int confdata_size;
+ char characteristics[64]; /* from read_device_characteristics */
+ char configuration_data[256]; /* from read_configuration_data */
+ unsigned int format; /* format info like formatted/cdl/ldl/... */
+ unsigned int features; /* dasd features like 'ro',... */
+ unsigned int reserved0; /* reserved for further use ,... */
+ unsigned int reserved1; /* reserved for further use ,... */
+ unsigned int reserved2; /* reserved for further use ,... */
+ unsigned int reserved3; /* reserved for further use ,... */
+ unsigned int reserved4; /* reserved for further use ,... */
+ unsigned int reserved5; /* reserved for further use ,... */
+ unsigned int reserved6; /* reserved for further use ,... */
+ unsigned int reserved7; /* reserved for further use ,... */
+} dasd_information2_t;
+
+/*
+ * values to be used for dasd_information2_t.format
+ * 0x00: NOT formatted
+ * 0x01: Linux disc layout
+ * 0x02: Common disc layout
+ */
+#define DASD_FORMAT_NONE 0
+#define DASD_FORMAT_LDL 1
+#define DASD_FORMAT_CDL 2
+/*
+ * values to be used for dasd_information2_t.features
+ * 0x00: default features
+ * 0x01: readonly (ro)
+ * 0x02: use diag discipline (diag)
+ * 0x04: set the device initially online (internal use only)
+ * 0x08: enable ERP related logging
+ * 0x40: give access to raw eckd data
+ */
+#define DASD_FEATURE_DEFAULT 0x00
+#define DASD_FEATURE_READONLY 0x01
+#define DASD_FEATURE_USEDIAG 0x02
+#define DASD_FEATURE_INITIAL_ONLINE 0x04
+#define DASD_FEATURE_ERPLOG 0x08
+#define DASD_FEATURE_FAILFAST 0x10
+#define DASD_FEATURE_FAILONSLCK 0x20
+#define DASD_FEATURE_USERAW 0x40
+
+#define DASD_PARTN_BITS 2
+
+/*
+ * struct dasd_information_t
+ * represents any data about the device, which is visible to userspace
+ */
+typedef struct dasd_information_t {
+ unsigned int devno; /* S/390 devno */
+ unsigned int real_devno; /* for aliases */
+ unsigned int schid; /* S/390 subchannel identifier */
+ unsigned int cu_type : 16; /* from SenseID */
+ unsigned int cu_model : 8; /* from SenseID */
+ unsigned int dev_type : 16; /* from SenseID */
+ unsigned int dev_model : 8; /* from SenseID */
+ unsigned int open_count;
+ unsigned int req_queue_len;
+ unsigned int chanq_len; /* length of chanq */
+ char type[4]; /* from discipline.name, 'none' for unknown */
+ unsigned int status; /* current device level */
+ unsigned int label_block; /* where to find the VOLSER */
+ unsigned int FBA_layout; /* fixed block size (like AIXVOL) */
+ unsigned int characteristics_size;
+ unsigned int confdata_size;
+ char characteristics[64]; /* from read_device_characteristics */
+ char configuration_data[256]; /* from read_configuration_data */
+} dasd_information_t;
+
+/*
+ * Read Subsystem Data - Performance Statistics
+ */
+typedef struct dasd_rssd_perf_stats_t {
+ unsigned char invalid:1;
+ unsigned char format:3;
+ unsigned char data_format:4;
+ unsigned char unit_address;
+ unsigned short device_status;
+ unsigned int nr_read_normal;
+ unsigned int nr_read_normal_hits;
+ unsigned int nr_write_normal;
+ unsigned int nr_write_fast_normal_hits;
+ unsigned int nr_read_seq;
+ unsigned int nr_read_seq_hits;
+ unsigned int nr_write_seq;
+ unsigned int nr_write_fast_seq_hits;
+ unsigned int nr_read_cache;
+ unsigned int nr_read_cache_hits;
+ unsigned int nr_write_cache;
+ unsigned int nr_write_fast_cache_hits;
+ unsigned int nr_inhibit_cache;
+ unsigned int nr_bybass_cache;
+ unsigned int nr_seq_dasd_to_cache;
+ unsigned int nr_dasd_to_cache;
+ unsigned int nr_cache_to_dasd;
+ unsigned int nr_delayed_fast_write;
+ unsigned int nr_normal_fast_write;
+ unsigned int nr_seq_fast_write;
+ unsigned int nr_cache_miss;
+ unsigned char status2;
+ unsigned int nr_quick_write_promotes;
+ unsigned char reserved;
+ unsigned short ssid;
+ unsigned char reseved2[96];
+} __attribute__((packed)) dasd_rssd_perf_stats_t;
+
+/*
+ * struct dasd_profile_info_t
+ * holds the profiling information
+ */
+typedef struct dasd_profile_info_t {
+ unsigned int dasd_io_reqs; /* number of requests processed at all */
+ unsigned int dasd_io_sects; /* number of sectors processed at all */
+ unsigned int dasd_io_secs[32]; /* histogram of request sizes */
+ unsigned int dasd_io_times[32]; /* histogram of request times */
+ unsigned int dasd_io_timps[32]; /* histogram of request times per sector */
+ unsigned int dasd_io_time1[32]; /* histogram of time from build to start */
+ unsigned int dasd_io_time2[32]; /* histogram of time from start to irq */
+ unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq per sector (cf. dasd_io_timps) */
+ unsigned int dasd_io_time3[32]; /* histogram of time from irq to end */
+ unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */
+} dasd_profile_info_t;
+
+/*
+ * struct format_data_t
+ * represents all data necessary to format a dasd
+ */
+typedef struct format_data_t {
+ unsigned int start_unit; /* from track */
+ unsigned int stop_unit; /* to track */
+ unsigned int blksize; /* sectorsize */
+ unsigned int intensity;
+} format_data_t;
+
+/*
+ * values to be used for format_data_t.intensity
+ * 0/8: normal format
+ * 1/9: also write record zero
+ * 3/11: also write home address
+ * 4/12: invalidate track
+ */
+#define DASD_FMT_INT_FMT_R0 1 /* write record zero */
+#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */
+#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */
+#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */
+
+
+/*
+ * struct attrib_data_t
+ * represents the operation (cache) bits for the device.
+ * Used in DE to influence caching of the DASD.
+ */
+typedef struct attrib_data_t {
+ unsigned char operation:3; /* cache operation mode */
+ unsigned char reserved:5; /* reserved bits */
+ __u16 nr_cyl; /* no of cylinders for read ahead */
+ __u8 reserved2[29]; /* for future use */
+} __attribute__ ((packed)) attrib_data_t;
+
+/* definition of operation (cache) bits within attributes of DE */
+#define DASD_NORMAL_CACHE 0x0
+#define DASD_BYPASS_CACHE 0x1
+#define DASD_INHIBIT_LOAD 0x2
+#define DASD_SEQ_ACCESS 0x3
+#define DASD_SEQ_PRESTAGE 0x4
+#define DASD_REC_ACCESS 0x5
+
+/*
+ * Perform EMC Symmetrix I/O
+ */
+typedef struct dasd_symmio_parms {
+ unsigned char reserved[8]; /* compat with older releases */
+ unsigned long long psf_data; /* char * cast to u64 */
+ unsigned long long rssd_result; /* char * cast to u64 */
+ int psf_data_len;
+ int rssd_result_len;
+} __attribute__ ((packed)) dasd_symmio_parms_t;
+
+/*
+ * Data returned by Sense Path Group ID (SNID)
+ */
+struct dasd_snid_data {
+ struct {
+ __u8 group:2;
+ __u8 reserve:2;
+ __u8 mode:1;
+ __u8 res:3;
+ } __attribute__ ((packed)) path_state;
+ __u8 pgid[11];
+} __attribute__ ((packed));
+
+struct dasd_snid_ioctl_data {
+ struct dasd_snid_data data;
+ __u8 path_mask;
+} __attribute__ ((packed));
+
+
+/********************************************************************************
+ * SECTION: Definition of IOCTLs
+ *
+ * Here is how the ioctl-nr should be used:
+ * 0 - 31 DASD driver itself
+ * 32 - 239 still open
+ * 240 - 255 reserved for EMC
+ *******************************************************************************/
+
+/* Disable the volume (for Linux) */
+#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0)
+/* Enable the volume (for Linux) */
+#define BIODASDENABLE _IO(DASD_IOCTL_LETTER,1)
+/* Issue a reserve/release command, rsp. */
+#define BIODASDRSRV _IO(DASD_IOCTL_LETTER,2) /* reserve */
+#define BIODASDRLSE _IO(DASD_IOCTL_LETTER,3) /* release */
+#define BIODASDSLCK _IO(DASD_IOCTL_LETTER,4) /* steal lock */
+/* reset profiling information of a device */
+#define BIODASDPRRST _IO(DASD_IOCTL_LETTER,5)
+/* Quiesce IO on device */
+#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6)
+/* Resume IO on device */
+#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7)
+/* Abort all I/O on a device */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
+
+
+/* retrieve API version number */
+#define DASDAPIVER _IOR(DASD_IOCTL_LETTER,0,int)
+/* Get information on a dasd device */
+#define BIODASDINFO _IOR(DASD_IOCTL_LETTER,1,dasd_information_t)
+/* retrieve profiling information of a device */
+#define BIODASDPRRD _IOR(DASD_IOCTL_LETTER,2,dasd_profile_info_t)
+/* Get information on a dasd device (enhanced) */
+#define BIODASDINFO2 _IOR(DASD_IOCTL_LETTER,3,dasd_information2_t)
+/* Performance Statistics Read */
+#define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t)
+/* Get Attributes (cache operations) */
+#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t)
+
+
+/* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */
+#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t)
+/* Set Attributes (cache operations) */
+#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
+
+/* Get Sense Path Group ID (SNID) data */
+#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
+
+#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t)
+
+#endif /* DASD_H */
+
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
new file mode 100644
index 00000000000..c59fc79125f
--- /dev/null
+++ b/arch/s390/include/uapi/asm/debug.h
@@ -0,0 +1,34 @@
+/*
+ * S/390 debug facility
+ *
+ * Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _UAPIDEBUG_H
+#define _UAPIDEBUG_H
+
+#include <linux/fs.h>
+
+/* Note:
+ * struct __debug_entry must be defined outside of #ifdef __KERNEL__
+ * in order to allow a user program to analyze the 'raw'-view.
+ */
+
+struct __debug_entry{
+ union {
+ struct {
+ unsigned long long clock:52;
+ unsigned long long exception:1;
+ unsigned long long level:3;
+ unsigned long long cpuid:8;
+ } fields;
+
+ unsigned long long stck;
+ } id;
+ void* caller;
+} __attribute__((packed));
+
+
+#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */
+
+#endif /* _UAPIDEBUG_H */
diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h
new file mode 100644
index 00000000000..395e97d8005
--- /dev/null
+++ b/arch/s390/include/uapi/asm/errno.h
@@ -0,0 +1,11 @@
+/*
+ * S390 version
+ *
+ */
+
+#ifndef _S390_ERRNO_H
+#define _S390_ERRNO_H
+
+#include <asm-generic/errno.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h
new file mode 100644
index 00000000000..46ab12db573
--- /dev/null
+++ b/arch/s390/include/uapi/asm/fcntl.h
@@ -0,0 +1 @@
+#include <asm-generic/fcntl.h>
diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h
new file mode 100644
index 00000000000..37998b44953
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hypfs.h
@@ -0,0 +1,25 @@
+/*
+ * IOCTL interface for hypfs
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_HYPFS_CTL_H
+#define _ASM_HYPFS_CTL_H
+
+#include <linux/types.h>
+
+struct hypfs_diag304 {
+ __u32 args[2];
+ __u64 data;
+ __u64 rc;
+} __attribute__((packed));
+
+#define HYPFS_IOCTL_MAGIC 0x10
+
+#define HYPFS_DIAG304 \
+ _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h
new file mode 100644
index 00000000000..b279fe06dfe
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/arch/s390/include/uapi/asm/ioctls.h b/arch/s390/include/uapi/asm/ioctls.h
new file mode 100644
index 00000000000..960a4c1ebdf
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ioctls.h
@@ -0,0 +1,8 @@
+#ifndef __ARCH_S390_IOCTLS_H__
+#define __ARCH_S390_IOCTLS_H__
+
+#define FIOQSIZE 0x545E
+
+#include <asm-generic/ioctls.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
new file mode 100644
index 00000000000..37f293d12c8
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -0,0 +1,31 @@
+#ifndef __S390_IPCBUF_H__
+#define __S390_IPCBUF_H__
+
+/*
+ * The ipc64_perm structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+ __kernel_key_t key;
+ __kernel_uid32_t uid;
+ __kernel_gid32_t gid;
+ __kernel_uid32_t cuid;
+ __kernel_gid32_t cgid;
+ __kernel_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+#ifndef __s390x__
+ unsigned short __pad2;
+#endif /* ! __s390x__ */
+ unsigned long __unused1;
+ unsigned long __unused2;
+};
+
+#endif /* __S390_IPCBUF_H__ */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
new file mode 100644
index 00000000000..0fc26430a1e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -0,0 +1,131 @@
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <linux/types.h>
+
+#define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS 1
+#define KVM_DEV_FLIC_ENQUEUE 2
+#define KVM_DEV_FLIC_CLEAR_IRQS 3
+#define KVM_DEV_FLIC_APF_ENABLE 4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Let's round up to 8192 pages.
+ */
+#define KVM_S390_MAX_FLOAT_IRQS 266250
+#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 pad;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
+/* kvm attr_group on vm fd */
+#define KVM_S390_VM_MEM_CTRL 0
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA 0
+#define KVM_S390_VM_MEM_CLR_CMMA 1
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+ /* general purpose regs for s390 */
+ __u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+ __u32 acrs[16];
+ __u64 crs[16];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u32 fpc;
+ __u64 fprs[16];
+};
+
+#define KVM_GUESTDBG_USE_HW_BP 0x00010000
+
+#define KVM_HW_BP 1
+#define KVM_HW_WP_WRITE 2
+#define KVM_SINGLESTEP 4
+
+struct kvm_debug_exit_arch {
+ __u64 addr;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+ __u64 addr;
+ __u64 phys_addr;
+ __u64 len;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u32 nr_hw_bp;
+ __u32 pad; /* Should be set to 0 */
+ struct kvm_hw_breakpoint __user *hw_bp;
+};
+
+#define KVM_SYNC_PREFIX (1UL << 0)
+#define KVM_SYNC_GPRS (1UL << 1)
+#define KVM_SYNC_ACRS (1UL << 2)
+#define KVM_SYNC_CRS (1UL << 3)
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+ __u64 prefix; /* prefix register */
+ __u64 gprs[16]; /* general purpose registers */
+ __u32 acrs[16]; /* access registers */
+ __u64 crs[16]; /* control registers */
+};
+
+#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h
new file mode 100644
index 00000000000..ff1f4e7b301
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_para.h
@@ -0,0 +1,11 @@
+/*
+ * User API definitions for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
diff --git a/arch/s390/include/uapi/asm/kvm_virtio.h b/arch/s390/include/uapi/asm/kvm_virtio.h
new file mode 100644
index 00000000000..44a438ca9e7
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_virtio.h
@@ -0,0 +1,64 @@
+/*
+ * definition for virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_VIRTIO_H
+#define __KVM_S390_VIRTIO_H
+
+#include <linux/types.h>
+
+struct kvm_device_desc {
+ /* The device type: console, network, disk etc. Type 0 terminates. */
+ __u8 type;
+ /* The number of virtqueues (first in config array) */
+ __u8 num_vq;
+ /*
+ * The number of bytes of feature bits. Multiply by 2: one for host
+ * features and one for guest acknowledgements.
+ */
+ __u8 feature_len;
+ /* The number of bytes of the config array after virtqueues. */
+ __u8 config_len;
+ /* A status byte, written by the Guest. */
+ __u8 status;
+ __u8 config[0];
+};
+
+/*
+ * This is how we expect the device configuration field for a virtqueue
+ * to be laid out in config space.
+ */
+struct kvm_vqconfig {
+ /* The token returned with an interrupt. Set by the guest */
+ __u64 token;
+ /* The address of the virtio ring */
+ __u64 address;
+ /* The number of entries in the virtio_ring */
+ __u16 num;
+
+};
+
+#define KVM_S390_VIRTIO_NOTIFY 0
+#define KVM_S390_VIRTIO_RESET 1
+#define KVM_S390_VIRTIO_SET_STATUS 2
+
+/* The alignment to use between consumer and producer parts of vring.
+ * This is pagesize for historical reasons. */
+#define KVM_S390_VIRTIO_RING_ALIGN 4096
+
+
+/* These values are supposed to be in ext_params on an interrupt */
+#define VIRTIO_PARAM_MASK 0xff
+#define VIRTIO_PARAM_VRING_INTERRUPT 0x0
+#define VIRTIO_PARAM_CONFIG_CHANGED 0x1
+#define VIRTIO_PARAM_DEV_ADD 0x2
+
+#endif
diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h
new file mode 100644
index 00000000000..de23da1f41b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/mman.h"
+ */
+#include <asm-generic/mman.h>
diff --git a/arch/s390/include/uapi/asm/monwriter.h b/arch/s390/include/uapi/asm/monwriter.h
new file mode 100644
index 00000000000..f845c8e2f86
--- /dev/null
+++ b/arch/s390/include/uapi/asm/monwriter.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright IBM Corp. 2006
+ * Character device driver for writing z/VM APPLDATA monitor records
+ * Version 1.0
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ *
+ */
+
+#ifndef _ASM_390_MONWRITER_H
+#define _ASM_390_MONWRITER_H
+
+/* mon_function values */
+#define MONWRITE_START_INTERVAL 0x00 /* start interval recording */
+#define MONWRITE_STOP_INTERVAL 0x01 /* stop interval or config recording */
+#define MONWRITE_GEN_EVENT 0x02 /* generate event record */
+#define MONWRITE_START_CONFIG 0x03 /* start configuration recording */
+
+/* the header the app uses in its write() data */
+struct monwrite_hdr {
+ unsigned char mon_function;
+ unsigned short applid;
+ unsigned char record_num;
+ unsigned short version;
+ unsigned short release;
+ unsigned short mod_level;
+ unsigned short datalen;
+ unsigned char hdrlen;
+
+} __attribute__((packed));
+
+#endif /* _ASM_390_MONWRITER_H */
diff --git a/arch/s390/include/uapi/asm/msgbuf.h b/arch/s390/include/uapi/asm/msgbuf.h
new file mode 100644
index 00000000000..1bbdee92792
--- /dev/null
+++ b/arch/s390/include/uapi/asm/msgbuf.h
@@ -0,0 +1,37 @@
+#ifndef _S390_MSGBUF_H
+#define _S390_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+ struct ipc64_perm msg_perm;
+ __kernel_time_t msg_stime; /* last msgsnd time */
+#ifndef __s390x__
+ unsigned long __unused1;
+#endif /* ! __s390x__ */
+ __kernel_time_t msg_rtime; /* last msgrcv time */
+#ifndef __s390x__
+ unsigned long __unused2;
+#endif /* ! __s390x__ */
+ __kernel_time_t msg_ctime; /* last change time */
+#ifndef __s390x__
+ unsigned long __unused3;
+#endif /* ! __s390x__ */
+ unsigned long msg_cbytes; /* current number of bytes on queue */
+ unsigned long msg_qnum; /* number of messages in queue */
+ unsigned long msg_qbytes; /* max number of bytes on queue */
+ __kernel_pid_t msg_lspid; /* pid of last msgsnd */
+ __kernel_pid_t msg_lrpid; /* last receive pid */
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+#endif /* _S390_MSGBUF_H */
diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h
new file mode 100644
index 00000000000..c616821bf2a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/param.h
@@ -0,0 +1,6 @@
+#ifndef _ASMS390_PARAM_H
+#define _ASMS390_PARAM_H
+
+#include <asm-generic/param.h>
+
+#endif /* _ASMS390_PARAM_H */
diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h
new file mode 100644
index 00000000000..c98509d3149
--- /dev/null
+++ b/arch/s390/include/uapi/asm/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h
new file mode 100644
index 00000000000..bf2a2ad2f80
--- /dev/null
+++ b/arch/s390/include/uapi/asm/posix_types.h
@@ -0,0 +1,51 @@
+/*
+ * S390 version
+ *
+ */
+
+#ifndef __ARCH_S390_POSIX_TYPES_H
+#define __ARCH_S390_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc. Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long __kernel_size_t;
+typedef long __kernel_ssize_t;
+#define __kernel_size_t __kernel_size_t
+
+typedef unsigned short __kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t
+
+#ifndef __s390x__
+
+typedef unsigned long __kernel_ino_t;
+typedef unsigned short __kernel_mode_t;
+typedef unsigned short __kernel_ipc_pid_t;
+typedef unsigned short __kernel_uid_t;
+typedef unsigned short __kernel_gid_t;
+typedef int __kernel_ptrdiff_t;
+
+#else /* __s390x__ */
+
+typedef unsigned int __kernel_ino_t;
+typedef unsigned int __kernel_mode_t;
+typedef int __kernel_ipc_pid_t;
+typedef unsigned int __kernel_uid_t;
+typedef unsigned int __kernel_gid_t;
+typedef long __kernel_ptrdiff_t;
+typedef unsigned long __kernel_sigset_t; /* at least 32 bits */
+
+#endif /* __s390x__ */
+
+#define __kernel_ino_t __kernel_ino_t
+#define __kernel_mode_t __kernel_mode_t
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t
+#define __kernel_uid_t __kernel_uid_t
+#define __kernel_gid_t __kernel_gid_t
+
+#include <asm-generic/posix_types.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
new file mode 100644
index 00000000000..a150f4fabe4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -0,0 +1,459 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+
+#ifndef _UAPI_S390_PTRACE_H
+#define _UAPI_S390_PTRACE_H
+
+/*
+ * Offsets in the user_regs_struct. They are used for the ptrace
+ * system call and in entry.S
+ */
+#ifndef __s390x__
+
+#define PT_PSWMASK 0x00
+#define PT_PSWADDR 0x04
+#define PT_GPR0 0x08
+#define PT_GPR1 0x0C
+#define PT_GPR2 0x10
+#define PT_GPR3 0x14
+#define PT_GPR4 0x18
+#define PT_GPR5 0x1C
+#define PT_GPR6 0x20
+#define PT_GPR7 0x24
+#define PT_GPR8 0x28
+#define PT_GPR9 0x2C
+#define PT_GPR10 0x30
+#define PT_GPR11 0x34
+#define PT_GPR12 0x38
+#define PT_GPR13 0x3C
+#define PT_GPR14 0x40
+#define PT_GPR15 0x44
+#define PT_ACR0 0x48
+#define PT_ACR1 0x4C
+#define PT_ACR2 0x50
+#define PT_ACR3 0x54
+#define PT_ACR4 0x58
+#define PT_ACR5 0x5C
+#define PT_ACR6 0x60
+#define PT_ACR7 0x64
+#define PT_ACR8 0x68
+#define PT_ACR9 0x6C
+#define PT_ACR10 0x70
+#define PT_ACR11 0x74
+#define PT_ACR12 0x78
+#define PT_ACR13 0x7C
+#define PT_ACR14 0x80
+#define PT_ACR15 0x84
+#define PT_ORIGGPR2 0x88
+#define PT_FPC 0x90
+/*
+ * A nasty fact of life that the ptrace api
+ * only supports passing of longs.
+ */
+#define PT_FPR0_HI 0x98
+#define PT_FPR0_LO 0x9C
+#define PT_FPR1_HI 0xA0
+#define PT_FPR1_LO 0xA4
+#define PT_FPR2_HI 0xA8
+#define PT_FPR2_LO 0xAC
+#define PT_FPR3_HI 0xB0
+#define PT_FPR3_LO 0xB4
+#define PT_FPR4_HI 0xB8
+#define PT_FPR4_LO 0xBC
+#define PT_FPR5_HI 0xC0
+#define PT_FPR5_LO 0xC4
+#define PT_FPR6_HI 0xC8
+#define PT_FPR6_LO 0xCC
+#define PT_FPR7_HI 0xD0
+#define PT_FPR7_LO 0xD4
+#define PT_FPR8_HI 0xD8
+#define PT_FPR8_LO 0XDC
+#define PT_FPR9_HI 0xE0
+#define PT_FPR9_LO 0xE4
+#define PT_FPR10_HI 0xE8
+#define PT_FPR10_LO 0xEC
+#define PT_FPR11_HI 0xF0
+#define PT_FPR11_LO 0xF4
+#define PT_FPR12_HI 0xF8
+#define PT_FPR12_LO 0xFC
+#define PT_FPR13_HI 0x100
+#define PT_FPR13_LO 0x104
+#define PT_FPR14_HI 0x108
+#define PT_FPR14_LO 0x10C
+#define PT_FPR15_HI 0x110
+#define PT_FPR15_LO 0x114
+#define PT_CR_9 0x118
+#define PT_CR_10 0x11C
+#define PT_CR_11 0x120
+#define PT_IEEE_IP 0x13C
+#define PT_LASTOFF PT_IEEE_IP
+#define PT_ENDREGS (0x140-1) /* PT_LASTOFF + GPR_SIZE - 1; parenthesized so it is safe inside larger expressions */
+
+#define GPR_SIZE 4
+#define CR_SIZE 4
+
+#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */
+
+#else /* __s390x__ */
+
+#define PT_PSWMASK 0x00
+#define PT_PSWADDR 0x08
+#define PT_GPR0 0x10
+#define PT_GPR1 0x18
+#define PT_GPR2 0x20
+#define PT_GPR3 0x28
+#define PT_GPR4 0x30
+#define PT_GPR5 0x38
+#define PT_GPR6 0x40
+#define PT_GPR7 0x48
+#define PT_GPR8 0x50
+#define PT_GPR9 0x58
+#define PT_GPR10 0x60
+#define PT_GPR11 0x68
+#define PT_GPR12 0x70
+#define PT_GPR13 0x78
+#define PT_GPR14 0x80
+#define PT_GPR15 0x88
+#define PT_ACR0 0x90
+#define PT_ACR1 0x94
+#define PT_ACR2 0x98
+#define PT_ACR3 0x9C
+#define PT_ACR4 0xA0
+#define PT_ACR5 0xA4
+#define PT_ACR6 0xA8
+#define PT_ACR7 0xAC
+#define PT_ACR8 0xB0
+#define PT_ACR9 0xB4
+#define PT_ACR10 0xB8
+#define PT_ACR11 0xBC
+#define PT_ACR12 0xC0
+#define PT_ACR13 0xC4
+#define PT_ACR14 0xC8
+#define PT_ACR15 0xCC
+#define PT_ORIGGPR2 0xD0
+#define PT_FPC 0xD8
+#define PT_FPR0 0xE0
+#define PT_FPR1 0xE8
+#define PT_FPR2 0xF0
+#define PT_FPR3 0xF8
+#define PT_FPR4 0x100
+#define PT_FPR5 0x108
+#define PT_FPR6 0x110
+#define PT_FPR7 0x118
+#define PT_FPR8 0x120
+#define PT_FPR9 0x128
+#define PT_FPR10 0x130
+#define PT_FPR11 0x138
+#define PT_FPR12 0x140
+#define PT_FPR13 0x148
+#define PT_FPR14 0x150
+#define PT_FPR15 0x158
+#define PT_CR_9 0x160
+#define PT_CR_10 0x168
+#define PT_CR_11 0x170
+#define PT_IEEE_IP 0x1A8
+#define PT_LASTOFF PT_IEEE_IP
+#define PT_ENDREGS (0x1B0-1) /* PT_LASTOFF + GPR_SIZE - 1; parenthesized so it is safe inside larger expressions */
+
+#define GPR_SIZE 8
+#define CR_SIZE 8
+
+#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
+
+#endif /* __s390x__ */
+
+#define NUM_GPRS 16
+#define NUM_FPRS 16
+#define NUM_CRS 16
+#define NUM_ACRS 16
+
+#define NUM_CR_WORDS 3
+
+#define FPR_SIZE 8
+#define FPC_SIZE 4
+#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */
+#define ACR_SIZE 4
+
+
+#define PTRACE_OLDSETOPTIONS 21
+
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef union
+{
+ float f;
+ double d;
+ __u64 ui;
+ struct
+ {
+ __u32 hi;
+ __u32 lo;
+ } fp;
+} freg_t;
+
+typedef struct
+{
+ __u32 fpc;
+ __u32 pad;
+ freg_t fprs[NUM_FPRS];
+} s390_fp_regs;
+
+#define FPC_EXCEPTION_MASK 0xF8000000
+#define FPC_FLAGS_MASK 0x00F80000
+#define FPC_DXC_MASK 0x0000FF00
+#define FPC_RM_MASK 0x00000003
+
+/* this typedef defines what a Program Status Word looks like */
+typedef struct
+{
+ unsigned long mask;
+ unsigned long addr;
+} __attribute__ ((aligned(8))) psw_t;
+
+#ifndef __s390x__
+
+#define PSW_MASK_PER 0x40000000UL
+#define PSW_MASK_DAT 0x04000000UL
+#define PSW_MASK_IO 0x02000000UL
+#define PSW_MASK_EXT 0x01000000UL
+#define PSW_MASK_KEY 0x00F00000UL
+#define PSW_MASK_BASE 0x00080000UL /* always one */
+#define PSW_MASK_MCHECK 0x00040000UL
+#define PSW_MASK_WAIT 0x00020000UL
+#define PSW_MASK_PSTATE 0x00010000UL
+#define PSW_MASK_ASC 0x0000C000UL
+#define PSW_MASK_CC 0x00003000UL
+#define PSW_MASK_PM 0x00000F00UL
+#define PSW_MASK_RI 0x00000000UL
+#define PSW_MASK_EA 0x00000000UL
+#define PSW_MASK_BA 0x00000000UL
+
+#define PSW_MASK_USER 0x0000FF00UL
+
+#define PSW_ADDR_AMODE 0x80000000UL
+#define PSW_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW_ASC_PRIMARY 0x00000000UL
+#define PSW_ASC_ACCREG 0x00004000UL
+#define PSW_ASC_SECONDARY 0x00008000UL
+#define PSW_ASC_HOME 0x0000C000UL
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER 0x4000000000000000UL
+#define PSW_MASK_DAT 0x0400000000000000UL
+#define PSW_MASK_IO 0x0200000000000000UL
+#define PSW_MASK_EXT 0x0100000000000000UL
+#define PSW_MASK_BASE 0x0000000000000000UL
+#define PSW_MASK_KEY 0x00F0000000000000UL
+#define PSW_MASK_MCHECK 0x0004000000000000UL
+#define PSW_MASK_WAIT 0x0002000000000000UL
+#define PSW_MASK_PSTATE 0x0001000000000000UL
+#define PSW_MASK_ASC 0x0000C00000000000UL
+#define PSW_MASK_CC 0x0000300000000000UL
+#define PSW_MASK_PM 0x00000F0000000000UL
+#define PSW_MASK_RI 0x0000008000000000UL
+#define PSW_MASK_EA 0x0000000100000000UL
+#define PSW_MASK_BA 0x0000000080000000UL
+
+#define PSW_MASK_USER 0x0000FF0180000000UL
+
+#define PSW_ADDR_AMODE 0x0000000000000000UL
+#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
+
+#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52)
+
+#define PSW_ASC_PRIMARY 0x0000000000000000UL
+#define PSW_ASC_ACCREG 0x0000400000000000UL
+#define PSW_ASC_SECONDARY 0x0000800000000000UL
+#define PSW_ASC_HOME 0x0000C00000000000UL
+
+#endif /* __s390x__ */
+
+
+/*
+ * The s390_regs structure is used to define the elf_gregset_t.
+ */
+typedef struct
+{
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned int acrs[NUM_ACRS];
+ unsigned long orig_gpr2;
+} s390_regs;
+
+/*
+ * Now for the user space program event recording (trace) definitions.
+ * The following structures are used only for the ptrace interface, don't
+ * touch or even look at it if you don't want to modify the user-space
+ * ptrace interface. In particular stay away from it for in-kernel PER.
+ */
+typedef struct
+{
+ unsigned long cr[NUM_CR_WORDS];
+} per_cr_words;
+
+#define PER_EM_MASK 0xE8000000UL
+
+typedef struct
+{
+#ifdef __s390x__
+ unsigned : 32;
+#endif /* __s390x__ */
+ unsigned em_branching : 1;
+ unsigned em_instruction_fetch : 1;
+ /*
+ * Switching on storage alteration automatically fixes
+ * the storage alteration event bit in the user's std.
+ */
+ unsigned em_storage_alteration : 1;
+ unsigned em_gpr_alt_unused : 1;
+ unsigned em_store_real_address : 1;
+ unsigned : 3;
+ unsigned branch_addr_ctl : 1;
+ unsigned : 1;
+ unsigned storage_alt_space_ctl : 1;
+ unsigned : 21;
+ unsigned long starting_addr;
+ unsigned long ending_addr;
+} per_cr_bits;
+
+typedef struct
+{
+ unsigned short perc_atmid;
+ unsigned long address;
+ unsigned char access_id;
+} per_lowcore_words;
+
+typedef struct
+{
+ unsigned perc_branching : 1;
+ unsigned perc_instruction_fetch : 1;
+ unsigned perc_storage_alteration : 1;
+ unsigned perc_gpr_alt_unused : 1;
+ unsigned perc_store_real_address : 1;
+ unsigned : 3;
+ unsigned atmid_psw_bit_31 : 1;
+ unsigned atmid_validity_bit : 1;
+ unsigned atmid_psw_bit_32 : 1;
+ unsigned atmid_psw_bit_5 : 1;
+ unsigned atmid_psw_bit_16 : 1;
+ unsigned atmid_psw_bit_17 : 1;
+ unsigned si : 2;
+ unsigned long address;
+ unsigned : 4;
+ unsigned access_id : 4;
+} per_lowcore_bits;
+
+typedef struct
+{
+ union {
+ per_cr_words words;
+ per_cr_bits bits;
+ } control_regs;
+ /*
+ * Use these flags instead of setting em_instruction_fetch
+ * directly; they are used so that single stepping can be
+ * switched on & off while not affecting other tracing.
+ */
+ unsigned single_step : 1;
+ unsigned instruction_fetch : 1;
+ unsigned : 30;
+ /*
+ * These addresses are copied into cr10 & cr11 if single
+ * stepping is switched off
+ */
+ unsigned long starting_addr;
+ unsigned long ending_addr;
+ union {
+ per_lowcore_words words;
+ per_lowcore_bits bits;
+ } lowcore;
+} per_struct;
+
+typedef struct
+{
+ unsigned int len;
+ unsigned long kernel_addr;
+ unsigned long process_addr;
+} ptrace_area;
+
+/*
+ * S/390 specific non-POSIX ptrace requests. I chose unusual values so
+ * they are unlikely to clash with future ptrace definitions.
+ */
+#define PTRACE_PEEKUSR_AREA 0x5000
+#define PTRACE_POKEUSR_AREA 0x5001
+#define PTRACE_PEEKTEXT_AREA 0x5002
+#define PTRACE_PEEKDATA_AREA 0x5003
+#define PTRACE_POKETEXT_AREA 0x5004
+#define PTRACE_POKEDATA_AREA 0x5005
+#define PTRACE_GET_LAST_BREAK 0x5006
+#define PTRACE_PEEK_SYSTEM_CALL 0x5007
+#define PTRACE_POKE_SYSTEM_CALL 0x5008
+#define PTRACE_ENABLE_TE 0x5009
+#define PTRACE_DISABLE_TE 0x5010
+#define PTRACE_TE_ABORT_RAND 0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the numbers assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
+
+/*
+ * PT_PROT definition is loosely based on hppa bsd definition in
+ * gdb/hppab-nat.c
+ */
+#define PTRACE_PROT 21
+
+typedef enum
+{
+ ptprot_set_access_watchpoint,
+ ptprot_set_write_watchpoint,
+ ptprot_disable_watchpoint
+} ptprot_flags;
+
+typedef struct
+{
+ unsigned long lowaddr;
+ unsigned long hiaddr;
+ ptprot_flags prot;
+} ptprot_area;
+
+/* Sequence of bytes for breakpoint illegal instruction. */
+#define S390_BREAKPOINT {0x0,0x1}
+#define S390_BREAKPOINT_U16 ((__u16)0x0001)
+#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
+#define S390_SYSCALL_SIZE 2
+
+/*
+ * The user_regs_struct defines the way the user registers are
+ * stored on the stack for signal handling.
+ */
+struct user_regs_struct
+{
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned int acrs[NUM_ACRS];
+ unsigned long orig_gpr2;
+ s390_fp_regs fp_regs;
+ /*
+ * These PER registers are in here so that gdb can modify them
+ * itself as there is no "official" ptrace interface for hardware
+ * watchpoints. This is the way Intel does it.
+ */
+ per_struct per_info;
+ unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_PTRACE_H */
diff --git a/arch/s390/include/uapi/asm/qeth.h b/arch/s390/include/uapi/asm/qeth.h
new file mode 100644
index 00000000000..3a896cf5258
--- /dev/null
+++ b/arch/s390/include/uapi/asm/qeth.h
@@ -0,0 +1,115 @@
+/*
+ * ioctl definitions for qeth driver
+ *
+ * Copyright IBM Corp. 2004
+ *
+ * Author(s): Thomas Spatzier <tspat@de.ibm.com>
+ *
+ */
+#ifndef __ASM_S390_QETH_IOCTL_H__
+#define __ASM_S390_QETH_IOCTL_H__
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define SIOC_QETH_ARP_SET_NO_ENTRIES (SIOCDEVPRIVATE)
+#define SIOC_QETH_ARP_QUERY_INFO (SIOCDEVPRIVATE + 1)
+#define SIOC_QETH_ARP_ADD_ENTRY (SIOCDEVPRIVATE + 2)
+#define SIOC_QETH_ARP_REMOVE_ENTRY (SIOCDEVPRIVATE + 3)
+#define SIOC_QETH_ARP_FLUSH_CACHE (SIOCDEVPRIVATE + 4)
+#define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5)
+#define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6)
+#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7)
+
+struct qeth_arp_cache_entry {
+ __u8 macaddr[6];
+ __u8 reserved1[2];
+ __u8 ipaddr[16]; /* for both IPv4 and IPv6 */
+ __u8 reserved2[32];
+} __attribute__ ((packed));
+
+enum qeth_arp_ipaddrtype {
+ QETHARP_IP_ADDR_V4 = 1,
+ QETHARP_IP_ADDR_V6 = 2,
+};
+struct qeth_arp_entrytype {
+ __u8 mac;
+ __u8 ip;
+} __attribute__((packed));
+
+#define QETH_QARP_MEDIASPECIFIC_BYTES 32
+#define QETH_QARP_MACADDRTYPE_BYTES 1
+struct qeth_arp_qi_entry7 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_ipv6 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short {
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short_ipv6 {
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_ipv6 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short {
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short_ipv6 {
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[16];
+} __attribute__((packed));
+/*
+ * can be set by user if no "media specific information" is wanted
+ * -> saves a lot of space in user space buffer
+ */
+#define QETH_QARP_STRIP_ENTRIES 0x8000
+#define QETH_QARP_WITH_IPV6 0x4000
+#define QETH_QARP_REQUEST_MASK 0x00ff
+
+/* data sent to user space as result of query arp ioctl */
+#define QETH_QARP_USER_DATA_SIZE 20000
+#define QETH_QARP_MASK_OFFSET 4
+#define QETH_QARP_ENTRIES_OFFSET 6
+struct qeth_arp_query_user_data {
+ union {
+ __u32 data_len; /* set by user space program */
+ __u32 no_entries; /* set by kernel */
+ } u;
+ __u16 mask_bits;
+ char *entries;
+} __attribute__((packed));
+
+struct qeth_query_oat_data {
+ __u32 command;
+ __u32 buffer_len;
+ __u32 response_len;
+ __u64 ptr;
+};
+#endif /* __ASM_S390_QETH_IOCTL_H__ */
diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h
new file mode 100644
index 00000000000..ec23d1c73c9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/resource.h
@@ -0,0 +1,13 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/resources.h"
+ */
+
+#ifndef _S390_RESOURCE_H
+#define _S390_RESOURCE_H
+
+#include <asm-generic/resource.h>
+
+#endif
+
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
new file mode 100644
index 00000000000..32f3ab2a820
--- /dev/null
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -0,0 +1,16 @@
+#ifndef _UAPIASM_SCHID_H
+#define _UAPIASM_SCHID_H
+
+#include <linux/types.h>
+
+struct subchannel_id {
+ __u32 cssid : 8;
+ __u32 : 4;
+ __u32 m : 1;
+ __u32 ssid : 2;
+ __u32 one : 1;
+ __u32 sch_no : 16;
+} __attribute__ ((packed, aligned(4)));
+
+
+#endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 00000000000..f2818613ee4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,24 @@
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {
+ __u32 cmdw;
+ __u64 sccb;
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+ _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sembuf.h b/arch/s390/include/uapi/asm/sembuf.h
new file mode 100644
index 00000000000..32626b0cac4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sembuf.h
@@ -0,0 +1,29 @@
+#ifndef _S390_SEMBUF_H
+#define _S390_SEMBUF_H
+
+/*
+ * The semid64_ds structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem (for !__s390x__)
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+ struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
+ __kernel_time_t sem_otime; /* last semop time */
+#ifndef __s390x__
+ unsigned long __unused1;
+#endif /* ! __s390x__ */
+ __kernel_time_t sem_ctime; /* last change time */
+#ifndef __s390x__
+ unsigned long __unused2;
+#endif /* ! __s390x__ */
+ unsigned long sem_nsems; /* no. of semaphores in array */
+ unsigned long __unused3;
+ unsigned long __unused4;
+};
+
+#endif /* _S390_SEMBUF_H */
diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h
new file mode 100644
index 00000000000..5a637e3e385
--- /dev/null
+++ b/arch/s390/include/uapi/asm/setup.h
@@ -0,0 +1,13 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2010
+ */
+
+#ifndef _UAPI_ASM_S390_SETUP_H
+#define _UAPI_ASM_S390_SETUP_H
+
+#define COMMAND_LINE_SIZE 4096
+
+#define ARCH_COMMAND_LINE_SIZE 896
+
+#endif /* _UAPI_ASM_S390_SETUP_H */
diff --git a/arch/s390/include/uapi/asm/shmbuf.h b/arch/s390/include/uapi/asm/shmbuf.h
new file mode 100644
index 00000000000..eed2e280ce3
--- /dev/null
+++ b/arch/s390/include/uapi/asm/shmbuf.h
@@ -0,0 +1,48 @@
+#ifndef _S390_SHMBUF_H
+#define _S390_SHMBUF_H
+
+/*
+ * The shmid64_ds structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem (for !__s390x__)
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+ struct ipc64_perm shm_perm; /* operation perms */
+ size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_time_t shm_atime; /* last attach time */
+#ifndef __s390x__
+ unsigned long __unused1;
+#endif /* ! __s390x__ */
+ __kernel_time_t shm_dtime; /* last detach time */
+#ifndef __s390x__
+ unsigned long __unused2;
+#endif /* ! __s390x__ */
+ __kernel_time_t shm_ctime; /* last change time */
+#ifndef __s390x__
+ unsigned long __unused3;
+#endif /* ! __s390x__ */
+ __kernel_pid_t shm_cpid; /* pid of creator */
+ __kernel_pid_t shm_lpid; /* pid of last operator */
+ unsigned long shm_nattch; /* no. of current attaches */
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+struct shminfo64 {
+ unsigned long shmmax;
+ unsigned long shmmin;
+ unsigned long shmmni;
+ unsigned long shmseg;
+ unsigned long shmall;
+ unsigned long __unused1;
+ unsigned long __unused2;
+ unsigned long __unused3;
+ unsigned long __unused4;
+};
+
+#endif /* _S390_SHMBUF_H */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 00000000000..5d9cc19462c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,243 @@
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#define diagnose_codes \
+ { 0x10, "DIAG (0x10) release pages" }, \
+ { 0x44, "DIAG (0x44) time slice end" }, \
+ { 0x9c, "DIAG (0x9c) time slice end directed" }, \
+ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \
+ { 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x308, "DIAG (0x308) ipl functions" }, \
+ { 0x500, "DIAG (0x500) KVM virtio functions" }, \
+ { 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes \
+ { 0x01, "SIGP sense" }, \
+ { 0x02, "SIGP external call" }, \
+ { 0x03, "SIGP emergency signal" }, \
+ { 0x05, "SIGP stop" }, \
+ { 0x06, "SIGP restart" }, \
+ { 0x09, "SIGP stop and store status" }, \
+ { 0x0b, "SIGP initial cpu reset" }, \
+ { 0x0d, "SIGP set prefix" }, \
+ { 0x0e, "SIGP store status at address" }, \
+ { 0x12, "SIGP set architecture" }, \
+ { 0x15, "SIGP sense running" }
+
+#define icpt_prog_codes \
+ { 0x0001, "Prog Operation" }, \
+ { 0x0002, "Prog Privileged Operation" }, \
+ { 0x0003, "Prog Execute" }, \
+ { 0x0004, "Prog Protection" }, \
+ { 0x0005, "Prog Addressing" }, \
+ { 0x0006, "Prog Specification" }, \
+ { 0x0007, "Prog Data" }, \
+ { 0x0008, "Prog Fixedpoint overflow" }, \
+ { 0x0009, "Prog Fixedpoint divide" }, \
+ { 0x000A, "Prog Decimal overflow" }, \
+ { 0x000B, "Prog Decimal divide" }, \
+ { 0x000C, "Prog HFP exponent overflow" }, \
+ { 0x000D, "Prog HFP exponent underflow" }, \
+ { 0x000E, "Prog HFP significance" }, \
+ { 0x000F, "Prog HFP divide" }, \
+ { 0x0010, "Prog Segment translation" }, \
+ { 0x0011, "Prog Page translation" }, \
+ { 0x0012, "Prog Translation specification" }, \
+ { 0x0013, "Prog Special operation" }, \
+ { 0x0015, "Prog Operand" }, \
+ { 0x0016, "Prog Trace table" }, \
+ { 0x0017, "Prog ASNtranslation specification" }, \
+ { 0x001C, "Prog Spaceswitch event" }, \
+ { 0x001D, "Prog HFP square root" }, \
+ { 0x001F, "Prog PCtranslation specification" }, \
+ { 0x0020, "Prog AFX translation" }, \
+ { 0x0021, "Prog ASX translation" }, \
+ { 0x0022, "Prog LX translation" }, \
+ { 0x0023, "Prog EX translation" }, \
+ { 0x0024, "Prog Primary authority" }, \
+ { 0x0025, "Prog Secondary authority" }, \
+ { 0x0026, "Prog LFXtranslation exception" }, \
+ { 0x0027, "Prog LSXtranslation exception" }, \
+ { 0x0028, "Prog ALET specification" }, \
+ { 0x0029, "Prog ALEN translation" }, \
+ { 0x002A, "Prog ALE sequence" }, \
+ { 0x002B, "Prog ASTE validity" }, \
+ { 0x002C, "Prog ASTE sequence" }, \
+ { 0x002D, "Prog Extended authority" }, \
+ { 0x002E, "Prog LSTE sequence" }, \
+ { 0x002F, "Prog ASTE instance" }, \
+ { 0x0030, "Prog Stack full" }, \
+ { 0x0031, "Prog Stack empty" }, \
+ { 0x0032, "Prog Stack specification" }, \
+ { 0x0033, "Prog Stack type" }, \
+ { 0x0034, "Prog Stack operation" }, \
+ { 0x0039, "Prog Region first translation" }, \
+ { 0x003A, "Prog Region second translation" }, \
+ { 0x003B, "Prog Region third translation" }, \
+ { 0x0040, "Prog Monitor event" }, \
+ { 0x0080, "Prog PER event" }, \
+ { 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic) \
+ { (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic) \
+ { opcode, mnemonic }
+
+#define icpt_insn_codes \
+ exit_code_ipa0(0x01, 0x01, "PR"), \
+ exit_code_ipa0(0x01, 0x04, "PTFF"), \
+ exit_code_ipa0(0x01, 0x07, "SCKPF"), \
+ exit_code_ipa0(0xAA, 0x00, "RINEXT"), \
+ exit_code_ipa0(0xAA, 0x01, "RION"), \
+ exit_code_ipa0(0xAA, 0x02, "TRIC"), \
+ exit_code_ipa0(0xAA, 0x03, "RIOFF"), \
+ exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \
+ exit_code_ipa0(0xB2, 0x02, "STIDP"), \
+ exit_code_ipa0(0xB2, 0x04, "SCK"), \
+ exit_code_ipa0(0xB2, 0x05, "STCK"), \
+ exit_code_ipa0(0xB2, 0x06, "SCKC"), \
+ exit_code_ipa0(0xB2, 0x07, "STCKC"), \
+ exit_code_ipa0(0xB2, 0x08, "SPT"), \
+ exit_code_ipa0(0xB2, 0x09, "STPT"), \
+ exit_code_ipa0(0xB2, 0x0d, "PTLB"), \
+ exit_code_ipa0(0xB2, 0x10, "SPX"), \
+ exit_code_ipa0(0xB2, 0x11, "STPX"), \
+ exit_code_ipa0(0xB2, 0x12, "STAP"), \
+ exit_code_ipa0(0xB2, 0x14, "SIE"), \
+ exit_code_ipa0(0xB2, 0x16, "SETR"), \
+ exit_code_ipa0(0xB2, 0x17, "STETR"), \
+ exit_code_ipa0(0xB2, 0x18, "PC"), \
+ exit_code_ipa0(0xB2, 0x20, "SERVC"), \
+ exit_code_ipa0(0xB2, 0x28, "PT"), \
+ exit_code_ipa0(0xB2, 0x29, "ISKE"), \
+ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
+ exit_code_ipa0(0xB2, 0x2b, "SSKE"), \
+ exit_code_ipa0(0xB2, 0x2c, "TB"), \
+ exit_code_ipa0(0xB2, 0x2e, "PGIN"), \
+ exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \
+ exit_code_ipa0(0xB2, 0x30, "CSCH"), \
+ exit_code_ipa0(0xB2, 0x31, "HSCH"), \
+ exit_code_ipa0(0xB2, 0x32, "MSCH"), \
+ exit_code_ipa0(0xB2, 0x33, "SSCH"), \
+ exit_code_ipa0(0xB2, 0x34, "STSCH"), \
+ exit_code_ipa0(0xB2, 0x35, "TSCH"), \
+ exit_code_ipa0(0xB2, 0x36, "TPI"), \
+ exit_code_ipa0(0xB2, 0x37, "SAL"), \
+ exit_code_ipa0(0xB2, 0x38, "RSCH"), \
+ exit_code_ipa0(0xB2, 0x39, "STCRW"), \
+ exit_code_ipa0(0xB2, 0x3a, "STCPS"), \
+ exit_code_ipa0(0xB2, 0x3b, "RCHP"), \
+ exit_code_ipa0(0xB2, 0x3c, "SCHM"), \
+ exit_code_ipa0(0xB2, 0x40, "BAKR"), \
+ exit_code_ipa0(0xB2, 0x48, "PALB"), \
+ exit_code_ipa0(0xB2, 0x4c, "TAR"), \
+ exit_code_ipa0(0xB2, 0x50, "CSP"), \
+ exit_code_ipa0(0xB2, 0x54, "MVPG"), \
+ exit_code_ipa0(0xB2, 0x58, "BSG"), \
+ exit_code_ipa0(0xB2, 0x5a, "BSA"), \
+ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \
+ exit_code_ipa0(0xB2, 0x74, "SIGA"), \
+ exit_code_ipa0(0xB2, 0x76, "XSCH"), \
+ exit_code_ipa0(0xB2, 0x78, "STCKE"), \
+ exit_code_ipa0(0xB2, 0x7c, "STCKF"), \
+ exit_code_ipa0(0xB2, 0x7d, "STSI"), \
+ exit_code_ipa0(0xB2, 0xb0, "STFLE"), \
+ exit_code_ipa0(0xB2, 0xb1, "STFL"), \
+ exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \
+ exit_code_ipa0(0xB2, 0xf8, "TEND"), \
+ exit_code_ipa0(0xB2, 0xfc, "TABORT"), \
+ exit_code_ipa0(0xB9, 0x1e, "KMAC"), \
+ exit_code_ipa0(0xB9, 0x28, "PCKMO"), \
+ exit_code_ipa0(0xB9, 0x2a, "KMF"), \
+ exit_code_ipa0(0xB9, 0x2b, "KMO"), \
+ exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \
+ exit_code_ipa0(0xB9, 0x2e, "KM"), \
+ exit_code_ipa0(0xB9, 0x2f, "KMC"), \
+ exit_code_ipa0(0xB9, 0x3e, "KIMD"), \
+ exit_code_ipa0(0xB9, 0x3f, "KLMD"), \
+ exit_code_ipa0(0xB9, 0x8a, "CSPG"), \
+ exit_code_ipa0(0xB9, 0x8d, "EPSW"), \
+ exit_code_ipa0(0xB9, 0x8e, "IDTE"), \
+ exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \
+ exit_code_ipa0(0xB9, 0x9c, "EQBS"), \
+ exit_code_ipa0(0xB9, 0xa2, "PTF"), \
+ exit_code_ipa0(0xB9, 0xab, "ESSA"), \
+ exit_code_ipa0(0xB9, 0xae, "RRBM"), \
+ exit_code_ipa0(0xB9, 0xaf, "PFMF"), \
+ exit_code_ipa0(0xE3, 0x03, "LRAG"), \
+ exit_code_ipa0(0xE3, 0x13, "LRAY"), \
+ exit_code_ipa0(0xE3, 0x25, "NTSTG"), \
+ exit_code_ipa0(0xE5, 0x00, "LASP"), \
+ exit_code_ipa0(0xE5, 0x01, "TPROT"), \
+ exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \
+ exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \
+ exit_code_ipa0(0xEB, 0x25, "STCTG"), \
+ exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \
+ exit_code_ipa0(0xEB, 0x60, "LRIC"), \
+ exit_code_ipa0(0xEB, 0x61, "STRIC"), \
+ exit_code_ipa0(0xEB, 0x62, "MRIC"), \
+ exit_code_ipa0(0xEB, 0x8a, "SQBS"), \
+ exit_code_ipa0(0xC8, 0x01, "ECTG"), \
+ exit_code(0x0a, "SVC"), \
+ exit_code(0x80, "SSM"), \
+ exit_code(0x82, "LPSW"), \
+ exit_code(0x83, "DIAG"), \
+ exit_code(0xae, "SIGP"), \
+ exit_code(0xac, "STNSM"), \
+ exit_code(0xad, "STOSM"), \
+ exit_code(0xb1, "LRA"), \
+ exit_code(0xb6, "STCTL"), \
+ exit_code(0xb7, "LCTL"), \
+ exit_code(0xee, "PLO")
+
+#define sie_intercept_code \
+ { 0x00, "Host interruption" }, \
+ { 0x04, "Instruction" }, \
+ { 0x08, "Program interruption" }, \
+ { 0x0c, "Instruction and program interruption" }, \
+ { 0x10, "External request" }, \
+ { 0x14, "External interruption" }, \
+ { 0x18, "I/O request" }, \
+ { 0x1c, "Wait state" }, \
+ { 0x20, "Validity" }, \
+ { 0x28, "Stop request" }, \
+ { 0x2c, "Operation exception" }, \
+ { 0x38, "Partial-execution" }, \
+ { 0x3c, "I/O interruption" }, \
+ { 0x40, "I/O instruction" }, \
+ { 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It is used as a userspace interface and it can be used in places
+ * that do not allow the use of general decoder functions,
+ * such as trace event declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand the conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \
+ (insn >> 56) == (ipa0) ? \
+ ((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn) \
+ INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \
+ INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \
+ INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \
+ INSN_DECODE(insn)
+
+#endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
new file mode 100644
index 00000000000..b30de9c01bb
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -0,0 +1,70 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _ASM_S390_SIGCONTEXT_H
+#define _ASM_S390_SIGCONTEXT_H
+
+#include <linux/compiler.h>
+
+#define __NUM_GPRS 16
+#define __NUM_FPRS 16
+#define __NUM_ACRS 16
+
+#ifndef __s390x__
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG 64
+#define _SIGCONTEXT_NSIG_BPW 32
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE 96
+
+#else /* __s390x__ */
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG 64
+#define _SIGCONTEXT_NSIG_BPW 64
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE 160
+
+#endif /* __s390x__ */
+
+#define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
+#define _SIGMASK_COPY_SIZE (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
+
+typedef struct
+{
+ unsigned long mask;
+ unsigned long addr;
+} __attribute__ ((aligned(8))) _psw_t;
+
+typedef struct
+{
+ _psw_t psw;
+ unsigned long gprs[__NUM_GPRS];
+ unsigned int acrs[__NUM_ACRS];
+} _s390_regs_common;
+
+typedef struct
+{
+ unsigned int fpc;
+ unsigned int pad;
+ double fprs[__NUM_FPRS];
+} _s390_fp_regs;
+
+typedef struct
+{
+ _s390_regs_common regs;
+ _s390_fp_regs fpregs;
+} _sigregs;
+
+struct sigcontext
+{
+ unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS];
+ _sigregs __user *sregs;
+};
+
+
+#endif
+
diff --git a/arch/s390/include/uapi/asm/siginfo.h b/arch/s390/include/uapi/asm/siginfo.h
new file mode 100644
index 00000000000..91fd3e4b70c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/siginfo.h
@@ -0,0 +1,16 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/siginfo.h"
+ */
+
+#ifndef _S390_SIGINFO_H
+#define _S390_SIGINFO_H
+
+#ifdef __s390x__
+#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+#endif
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
new file mode 100644
index 00000000000..2f43cfbf5f1
--- /dev/null
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -0,0 +1,129 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/signal.h"
+ */
+
+#ifndef _UAPI_ASMS390_SIGNAL_H
+#define _UAPI_ASMS390_SIGNAL_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+
+/* Avoid too many header ordering problems. */
+struct siginfo;
+struct pt_regs;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+#define NSIG 32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+#define SIGBUS 7
+#define SIGFPE 8
+#define SIGKILL 9
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGSTKFLT 16
+#define SIGCHLD 17
+#define SIGCONT 18
+#define SIGSTOP 19
+#define SIGTSTP 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGURG 23
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGIO 29
+#define SIGPOLL SIGIO
+/*
+#define SIGLOST 29
+*/
+#define SIGPWR 30
+#define SIGSYS 31
+#define SIGUNUSED 31
+
+/* These should not be considered constants from userland. */
+#define SIGRTMIN 32
+#define SIGRTMAX _NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP 0x00000001
+#define SA_NOCLDWAIT 0x00000002
+#define SA_SIGINFO 0x00000004
+#define SA_ONSTACK 0x08000000
+#define SA_RESTART 0x10000000
+#define SA_NODEFER 0x40000000
+#define SA_RESETHAND 0x80000000
+
+#define SA_NOMASK SA_NODEFER
+#define SA_ONESHOT SA_RESETHAND
+
+#define SA_RESTORER 0x04000000
+
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+struct sigaction {
+ union {
+ __sighandler_t _sa_handler;
+ void (*_sa_sigaction)(int, struct siginfo *, void *);
+ } _u;
+#ifndef __s390x__ /* lovely */
+ sigset_t sa_mask;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+#else /* __s390x__ */
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ sigset_t sa_mask;
+#endif /* __s390x__ */
+};
+
+#define sa_handler _u._sa_handler
+#define sa_sigaction _u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+ void __user *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} stack_t;
+
+
+#endif /* _UAPI_ASMS390_SIGNAL_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
new file mode 100644
index 00000000000..e031332096d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -0,0 +1,89 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/socket.h"
+ */
+
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET 1
+
+#define SO_DEBUG 1
+#define SO_REUSEADDR 2
+#define SO_TYPE 3
+#define SO_ERROR 4
+#define SO_DONTROUTE 5
+#define SO_BROADCAST 6
+#define SO_SNDBUF 7
+#define SO_RCVBUF 8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
+#define SO_KEEPALIVE 9
+#define SO_OOBINLINE 10
+#define SO_NO_CHECK 11
+#define SO_PRIORITY 12
+#define SO_LINGER 13
+#define SO_BSDCOMPAT 14
+#define SO_REUSEPORT 15
+#define SO_PASSCRED 16
+#define SO_PEERCRED 17
+#define SO_RCVLOWAT 18
+#define SO_SNDLOWAT 19
+#define SO_RCVTIMEO 20
+#define SO_SNDTIMEO 21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION 22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
+#define SO_SECURITY_ENCRYPTION_NETWORK 24
+
+#define SO_BINDTODEVICE 25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER 26
+#define SO_DETACH_FILTER 27
+#define SO_GET_FILTER SO_ATTACH_FILTER
+
+#define SO_PEERNAME 28
+#define SO_TIMESTAMP 29
+#define SCM_TIMESTAMP SO_TIMESTAMP
+
+#define SO_ACCEPTCONN 30
+
+#define SO_PEERSEC 31
+#define SO_PASSSEC 34
+#define SO_TIMESTAMPNS 35
+#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
+
+#define SO_MARK 36
+
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
+#define SO_PROTOCOL 38
+#define SO_DOMAIN 39
+
+#define SO_RXQ_OVFL 40
+
+#define SO_WIFI_STATUS 41
+#define SCM_WIFI_STATUS SO_WIFI_STATUS
+#define SO_PEEK_OFF 42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS 43
+
+#define SO_LOCK_FILTER 44
+
+#define SO_SELECT_ERR_QUEUE 45
+
+#define SO_BUSY_POLL 46
+
+#define SO_MAX_PACING_RATE 47
+
+#define SO_BPF_EXTENSIONS 48
+
+#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h
new file mode 100644
index 00000000000..6f60eee7324
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sockios.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SOCKIOS_H
+#define _ASM_S390_SOCKIOS_H
+
+#include <asm-generic/sockios.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h
new file mode 100644
index 00000000000..b4ca97d9146
--- /dev/null
+++ b/arch/s390/include/uapi/asm/stat.h
@@ -0,0 +1,103 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/stat.h"
+ */
+
+#ifndef _S390_STAT_H
+#define _S390_STAT_H
+
+#ifndef __s390x__
+struct __old_kernel_stat {
+ unsigned short st_dev;
+ unsigned short st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ unsigned long st_size;
+ unsigned long st_atime;
+ unsigned long st_mtime;
+ unsigned long st_ctime;
+};
+
+struct stat {
+ unsigned short st_dev;
+ unsigned short __pad1;
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ unsigned short __pad2;
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+/* This matches struct stat64 in glibc2.1, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {
+ unsigned long long st_dev;
+ unsigned int __pad1;
+#define STAT64_HAS_BROKEN_ST_INO 1
+ unsigned long __st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned long st_uid;
+ unsigned long st_gid;
+ unsigned long long st_rdev;
+ unsigned int __pad3;
+ long long st_size;
+ unsigned long st_blksize;
+ unsigned char __pad4[4];
+ unsigned long __pad5; /* future possible st_blocks high bits */
+ unsigned long st_blocks; /* Number of 512-byte blocks allocated. */
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec; /* will be high 32 bits of ctime someday */
+ unsigned long long st_ino;
+};
+
+#else /* __s390x__ */
+
+struct stat {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned int st_mode;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned int __pad1;
+ unsigned long st_rdev;
+ unsigned long st_size;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long st_blksize;
+ long st_blocks;
+ unsigned long __unused[3];
+};
+
+#endif /* __s390x__ */
+
+#define STAT_HAVE_NSEC 1
+
+#endif
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
new file mode 100644
index 00000000000..471eb09184d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -0,0 +1,50 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/statfs.h"
+ */
+
+#ifndef _S390_STATFS_H
+#define _S390_STATFS_H
+
+/*
+ * We can't use <asm-generic/statfs.h> because in 64-bit mode
+ * we mix ints of different sizes in our struct statfs.
+ */
+
+#ifndef __KERNEL_STRICT_NAMES
+#include <linux/types.h>
+typedef __kernel_fsid_t fsid_t;
+#endif
+
+struct statfs { /* mixes unsigned int and unsigned long members, hence no asm-generic version (see note above) */
+ unsigned int f_type;
+ unsigned int f_bsize;
+ unsigned long f_blocks; /* the five counters below are unsigned long here */
+ unsigned long f_bfree;
+ unsigned long f_bavail;
+ unsigned long f_files;
+ unsigned long f_ffree;
+ __kernel_fsid_t f_fsid;
+ unsigned int f_namelen;
+ unsigned int f_frsize;
+ unsigned int f_flags;
+ unsigned int f_spare[4]; /* spare words, per the name */
+};
+
+struct statfs64 { /* same member names and order as struct statfs; counters are unsigned long long */
+ unsigned int f_type;
+ unsigned int f_bsize;
+ unsigned long long f_blocks; /* the five counters below are unsigned long long here */
+ unsigned long long f_bfree;
+ unsigned long long f_bavail;
+ unsigned long long f_files;
+ unsigned long long f_ffree;
+ __kernel_fsid_t f_fsid;
+ unsigned int f_namelen;
+ unsigned int f_frsize;
+ unsigned int f_flags;
+ unsigned int f_spare[4]; /* spare words, per the name */
+};
+
+#endif
diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h
new file mode 100644
index 00000000000..da3bfe5cc16
--- /dev/null
+++ b/arch/s390/include/uapi/asm/swab.h
@@ -0,0 +1,89 @@
+#ifndef _S390_SWAB_H
+#define _S390_SWAB_H
+
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/types.h>
+
+#ifndef __s390x__
+# define __SWAB_64_THRU_32__
+#endif
+
+#ifdef __s390x__
+static inline __u64 __arch_swab64p(const __u64 *x) /* returns the 64-bit value at *x with its bytes reversed; *x is not modified */
+{
+ __u64 result;
+
+ asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x)); /* lrvg: z/Architecture LOAD REVERSED (64-bit) from the memory operand */
+ return result;
+}
+#define __arch_swab64p __arch_swab64p /* NOTE(review): presumably lets generic swab code detect this override via #ifdef - confirm */
+
+static inline __u64 __arch_swab64(__u64 x) /* returns x with its bytes reversed */
+{
+ __u64 result;
+
+ asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x)); /* lrvgr: register-to-register form of LOAD REVERSED (64-bit) */
+ return result;
+}
+#define __arch_swab64 __arch_swab64 /* NOTE(review): presumably lets generic swab code detect this override via #ifdef - confirm */
+
+static inline void __arch_swab64s(__u64 *x) /* reverses the bytes of the 64-bit value at *x in place */
+{
+ x[0] = __arch_swab64p(&x[0]); /* load reversed, then store back to the same location */
+}
+#define __arch_swab64s __arch_swab64s
+#endif /* __s390x__ */
+
+static inline __u32 __arch_swab32p(const __u32 *x) /* returns the 32-bit value at *x with its bytes reversed */
+{
+ __u32 result;
+
+ asm volatile(
+#ifndef __s390x__
+ " icm %0,8,%O1+3(%R1)\n" /* mask 8: byte at offset 3 -> leftmost byte of result */
+ " icm %0,4,%O1+2(%R1)\n" /* mask 4: byte at offset 2 -> second byte of result */
+ " icm %0,2,%O1+1(%R1)\n" /* mask 2: byte at offset 1 -> third byte of result */
+ " ic %0,%1" /* byte at offset 0 -> rightmost byte of result */
+ : "=&d" (result) : "Q" (*x) : "cc"); /* early-clobber output: written before the last read of *x; cc is declared clobbered */
+#else /* __s390x__ */
+ " lrv %0,%1" /* lrv: LOAD REVERSED (32-bit) from the memory operand */
+ : "=d" (result) : "m" (*x));
+#endif /* __s390x__ */
+ return result;
+}
+#define __arch_swab32p __arch_swab32p
+
+#ifdef __s390x__
+static inline __u32 __arch_swab32(__u32 x) /* returns x with its bytes reversed; only provided when __s390x__ is defined */
+{
+ __u32 result;
+
+ asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x)); /* lrvr: register-to-register form of LOAD REVERSED (32-bit) */
+ return result;
+}
+#define __arch_swab32 __arch_swab32
+#endif /* __s390x__ */
+
+static inline __u16 __arch_swab16p(const __u16 *x) /* returns the 16-bit value at *x with its two bytes exchanged */
+{
+ __u16 result;
+
+ asm volatile(
+#ifndef __s390x__
+ " icm %0,2,%O1+1(%R1)\n" /* mask 2: byte at offset 1 -> high byte of the low halfword */
+ " ic %0,%1\n" /* byte at offset 0 -> lowest byte */
+ : "=&d" (result) : "Q" (*x) : "cc"); /* early-clobber output; cc is declared clobbered */
+#else /* __s390x__ */
+ " lrvh %0,%1" /* lrvh: LOAD REVERSED (halfword) from the memory operand */
+ : "=d" (result) : "m" (*x));
+#endif /* __s390x__ */
+ return result;
+}
+#define __arch_swab16p __arch_swab16p
+
+#endif /* _S390_SWAB_H */
diff --git a/arch/s390/include/uapi/asm/tape390.h b/arch/s390/include/uapi/asm/tape390.h
new file mode 100644
index 00000000000..b2bc4bab792
--- /dev/null
+++ b/arch/s390/include/uapi/asm/tape390.h
@@ -0,0 +1,102 @@
+/*************************************************************************
+ *
+ * enables user programs to display messages and control encryption
+ * on s390 tape devices
+ *
+ * Copyright IBM Corp. 2001, 2006
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *
+ *************************************************************************/
+
+#ifndef _TAPE390_H
+#define _TAPE390_H
+
+#define TAPE390_DISPLAY _IOW('d', 1, struct display_struct)
+
+/*
+ * The TAPE390_DISPLAY ioctl calls the Load Display command
+ * which transfers 17 bytes of data from the channel to the subsystem:
+ * - 1 format control byte, and
+ * - two 8-byte messages
+ *
+ * Format control byte:
+ * 0-2: New Message Overlay
+ * 3: Alternate Messages
+ * 4: Blink Message
+ * 5: Display Low/High Message
+ * 6: Reserved
+ * 7: Automatic Load Request
+ *
+ */
+
+typedef struct display_struct { /* argument of TAPE390_DISPLAY: the 17 bytes described above */
+ char cntrl; /* format control byte (bit layout listed above) */
+ char message1[8]; /* first 8-byte message */
+ char message2[8]; /* second 8-byte message */
+} display_struct;
+
+/*
+ * Tape encryption support
+ */
+
+struct tape390_crypt_info {
+ char capability; /* tested with TAPE390_CRYPT_SUPPORTED() */
+ char status; /* tested with TAPE390_CRYPT_ON() */
+ char medium_status; /* tested with TAPE390_MEDIUM_LOADED() and TAPE390_MEDIUM_ENCRYPTED() */
+} __attribute__ ((packed));
+
+
+/* Macros for "capability" field; x is any struct tape390_crypt_info expression */
+#define TAPE390_CRYPT_SUPPORTED_MASK 0x01
+#define TAPE390_CRYPT_SUPPORTED(x) \
+ ((((x).capability) & TAPE390_CRYPT_SUPPORTED_MASK))
+
+/* Macros for "status" field; (x) is parenthesized so arguments like *p expand correctly */
+#define TAPE390_CRYPT_ON_MASK 0x01
+#define TAPE390_CRYPT_ON(x) ((((x).status) & TAPE390_CRYPT_ON_MASK))
+
+/* Macros for "medium_status" field; each yields the masked bit (non-zero when set) */
+#define TAPE390_MEDIUM_LOADED_MASK 0x01
+#define TAPE390_MEDIUM_ENCRYPTED_MASK 0x02
+#define TAPE390_MEDIUM_ENCRYPTED(x) \
+ ((((x).medium_status) & TAPE390_MEDIUM_ENCRYPTED_MASK))
+#define TAPE390_MEDIUM_LOADED(x) \
+ ((((x).medium_status) & TAPE390_MEDIUM_LOADED_MASK))
+
+/*
+ * The TAPE390_CRYPT_SET ioctl is used to switch on/off encryption.
+ * The "capability" and "medium_status" fields are ignored for this ioctl!
+ */
+#define TAPE390_CRYPT_SET _IOW('d', 2, struct tape390_crypt_info)
+
+/*
+ * The TAPE390_CRYPT_QUERY ioctl is used to query the encryption state.
+ */
+#define TAPE390_CRYPT_QUERY _IOR('d', 3, struct tape390_crypt_info)
+
+/* Values for the "type" and "type_on_tape" fields of struct tape390_kekl */
+#define TAPE390_KEKL_TYPE_NONE 0
+#define TAPE390_KEKL_TYPE_LABEL 1
+#define TAPE390_KEKL_TYPE_HASH 2
+
+struct tape390_kekl { /* one key encrypting key label (KEKL) entry */
+ unsigned char type; /* one of the TAPE390_KEKL_TYPE_* values above */
+ unsigned char type_on_tape; /* one of the TAPE390_KEKL_TYPE_* values above */
+ char label[65]; /* NOTE(review): presumably 64 label characters plus a terminator - confirm */
+} __attribute__ ((packed));
+
+struct tape390_kekl_pair { /* argument type of the TAPE390_KEKL_SET and TAPE390_KEKL_QUERY ioctls */
+ struct tape390_kekl kekl[2]; /* exactly two label entries */
+} __attribute__ ((packed));
+
+/*
+ * The TAPE390_KEKL_SET ioctl is used to set Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_SET _IOW('d', 4, struct tape390_kekl_pair)
+
+/*
+ * The TAPE390_KEKL_QUERY ioctl is used to query Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_QUERY _IOR('d', 5, struct tape390_kekl_pair)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h
new file mode 100644
index 00000000000..71bf6ac6a2b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/termbits.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_TERMBITS_H
+#define _ASM_S390_TERMBITS_H
+
+#include <asm-generic/termbits.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
new file mode 100644
index 00000000000..554f973db1e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/termios.h
@@ -0,0 +1,49 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/termios.h"
+ */
+
+#ifndef _UAPI_S390_TERMIOS_H
+#define _UAPI_S390_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize { /* four unsigned short members, no padding members */
+ unsigned short ws_row; /* rows, per the field name */
+ unsigned short ws_col; /* columns, per the field name */
+ unsigned short ws_xpixel; /* NOTE(review): presumably horizontal size in pixels - confirm */
+ unsigned short ws_ypixel; /* NOTE(review): presumably vertical size in pixels - confirm */
+};
+
+#define NCC 8 /* number of entries in termio.c_cc[] below */
+struct termio { /* mode flags here are unsigned short */
+ unsigned short c_iflag; /* input mode flags */
+ unsigned short c_oflag; /* output mode flags */
+ unsigned short c_cflag; /* control mode flags */
+ unsigned short c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[NCC]; /* control characters */
+};
+
+/* modem lines: one bit per line, so values can be OR-ed together */
+#define TIOCM_LE 0x001
+#define TIOCM_DTR 0x002
+#define TIOCM_RTS 0x004
+#define TIOCM_ST 0x008
+#define TIOCM_SR 0x010
+#define TIOCM_CTS 0x020
+#define TIOCM_CAR 0x040
+#define TIOCM_RNG 0x080
+#define TIOCM_DSR 0x100
+#define TIOCM_CD TIOCM_CAR /* alias of TIOCM_CAR (0x040) */
+#define TIOCM_RI TIOCM_RNG /* alias of TIOCM_RNG (0x080) */
+#define TIOCM_OUT1 0x2000 /* note the gap: 0x200-0x1000 are not defined here */
+#define TIOCM_OUT2 0x4000
+#define TIOCM_LOOP 0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+
+#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
new file mode 100644
index 00000000000..038f2b9178a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/types.h
@@ -0,0 +1,22 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/types.h"
+ */
+
+#ifndef _UAPI_S390_TYPES_H
+#define _UAPI_S390_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+/* An address type, so that arithmetic can be done on it and it can be upgraded
+ to 64 bit when necessary
+*/
+typedef unsigned long addr_t;
+typedef __signed__ long saddr_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
new file mode 100644
index 00000000000..3e077b2a470
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -0,0 +1,37 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/ucontext.h"
+ */
+
+#ifndef _ASM_S390_UCONTEXT_H
+#define _ASM_S390_UCONTEXT_H
+
+#define UC_EXTENDED 0x00000001
+
+#ifndef __s390x__
+
+struct ucontext_extended { /* only defined when __s390x__ is not set; same members as struct ucontext plus uc_gprs_high */
+ unsigned long uc_flags;
+ struct ucontext *uc_link;
+ stack_t uc_stack;
+ _sigregs uc_mcontext;
+ sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(sigset_t)]; /* uc_sigmask + __unused together occupy 128 bytes */
+ unsigned long uc_gprs_high[16]; /* NOTE(review): presumably the upper halves of the 16 general registers - confirm */
+};
+
+#endif
+
+struct ucontext { /* defined for both the __s390x__ and non-__s390x__ cases */
+ unsigned long uc_flags;
+ struct ucontext *uc_link;
+ stack_t uc_stack;
+ _sigregs uc_mcontext;
+ sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(sigset_t)]; /* uc_sigmask + __unused together occupy 128 bytes */
+};
+
+#endif /* !_ASM_S390_UCONTEXT_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
new file mode 100644
index 00000000000..3802d2d3a18
--- /dev/null
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -0,0 +1,378 @@
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/unistd.h"
+ */
+
+#ifndef _UAPI_ASM_S390_UNISTD_H_
+#define _UAPI_ASM_S390_UNISTD_H_
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_exit 1
+#define __NR_fork 2
+#define __NR_read 3
+#define __NR_write 4
+#define __NR_open 5
+#define __NR_close 6
+#define __NR_restart_syscall 7
+#define __NR_creat 8
+#define __NR_link 9
+#define __NR_unlink 10
+#define __NR_execve 11
+#define __NR_chdir 12
+#define __NR_mknod 14
+#define __NR_chmod 15
+#define __NR_lseek 19
+#define __NR_getpid 20
+#define __NR_mount 21
+#define __NR_umount 22
+#define __NR_ptrace 26
+#define __NR_alarm 27
+#define __NR_pause 29
+#define __NR_utime 30
+#define __NR_access 33
+#define __NR_nice 34
+#define __NR_sync 36
+#define __NR_kill 37
+#define __NR_rename 38
+#define __NR_mkdir 39
+#define __NR_rmdir 40
+#define __NR_dup 41
+#define __NR_pipe 42
+#define __NR_times 43
+#define __NR_brk 45
+#define __NR_signal 48
+#define __NR_acct 51
+#define __NR_umount2 52
+#define __NR_ioctl 54
+#define __NR_fcntl 55
+#define __NR_setpgid 57
+#define __NR_umask 60
+#define __NR_chroot 61
+#define __NR_ustat 62
+#define __NR_dup2 63
+#define __NR_getppid 64
+#define __NR_getpgrp 65
+#define __NR_setsid 66
+#define __NR_sigaction 67
+#define __NR_sigsuspend 72
+#define __NR_sigpending 73
+#define __NR_sethostname 74
+#define __NR_setrlimit 75
+#define __NR_getrusage 77
+#define __NR_gettimeofday 78
+#define __NR_settimeofday 79
+#define __NR_symlink 83
+#define __NR_readlink 85
+#define __NR_uselib 86
+#define __NR_swapon 87
+#define __NR_reboot 88
+#define __NR_readdir 89
+#define __NR_mmap 90
+#define __NR_munmap 91
+#define __NR_truncate 92
+#define __NR_ftruncate 93
+#define __NR_fchmod 94
+#define __NR_getpriority 96
+#define __NR_setpriority 97
+#define __NR_statfs 99
+#define __NR_fstatfs 100
+#define __NR_socketcall 102
+#define __NR_syslog 103
+#define __NR_setitimer 104
+#define __NR_getitimer 105
+#define __NR_stat 106
+#define __NR_lstat 107
+#define __NR_fstat 108
+#define __NR_lookup_dcookie 110
+#define __NR_vhangup 111
+#define __NR_idle 112
+#define __NR_wait4 114
+#define __NR_swapoff 115
+#define __NR_sysinfo 116
+#define __NR_ipc 117
+#define __NR_fsync 118
+#define __NR_sigreturn 119
+#define __NR_clone 120
+#define __NR_setdomainname 121
+#define __NR_uname 122
+#define __NR_adjtimex 124
+#define __NR_mprotect 125
+#define __NR_sigprocmask 126
+#define __NR_create_module 127
+#define __NR_init_module 128
+#define __NR_delete_module 129
+#define __NR_get_kernel_syms 130
+#define __NR_quotactl 131
+#define __NR_getpgid 132
+#define __NR_fchdir 133
+#define __NR_bdflush 134
+#define __NR_sysfs 135
+#define __NR_personality 136
+#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
+#define __NR_getdents 141
+#define __NR_flock 143
+#define __NR_msync 144
+#define __NR_readv 145
+#define __NR_writev 146
+#define __NR_getsid 147
+#define __NR_fdatasync 148
+#define __NR__sysctl 149
+#define __NR_mlock 150
+#define __NR_munlock 151
+#define __NR_mlockall 152
+#define __NR_munlockall 153
+#define __NR_sched_setparam 154
+#define __NR_sched_getparam 155
+#define __NR_sched_setscheduler 156
+#define __NR_sched_getscheduler 157
+#define __NR_sched_yield 158
+#define __NR_sched_get_priority_max 159
+#define __NR_sched_get_priority_min 160
+#define __NR_sched_rr_get_interval 161
+#define __NR_nanosleep 162
+#define __NR_mremap 163
+#define __NR_query_module 167
+#define __NR_poll 168
+#define __NR_nfsservctl 169
+#define __NR_prctl 172
+#define __NR_rt_sigreturn 173
+#define __NR_rt_sigaction 174
+#define __NR_rt_sigprocmask 175
+#define __NR_rt_sigpending 176
+#define __NR_rt_sigtimedwait 177
+#define __NR_rt_sigqueueinfo 178
+#define __NR_rt_sigsuspend 179
+#define __NR_pread64 180
+#define __NR_pwrite64 181
+#define __NR_getcwd 183
+#define __NR_capget 184
+#define __NR_capset 185
+#define __NR_sigaltstack 186
+#define __NR_sendfile 187
+#define __NR_getpmsg 188
+#define __NR_putpmsg 189
+#define __NR_vfork 190
+#define __NR_pivot_root 217
+#define __NR_mincore 218
+#define __NR_madvise 219
+#define __NR_getdents64 220
+#define __NR_readahead 222
+#define __NR_setxattr 224
+#define __NR_lsetxattr 225
+#define __NR_fsetxattr 226
+#define __NR_getxattr 227
+#define __NR_lgetxattr 228
+#define __NR_fgetxattr 229
+#define __NR_listxattr 230
+#define __NR_llistxattr 231
+#define __NR_flistxattr 232
+#define __NR_removexattr 233
+#define __NR_lremovexattr 234
+#define __NR_fremovexattr 235
+#define __NR_gettid 236
+#define __NR_tkill 237
+#define __NR_futex 238
+#define __NR_sched_setaffinity 239
+#define __NR_sched_getaffinity 240
+#define __NR_tgkill 241
+/* Number 242 is reserved for tux */
+#define __NR_io_setup 243
+#define __NR_io_destroy 244
+#define __NR_io_getevents 245
+#define __NR_io_submit 246
+#define __NR_io_cancel 247
+#define __NR_exit_group 248
+#define __NR_epoll_create 249
+#define __NR_epoll_ctl 250
+#define __NR_epoll_wait 251
+#define __NR_set_tid_address 252
+#define __NR_fadvise64 253
+#define __NR_timer_create 254 /* base for the eight relative definitions below */
+#define __NR_timer_settime (__NR_timer_create+1) /* 255 */
+#define __NR_timer_gettime (__NR_timer_create+2) /* 256 */
+#define __NR_timer_getoverrun (__NR_timer_create+3) /* 257 */
+#define __NR_timer_delete (__NR_timer_create+4) /* 258 */
+#define __NR_clock_settime (__NR_timer_create+5) /* 259 */
+#define __NR_clock_gettime (__NR_timer_create+6) /* 260 */
+#define __NR_clock_getres (__NR_timer_create+7) /* 261 */
+#define __NR_clock_nanosleep (__NR_timer_create+8) /* 262 */
+/* Number 263 is reserved for vserver */
+#define __NR_statfs64 265
+#define __NR_fstatfs64 266
+#define __NR_remap_file_pages 267
+/* Number 268 is reserved for new sys_mbind */
+/* Number 269 is reserved for new sys_get_mempolicy */
+/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mq_open 271
+#define __NR_mq_unlink 272
+#define __NR_mq_timedsend 273
+#define __NR_mq_timedreceive 274
+#define __NR_mq_notify 275
+#define __NR_mq_getsetattr 276
+#define __NR_kexec_load 277
+#define __NR_add_key 278
+#define __NR_request_key 279
+#define __NR_keyctl 280
+#define __NR_waitid 281
+#define __NR_ioprio_set 282
+#define __NR_ioprio_get 283
+#define __NR_inotify_init 284
+#define __NR_inotify_add_watch 285
+#define __NR_inotify_rm_watch 286
+/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_openat 288
+#define __NR_mkdirat 289
+#define __NR_mknodat 290
+#define __NR_fchownat 291
+#define __NR_futimesat 292
+#define __NR_unlinkat 294
+#define __NR_renameat 295
+#define __NR_linkat 296
+#define __NR_symlinkat 297
+#define __NR_readlinkat 298
+#define __NR_fchmodat 299
+#define __NR_faccessat 300
+#define __NR_pselect6 301
+#define __NR_ppoll 302
+#define __NR_unshare 303
+#define __NR_set_robust_list 304
+#define __NR_get_robust_list 305
+#define __NR_splice 306
+#define __NR_sync_file_range 307
+#define __NR_tee 308
+#define __NR_vmsplice 309
+/* Number 310 is reserved for new sys_move_pages */
+#define __NR_getcpu 311
+#define __NR_epoll_pwait 312
+#define __NR_utimes 313
+#define __NR_fallocate 314
+#define __NR_utimensat 315
+#define __NR_signalfd 316
+#define __NR_timerfd 317
+#define __NR_eventfd 318
+#define __NR_timerfd_create 319
+#define __NR_timerfd_settime 320
+#define __NR_timerfd_gettime 321
+#define __NR_signalfd4 322
+#define __NR_eventfd2 323
+#define __NR_inotify_init1 324
+#define __NR_pipe2 325
+#define __NR_dup3 326
+#define __NR_epoll_create1 327
+#define __NR_preadv 328
+#define __NR_pwritev 329
+#define __NR_rt_tgsigqueueinfo 330
+#define __NR_perf_event_open 331
+#define __NR_fanotify_init 332
+#define __NR_fanotify_mark 333
+#define __NR_prlimit64 334
+#define __NR_name_to_handle_at 335
+#define __NR_open_by_handle_at 336
+#define __NR_clock_adjtime 337
+#define __NR_syncfs 338
+#define __NR_setns 339
+#define __NR_process_vm_readv 340
+#define __NR_process_vm_writev 341
+#define __NR_s390_runtime_instr 342
+#define __NR_kcmp 343
+#define __NR_finit_module 344
+#define __NR_sched_setattr 345
+#define __NR_sched_getattr 346
+#define __NR_renameat2 347
+#define NR_syscalls 348 /* one more than the highest number defined in this header (347, __NR_renameat2) */
+
+/*
+ * There are some system calls that are not present on 64 bit, some
+ * have a different name although they do the same (e.g. __NR_chown32
+ * is __NR_chown on 64 bit).
+ */
+#ifndef __s390x__
+
+#define __NR_time 13
+#define __NR_lchown 16
+#define __NR_setuid 23
+#define __NR_getuid 24
+#define __NR_stime 25
+#define __NR_setgid 46
+#define __NR_getgid 47
+#define __NR_geteuid 49
+#define __NR_getegid 50
+#define __NR_setreuid 70
+#define __NR_setregid 71
+#define __NR_getrlimit 76
+#define __NR_getgroups 80
+#define __NR_setgroups 81
+#define __NR_fchown 95
+#define __NR_ioperm 101
+#define __NR_setfsuid 138
+#define __NR_setfsgid 139
+#define __NR__llseek 140
+#define __NR__newselect 142
+#define __NR_setresuid 164
+#define __NR_getresuid 165
+#define __NR_setresgid 170
+#define __NR_getresgid 171
+#define __NR_chown 182
+#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
+#define __NR_mmap2 192
+#define __NR_truncate64 193
+#define __NR_ftruncate64 194
+#define __NR_stat64 195
+#define __NR_lstat64 196
+#define __NR_fstat64 197
+#define __NR_lchown32 198
+#define __NR_getuid32 199
+#define __NR_getgid32 200
+#define __NR_geteuid32 201
+#define __NR_getegid32 202
+#define __NR_setreuid32 203
+#define __NR_setregid32 204
+#define __NR_getgroups32 205
+#define __NR_setgroups32 206
+#define __NR_fchown32 207
+#define __NR_setresuid32 208
+#define __NR_getresuid32 209
+#define __NR_setresgid32 210
+#define __NR_getresgid32 211
+#define __NR_chown32 212
+#define __NR_setuid32 213
+#define __NR_setgid32 214
+#define __NR_setfsuid32 215
+#define __NR_setfsgid32 216
+#define __NR_fcntl64 221
+#define __NR_sendfile64 223
+#define __NR_fadvise64_64 264
+#define __NR_fstatat64 293
+
+#else
+
+#define __NR_select 142
+#define __NR_getrlimit 191 /* SuS compliant getrlimit */
+#define __NR_lchown 198
+#define __NR_getuid 199
+#define __NR_getgid 200
+#define __NR_geteuid 201
+#define __NR_getegid 202
+#define __NR_setreuid 203
+#define __NR_setregid 204
+#define __NR_getgroups 205
+#define __NR_setgroups 206
+#define __NR_fchown 207
+#define __NR_setresuid 208
+#define __NR_getresuid 209
+#define __NR_setresgid 210
+#define __NR_getresgid 211
+#define __NR_chown 212
+#define __NR_setuid 213
+#define __NR_setgid 214
+#define __NR_setfsuid 215
+#define __NR_setfsgid 216
+#define __NR_newfstatat 293
+
+#endif
+
+#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
new file mode 100644
index 00000000000..a9a4ebf79fa
--- /dev/null
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -0,0 +1,21 @@
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/arch/s390/include/uapi/asm/vtoc.h b/arch/s390/include/uapi/asm/vtoc.h
new file mode 100644
index 00000000000..221419de275
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vtoc.h
@@ -0,0 +1,213 @@
+/*
+ * This file contains volume label definitions for DASD devices.
+ *
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Volker Sameske <sameske@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_VTOC_H
+#define _ASM_S390_VTOC_H
+
+#include <linux/types.h>
+
+struct vtoc_ttr /* 3-byte packed address; used for DS1LSTAR ("last used track and block on track") */
+{
+ __u16 tt; /* NOTE(review): presumably the track part - confirm */
+ __u8 r; /* NOTE(review): presumably the block/record on that track - confirm */
+} __attribute__ ((packed));
+
+struct vtoc_cchhb /* 5-byte packed address; used for the VTOC address and the DSCB pointers below */
+{
+ __u16 cc; /* NOTE(review): presumably cylinder - confirm */
+ __u16 hh; /* NOTE(review): presumably head - confirm */
+ __u8 b; /* NOTE(review): presumably block/record number - confirm */
+} __attribute__ ((packed));
+
+struct vtoc_cchh /* 4-byte packed address; same as vtoc_cchhb without the trailing b member */
+{
+ __u16 cc; /* NOTE(review): presumably cylinder - confirm */
+ __u16 hh; /* NOTE(review): presumably head - confirm */
+} __attribute__ ((packed));
+
+struct vtoc_labeldate /* 3-byte packed date; DS1CREDT below describes the format as "ydd" */
+{
+ __u8 year; /* one byte; NOTE(review): epoch/offset of the year is not visible here - confirm */
+ __u16 day; /* two bytes; NOTE(review): presumably day of the year - confirm */
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_cdl
+{
+ char volkey[4]; /* volume key = volume label */
+ char vollbl[4]; /* volume label */
+ char volid[6]; /* volume identifier */
+ __u8 security; /* security byte */
+ struct vtoc_cchhb vtoc; /* VTOC address */
+ char res1[5]; /* reserved */
+ char cisize[4]; /* CI-size for FBA,... */
+ /* ...blanks for CKD */
+ char blkperci[4]; /* no of blocks per CI (FBA), blanks for CKD */
+ char labperci[4]; /* no of labels per CI (FBA), blanks for CKD */
+ char res2[4]; /* reserved */
+ char lvtoc[14]; /* owner code for LVTOC */
+ char res3[29]; /* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_ldl {
+ char vollbl[4]; /* volume label */
+ char volid[6]; /* volume identifier */
+ char res3[69]; /* reserved */
+ char ldl_version; /* version number, valid for ldl format */
+ __u64 formatted_blocks; /* valid when ldl_version >= f2 */
+} __attribute__ ((packed));
+
+struct vtoc_extent
+{
+ __u8 typeind; /* extent type indicator */
+ __u8 seqno; /* extent sequence number */
+ struct vtoc_cchh llimit; /* starting point of this extent */
+ struct vtoc_cchh ulimit; /* ending point of this extent */
+} __attribute__ ((packed));
+
+struct vtoc_dev_const
+{
+ __u16 DS4DSCYL; /* number of logical cyls */
+ __u16 DS4DSTRK; /* number of tracks in a logical cylinder */
+ __u16 DS4DEVTK; /* device track length */
+ __u8 DS4DEVI; /* non-last keyed record overhead */
+ __u8 DS4DEVL; /* last keyed record overhead */
+ __u8 DS4DEVK; /* non-keyed record overhead differential */
+ __u8 DS4DEVFG; /* flag byte */
+ __u16 DS4DEVTL; /* device tolerance */
+ __u8 DS4DEVDT; /* number of DSCB's per track */
+ __u8 DS4DEVDB; /* number of directory blocks per track */
+} __attribute__ ((packed));
+
+struct vtoc_format1_label /* packed layout of a format 1 label; field names follow the DS1* convention */
+{
+ char DS1DSNAM[44]; /* data set name */
+ __u8 DS1FMTID; /* format identifier */
+ char DS1DSSN[6]; /* data set serial number */
+ __u16 DS1VOLSQ; /* volume sequence number */
+ struct vtoc_labeldate DS1CREDT; /* creation date: ydd */
+ struct vtoc_labeldate DS1EXPDT; /* expiration date */
+ __u8 DS1NOEPV; /* number of extents on volume */
+ __u8 DS1NOBDB; /* no. of bytes used in last directory blk */
+ __u8 DS1FLAG1; /* flag 1 */
+ char DS1SYSCD[13]; /* system code */
+ struct vtoc_labeldate DS1REFD; /* date last referenced */
+ __u8 DS1SMSFG; /* system managed storage indicators */
+ __u8 DS1SCXTF; /* sec. space extension flag byte */
+ __u16 DS1SCXTV; /* secondary space extension value */
+ __u8 DS1DSRG1; /* data set organisation byte 1 */
+ __u8 DS1DSRG2; /* data set organisation byte 2 */
+ __u8 DS1RECFM; /* record format */
+ __u8 DS1OPTCD; /* option code */
+ __u16 DS1BLKL; /* block length */
+ __u16 DS1LRECL; /* record length */
+ __u8 DS1KEYL; /* key length */
+ __u16 DS1RKP; /* relative key position */
+ __u8 DS1DSIND; /* data set indicators */
+ __u8 DS1SCAL1; /* secondary allocation flag byte */
+ char DS1SCAL3[3]; /* secondary allocation quantity */
+ struct vtoc_ttr DS1LSTAR; /* last used track and block on track */
+ __u16 DS1TRBAL; /* space remaining on last used track */
+ __u16 res1; /* reserved */
+ struct vtoc_extent DS1EXT1; /* first extent description */
+ struct vtoc_extent DS1EXT2; /* second extent description */
+ struct vtoc_extent DS1EXT3; /* third extent description */
+ struct vtoc_cchhb DS1PTRDS; /* possible pointer to f2 or f3 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_format4_label
+{
+ char DS4KEYCD[44]; /* key code for VTOC labels: 44 times 0x04 */
+ __u8 DS4IDFMT; /* format identifier */
+ struct vtoc_cchhb DS4HPCHR; /* highest address of a format 1 DSCB */
+ __u16 DS4DSREC; /* number of available DSCB's */
+ struct vtoc_cchh DS4HCCHH; /* CCHH of next available alternate track */
+ __u16 DS4NOATK; /* number of remaining alternate tracks */
+ __u8 DS4VTOCI; /* VTOC indicators */
+ __u8 DS4NOEXT; /* number of extents in VTOC */
+ __u8 DS4SMSFG; /* system managed storage indicators */
+ __u8 DS4DEVAC; /* number of alternate cylinders.
+ * Subtract from first two bytes of
+ * DS4DEVSZ to get number of usable
+ * cylinders. can be zero. valid
+ * only if DS4DEVAV on. */
+ struct vtoc_dev_const DS4DEVCT; /* device constants */
+ char DS4AMTIM[8]; /* VSAM time stamp */
+ char DS4AMCAT[3]; /* VSAM catalog indicator */
+ char DS4R2TIM[8]; /* VSAM volume/catalog match time stamp */
+ char res1[5]; /* reserved */
+ char DS4F6PTR[5]; /* pointer to first format 6 DSCB */
+ struct vtoc_extent DS4VTOCE; /* VTOC extent description */
+ char res2[10]; /* reserved */
+ __u8 DS4EFLVL; /* extended free-space management level */
+ struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */
+ char res3; /* reserved */
+ __u32 DS4DCYL; /* number of logical cyls */
+ char res4[2]; /* reserved */
+ __u8 DS4DEVF2; /* device flags */
+ char res5; /* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_ds5ext
+{
+ __u16 t; /* RTA of the first track of free extent */
+ __u16 fc; /* number of whole cylinders in free ext. */
+ __u8 ft; /* number of remaining free tracks */
+} __attribute__ ((packed));
+
+struct vtoc_format5_label
+{
+ char DS5KEYID[4]; /* key identifier */
+ struct vtoc_ds5ext DS5AVEXT; /* first available (free-space) extent. */
+ struct vtoc_ds5ext DS5EXTAV[7]; /* seven available extents */
+ __u8 DS5FMTID; /* format identifier */
+ struct vtoc_ds5ext DS5MAVET[18]; /* eighteen available extents */
+ struct vtoc_cchhb DS5PTRDS; /* pointer to next format5 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_ds7ext
+{
+ __u32 a; /* starting RTA value */
+ __u32 b; /* ending RTA value + 1 */
+} __attribute__ ((packed));
+
+struct vtoc_format7_label
+{
+ char DS7KEYID[4]; /* key identifier */
+ struct vtoc_ds7ext DS7EXTNT[5]; /* space for 5 extent descriptions */
+ __u8 DS7FMTID; /* format identifier */
+ struct vtoc_ds7ext DS7ADEXT[11]; /* space for 11 extent descriptions */
+ char res1[2]; /* reserved */
+ struct vtoc_cchhb DS7PTRDS; /* pointer to next FMT7 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_cms_label {
+ __u8 label_id[4]; /* Label identifier */
+ __u8 vol_id[6]; /* Volid */
+ __u16 version_id; /* Version identifier */
+ __u32 block_size; /* Disk block size */
+ __u32 origin_ptr; /* Disk origin pointer */
+ __u32 usable_count; /* Number of usable cylinders/blocks */
+ __u32 formatted_count; /* Maximum number of formatted cylinders/
+ * blocks */
+ __u32 block_count; /* Disk size in CMS blocks */
+ __u32 used_count; /* Number of CMS blocks in use */
+ __u32 fst_size; /* File Status Table (FST) size */
+ __u32 fst_count; /* Number of FSTs per CMS block */
+ __u8 format_date[6]; /* Disk FORMAT date */
+ __u8 reserved1[2];
+ __u32 disk_offset; /* Disk offset when reserved*/
+ __u32 map_block; /* Allocation Map Block with next hole */
+ __u32 hblk_disp; /* Displacement into HBLK data of next hole */
+ __u32 user_disp; /* Displacement into user part of Allocation
+ * map */
+ __u8 reserved2[4];
+ __u8 segment_name[8]; /* Name of shared segment */
+} __attribute__ ((packed));
+
+#endif /* _ASM_S390_VTOC_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
new file mode 100644
index 00000000000..f2b18eacaca
--- /dev/null
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -0,0 +1,341 @@
+/*
+ * include/asm-s390/zcrypt.h
+ *
+ * zcrypt 2.1.0 (user-visible header)
+ *
+ * Copyright IBM Corp. 2001, 2006
+ * Author(s): Robert Burroughs
+ * Eric Rossman (edrossma@us.ibm.com)
+ *
+ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ASM_S390_ZCRYPT_H
+#define __ASM_S390_ZCRYPT_H
+
+#define ZCRYPT_VERSION 2
+#define ZCRYPT_RELEASE 1
+#define ZCRYPT_VARIANT 1
+
+#include <linux/ioctl.h>
+#include <linux/compiler.h>
+
+/**
+ * struct ica_rsa_modexpo
+ *
+ * Requirements:
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ * the left with zeroes.
+ * - length(b_key) = inputdatalength
+ * - length(n_modulus) = inputdatalength
+ */
+struct ica_rsa_modexpo {
+ char __user * inputdata;
+ unsigned int inputdatalength;
+ char __user * outputdata;
+ unsigned int outputdatalength;
+ char __user * b_key;
+ char __user * n_modulus;
+};
+
+/**
+ * struct ica_rsa_modexpo_crt
+ *
+ * Requirements:
+ * - inputdatalength is even.
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ * the left with zeroes.
+ * - length(bp_key) = inputdatalength/2 + 8
+ * - length(bq_key) = inputdatalength/2
+ * - length(np_prime) = inputdatalength/2 + 8
+ * - length(nq_prime) = inputdatalength/2
+ * - length(u_mult_inv) = inputdatalength/2 + 8
+ */
+struct ica_rsa_modexpo_crt {
+ char __user * inputdata;
+ unsigned int inputdatalength;
+ char __user * outputdata;
+ unsigned int outputdatalength;
+ char __user * bp_key;
+ char __user * bq_key;
+ char __user * np_prime;
+ char __user * nq_prime;
+ char __user * u_mult_inv;
+};
+
+/**
+ * CPRBX
+ * Note that all shorts and ints are big-endian.
+ * All pointer fields are 16 bytes long, and mean nothing.
+ *
+ * A request CPRB is followed by a request_parameter_block.
+ *
+ * The request (or reply) parameter block is organized thus:
+ * function code
+ * VUD block
+ * key block
+ */
+struct CPRBX {
+ unsigned short cprb_len; /* CPRB length 220 */
+ unsigned char cprb_ver_id; /* CPRB version id. 0x02 */
+ unsigned char pad_000[3]; /* Alignment pad bytes */
+ unsigned char func_id[2]; /* function id 0x5432 */
+ unsigned char cprb_flags[4]; /* Flags */
+ unsigned int req_parml; /* request parameter buffer len */
+ unsigned int req_datal; /* request data buffer */
+ unsigned int rpl_msgbl; /* reply message block length */
+ unsigned int rpld_parml; /* replied parameter block len */
+ unsigned int rpl_datal; /* reply data block len */
+ unsigned int rpld_datal; /* replied data block len */
+ unsigned int req_extbl; /* request extension block len */
+ unsigned char pad_001[4]; /* reserved */
+ unsigned int rpld_extbl; /* replied extension block len */
+ unsigned char padx000[16 - sizeof (char *)];
+ unsigned char * req_parmb; /* request parm block 'address' */
+ unsigned char padx001[16 - sizeof (char *)];
+ unsigned char * req_datab; /* request data block 'address' */
+ unsigned char padx002[16 - sizeof (char *)];
+ unsigned char * rpl_parmb; /* reply parm block 'address' */
+ unsigned char padx003[16 - sizeof (char *)];
+ unsigned char * rpl_datab; /* reply data block 'address' */
+ unsigned char padx004[16 - sizeof (char *)];
+ unsigned char * req_extb; /* request extension block 'addr'*/
+ unsigned char padx005[16 - sizeof (char *)];
+ unsigned char * rpl_extb; /* reply extension block 'address'*/
+ unsigned short ccp_rtcode; /* server return code */
+ unsigned short ccp_rscode; /* server reason code */
+ unsigned int mac_data_len; /* Mac Data Length */
+ unsigned char logon_id[8]; /* Logon Identifier */
+ unsigned char mac_value[8]; /* Mac Value */
+ unsigned char mac_content_flgs;/* Mac content flag byte */
+ unsigned char pad_002; /* Alignment */
+ unsigned short domain; /* Domain */
+ unsigned char usage_domain[4];/* Usage domain */
+ unsigned char cntrl_domain[4];/* Control domain */
+ unsigned char S390enf_mask[4];/* S/390 enforcement mask */
+ unsigned char pad_004[36]; /* reserved */
+} __attribute__((packed));
+
+/**
+ * xcRB
+ */
+struct ica_xcRB {
+ unsigned short agent_ID;
+ unsigned int user_defined;
+ unsigned short request_ID;
+ unsigned int request_control_blk_length;
+ unsigned char padding1[16 - sizeof (char *)];
+ char __user * request_control_blk_addr;
+ unsigned int request_data_length;
+ char padding2[16 - sizeof (char *)];
+ char __user * request_data_address;
+ unsigned int reply_control_blk_length;
+ char padding3[16 - sizeof (char *)];
+ char __user * reply_control_blk_addr;
+ unsigned int reply_data_length;
+ char padding4[16 - sizeof (char *)];
+ char __user * reply_data_addr;
+ unsigned short priority_window;
+ unsigned int status;
+} __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len: CPRB header length [0x0020]
+ * @cprb_ver_id: CPRB version id. [0x04]
+ * @pad_000: Alignment pad bytes
+ * @flags: Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id: Function id / subtype [0x5434]
+ * @source_id: Source id [originator id]
+ * @target_id: Target id [usage/ctrl domain id]
+ * @ret_code: Return code
+ * @reserved1: Reserved
+ * @reserved2: Reserved
+ * @payload_len: Payload length
+ */
+struct ep11_cprb {
+ uint16_t cprb_len;
+ unsigned char cprb_ver_id;
+ unsigned char pad_000[2];
+ unsigned char flags;
+ unsigned char func_id[2];
+ uint32_t source_id;
+ uint32_t target_id;
+ uint32_t ret_code;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id: AP device id
+ * @dom_id: Usage domain id
+ */
+struct ep11_target_dev {
+ uint16_t ap_id;
+ uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num: Number of target adapters
+ * @targets: Addr to target adapter list
+ * @weight: Level of request priority
+ * @req_no: Request id/number
+ * @req_len: Request length
+ * @req: Addr to request block
+ * @resp_len: Response length
+ * @resp: Addr to response block
+ */
+struct ep11_urb {
+ uint16_t targets_num;
+ uint64_t targets;
+ uint64_t weight;
+ uint64_t req_no;
+ uint64_t req_len;
+ uint64_t req;
+ uint64_t resp_len;
+ uint64_t resp;
+} __attribute__((packed));
+
+#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
+
+#define ZCRYPT_IOCTL_MAGIC 'z'
+
+/**
+ * Interface notes:
+ *
+ * The ioctl()s which are implemented (along with relevant details)
+ * are:
+ *
+ * ICARSAMODEXPO
+ * Perform an RSA operation using a Modulus-Exponent pair
+ * This takes an ica_rsa_modexpo struct as its arg.
+ *
+ * NOTE: please refer to the comments preceding this structure
+ * for the implementation details for the contents of the
+ * block
+ *
+ * ICARSACRT
+ * Perform an RSA operation using a Chinese-Remainder Theorem key
+ * This takes an ica_rsa_modexpo_crt struct as its arg.
+ *
+ * NOTE: please refer to the comments preceding this structure
+ * for the implementation details for the contents of the
+ * block
+ *
+ * ZSECSENDCPRB
+ * Send an arbitrary CPRB to a crypto card.
+ *
+ * ZSENDEP11CPRB
+ * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
+ * Z90STAT_STATUS_MASK
+ * Return a 64-element array of unsigned chars for the status of
+ * all devices.
+ * 0x01: PCICA
+ * 0x02: PCICC
+ * 0x03: PCIXCC_MCL2
+ * 0x04: PCIXCC_MCL3
+ * 0x05: CEX2C
+ * 0x06: CEX2A
+ * 0x0d: device is disabled via the proc filesystem
+ *
+ * Z90STAT_QDEPTH_MASK
+ * Return a 64-element array of unsigned chars for the queue
+ * depth of all devices.
+ *
+ * Z90STAT_PERDEV_REQCNT
+ * Return a 64-element array of unsigned integers for the number
+ * of successfully completed requests per device since the device
+ * was detected and made available.
+ *
+ * Z90STAT_REQUESTQ_COUNT
+ * Return an integer count of the number of entries waiting to be
+ * sent to a device.
+ *
+ * Z90STAT_PENDINGQ_COUNT
+ * Return an integer count of the number of entries sent to all
+ * devices awaiting the reply.
+ *
+ * Z90STAT_TOTALOPEN_COUNT
+ * Return an integer count of the number of open file handles.
+ *
+ * Z90STAT_DOMAIN_INDEX
+ * Return the integer value of the Cryptographic Domain.
+ *
+ * The following ioctls are deprecated and should no longer be used:
+ *
+ * Z90STAT_TOTALCOUNT
+ * Return an integer count of all device types together.
+ *
+ * Z90STAT_PCICACOUNT
+ * Return an integer count of all PCICAs.
+ *
+ * Z90STAT_PCICCCOUNT
+ * Return an integer count of all PCICCs.
+ *
+ * Z90STAT_PCIXCCMCL2COUNT
+ * Return an integer count of all MCL2 PCIXCCs.
+ *
+ * Z90STAT_PCIXCCMCL3COUNT
+ * Return an integer count of all MCL3 PCIXCCs.
+ *
+ * Z90STAT_CEX2CCOUNT
+ * Return an integer count of all CEX2Cs.
+ *
+ * Z90STAT_CEX2ACOUNT
+ * Return an integer count of all CEX2As.
+ *
+ * ICAZ90STATUS
+ * Return some device driver status in an ica_z90_status struct
+ * This takes an ica_z90_status struct as its arg.
+ *
+ * Z90STAT_PCIXCCCOUNT
+ * Return an integer count of all PCIXCCs (MCL2 + MCL3).
+ * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
+ * MCL2 PCIXCCs.
+ */
+
+/**
+ * Supported ioctl calls
+ */
+#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+
+/* New status calls */
+#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
+#define Z90STAT_PCICACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int)
+#define Z90STAT_PCICCCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int)
+#define Z90STAT_PCIXCCMCL2COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int)
+#define Z90STAT_PCIXCCMCL3COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int)
+#define Z90STAT_CEX2CCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int)
+#define Z90STAT_CEX2ACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int)
+#define Z90STAT_REQUESTQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
+#define Z90STAT_PENDINGQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
+#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
+#define Z90STAT_DOMAIN_INDEX _IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
+#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+#endif /* __ASM_S390_ZCRYPT_H */
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
new file mode 100644
index 00000000000..c5f676c3c22
--- /dev/null
+++ b/arch/s390/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 6302f508258..a95c4ca9961 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -2,35 +2,69 @@
# Makefile for the linux kernel.
#
+ifdef CONFIG_FUNCTION_TRACER
+# Don't trace early setup code and tracing code
+CFLAGS_REMOVE_early.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
#
# Passing null pointers is ok for smp code, since we access the lowcore here.
#
CFLAGS_smp.o := -Wno-nonnull
-obj-y := bitmap.o traps.o time.o process.o base.o early.o \
- setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
- s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+#
+# Disable tailcall optimizations for stack / callchain walking functions
+# since this might generate broken code when accessing register 15 and
+# passing its content to other functions.
+#
+CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
+CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+
+#
+# Pass UTS_MACHINE for user_regset definition
+#
+CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
+
+obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o
+obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
+obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
+obj-y += dumpstack.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
+obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
-extra-y += head.o init_task.o vmlinux.lds
+extra-y += head.o vmlinux.lds
+extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o)
obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
-obj-$(CONFIG_SMP) += smp.o topology.o
-
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SCHED_BOOK) += topology.o
+obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
-obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \
- compat_wrapper.o compat_exec_domain.o \
- binfmt_elf32.o $(compat-obj-y)
+obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
+obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
-obj-$(CONFIG_VIRT_TIMER) += vtime.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-# Kexec part
-S390_KEXEC_OBJS := machine_kexec.o crash.o
-S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
-obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS)
+ifdef CONFIG_64BIT
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
+ perf_cpum_cf_events.o
+obj-y += runtime_instr.o cache.o
+endif
+# vdso
+obj-$(CONFIG_64BIT) += vdso64/
+obj-$(CONFIG_32BIT) += vdso32/
+obj-$(CONFIG_COMPAT) += vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index fa28ecae636..afe1715a4eb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -4,39 +4,179 @@
* and format the required data.
*/
-#include <linux/sched.h>
+#define ASM_OFFSETS_C
+
#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
+#include <linux/sched.h>
+#include <asm/cputime.h>
+#include <asm/vdso.h>
+#include <asm/pgtable.h>
+
+/*
+ * Make sure that the compiler is new enough. We want a compiler that
+ * is known to work with the "Q" assembler constraint.
+ */
+#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#error Your compiler is too old; please use version 3.3.3 or newer
+#endif
int main(void)
{
DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
- DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info));
- DEFINE(__THREAD_mm_segment,
- offsetof(struct task_struct, thread.mm_segment));
+ DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
BLANK();
DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
BLANK();
- DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid));
- DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address));
- DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id));
+ DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
+ DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
+ DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
BLANK();
DEFINE(__TI_task, offsetof(struct thread_info, task));
DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
DEFINE(__TI_flags, offsetof(struct thread_info, flags));
+ DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
+ DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
+ DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
+ DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
BLANK();
DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
- DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc));
- DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap));
+ DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
+ DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
+ DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
+ DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+ BLANK();
+ /* timeval/timezone offsets for use by vdso */
+ DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
+ DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
+ DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
+ DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
+ DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+ DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+ DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+ DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
+ DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
+ DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+ DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
+ /* constants used by the vdso */
+ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
+ DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ BLANK();
+ /* idle data offsets */
+ DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
+ DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
+ DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
+ DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
+ /* lowcore offsets */
+ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
+ DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
+ DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
+ DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
+ DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
+ DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
+ DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
+ DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
+ DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
+ DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+ DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
+ DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
+ DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
+ DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+ DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
+ DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+ DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
+ DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
+ DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
+ DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
+ DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
+ DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
+ DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+ DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
+ DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
+ DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
+ DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
+ DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
+ DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
+ DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
+ DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
+ DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
+ DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
+ DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
+ DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
+ DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+ BLANK();
+ DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
+ DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
+ DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
+ DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
+ DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
+ DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
+ DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
+ DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
+ DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
+ DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
+ DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
+ DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
+ DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
+ DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
+ DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
+ DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
+ DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
+ DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
+ DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
+ DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
+ DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
+ DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
+ DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
+ DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
+ DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
+ DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
+ DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
+ DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
+ DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
+ DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
+ DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
+ DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+ BLANK();
+ DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
+ DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
+ DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
+ DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
+ DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
+ DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
+ DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
+ DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
+#ifdef CONFIG_32BIT
+ DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
+#else /* CONFIG_32BIT */
+ DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+ DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+ DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
+ DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
+ DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
+ DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
+ DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
+ DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
+ DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
+ DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
+ DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
+ DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+ DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+ DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
+#endif /* CONFIG_32BIT */
return 0;
}
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index dc7e5259770..797a823a227 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -1,18 +1,19 @@
/*
* arch/s390/kernel/base.S
*
- * Copyright IBM Corp. 2006,2007
+ * Copyright IBM Corp. 2006, 2007
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
* Michael Holzheu <holzheu@de.ibm.com>
*/
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/lowcore.h>
+#include <asm/sigp.h>
#ifdef CONFIG_64BIT
- .globl s390_base_mcck_handler
-s390_base_mcck_handler:
+ENTRY(s390_base_mcck_handler)
basr %r13,0
0: lg %r15,__LC_PANIC_STACK # load panic stack
aghi %r15,-STACK_FRAME_OVERHEAD
@@ -26,14 +27,14 @@ s390_base_mcck_handler:
lpswe __LC_MCK_OLD_PSW
.section .bss
+ .align 8
.globl s390_base_mcck_handler_fn
s390_base_mcck_handler_fn:
.quad 0
.previous
- .globl s390_base_ext_handler
-s390_base_ext_handler:
- stmg %r0,%r15,__LC_SAVE_AREA
+ENTRY(s390_base_ext_handler)
+ stmg %r0,%r15,__LC_SAVE_AREA_ASYNC
basr %r13,0
0: aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_ext_handler_fn
@@ -41,19 +42,19 @@ s390_base_ext_handler:
ltgr %r1,%r1
jz 1f
basr %r14,%r1
-1: lmg %r0,%r15,__LC_SAVE_AREA
+1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
lpswe __LC_EXT_OLD_PSW
.section .bss
+ .align 8
.globl s390_base_ext_handler_fn
s390_base_ext_handler_fn:
.quad 0
.previous
- .globl s390_base_pgm_handler
-s390_base_pgm_handler:
- stmg %r0,%r15,__LC_SAVE_AREA
+ENTRY(s390_base_pgm_handler)
+ stmg %r0,%r15,__LC_SAVE_AREA_SYNC
basr %r13,0
0: aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_pgm_handler_fn
@@ -61,7 +62,7 @@ s390_base_pgm_handler:
ltgr %r1,%r1
jz 1f
basr %r14,%r1
- lmg %r0,%r15,__LC_SAVE_AREA
+ lmg %r0,%r15,__LC_SAVE_AREA_SYNC
lpswe __LC_PGM_OLD_PSW
1: lpswe disabled_wait_psw-0b(%r13)
@@ -70,15 +71,69 @@ disabled_wait_psw:
.quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler
.section .bss
+ .align 8
.globl s390_base_pgm_handler_fn
s390_base_pgm_handler_fn:
.quad 0
.previous
+#
+# Calls diag 308 subcode 1 and continues execution
+#
+# The following conditions must be ensured before calling this function:
+# * Prefix register = 0
+# * Lowcore protection is disabled
+#
+ENTRY(diag308_reset)
+ larl %r4,.Lctlregs # Save control registers
+ stctg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Floating point control register
+ stfpc 0(%r4)
+ larl %r4,.Lcontinue_psw # Save PSW flags
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r4)
+ larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
+ lghi %r3,0
+ lg %r4,0(%r4) # Save PSW
+ sturg %r4,%r3 # Use sturg, because of large pages
+ lghi %r1,1
+ diag %r1,%r1,0x308
+.Lrestart_part2:
+ lhi %r0,0 # Load r0 with zero
+ lhi %r1,2 # Use mode 2 = ESAME (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
+ sam64 # Switch to 64 bit addressing mode
+ larl %r4,.Lctlregs # Restore control registers
+ lctlg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Restore floating point ctl register
+ lfpc 0(%r4)
+ larl %r4,.Lcontinue_psw # Restore PSW flags
+ lpswe 0(%r4)
+.Lcontinue:
+ br %r14
+.align 16
+.Lrestart_psw:
+ .long 0x00080000,0x80000000 + .Lrestart_part2
+
+ .section .data..nosave,"aw",@progbits
+.align 8
+.Lcontinue_psw:
+ .quad 0,.Lcontinue
+ .previous
+
+ .section .bss
+.align 8
+.Lctlregs:
+ .rept 16
+ .quad 0
+ .endr
+.Lfpctl:
+ .long 0
+ .previous
+
#else /* CONFIG_64BIT */
- .globl s390_base_mcck_handler
-s390_base_mcck_handler:
+ENTRY(s390_base_mcck_handler)
basr %r13,0
0: l %r15,__LC_PANIC_STACK # load panic stack
ahi %r15,-STACK_FRAME_OVERHEAD
@@ -93,14 +148,14 @@ s390_base_mcck_handler:
2: .long s390_base_mcck_handler_fn
.section .bss
+ .align 4
.globl s390_base_mcck_handler_fn
s390_base_mcck_handler_fn:
.long 0
.previous
- .globl s390_base_ext_handler
-s390_base_ext_handler:
- stm %r0,%r15,__LC_SAVE_AREA
+ENTRY(s390_base_ext_handler)
+ stm %r0,%r15,__LC_SAVE_AREA_ASYNC
basr %r13,0
0: ahi %r15,-STACK_FRAME_OVERHEAD
l %r1,2f-0b(%r13)
@@ -108,21 +163,21 @@ s390_base_ext_handler:
ltr %r1,%r1
jz 1f
basr %r14,%r1
-1: lm %r0,%r15,__LC_SAVE_AREA
+1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC
ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
lpsw __LC_EXT_OLD_PSW
2: .long s390_base_ext_handler_fn
.section .bss
+ .align 4
.globl s390_base_ext_handler_fn
s390_base_ext_handler_fn:
.long 0
.previous
- .globl s390_base_pgm_handler
-s390_base_pgm_handler:
- stm %r0,%r15,__LC_SAVE_AREA
+ENTRY(s390_base_pgm_handler)
+ stm %r0,%r15,__LC_SAVE_AREA_SYNC
basr %r13,0
0: ahi %r15,-STACK_FRAME_OVERHEAD
l %r1,2f-0b(%r13)
@@ -130,7 +185,7 @@ s390_base_pgm_handler:
ltr %r1,%r1
jz 1f
basr %r14,%r1
- lm %r0,%r15,__LC_SAVE_AREA
+ lm %r0,%r15,__LC_SAVE_AREA_SYNC
lpsw __LC_PGM_OLD_PSW
1: lpsw disabled_wait_psw-0b(%r13)
@@ -142,6 +197,7 @@ disabled_wait_psw:
.long 0x000a0000,0x00000000 + s390_base_pgm_handler
.section .bss
+ .align 4
.globl s390_base_pgm_handler_fn
s390_base_pgm_handler_fn:
.long 0
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
deleted file mode 100644
index 3e1c315b736..00000000000
--- a/arch/s390/kernel/binfmt_elf32.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Support for 32-bit Linux for S390 ELF binaries.
- *
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Gerhard Tonn (ton@de.ibm.com)
- *
- * Heavily inspired by the 32-bit Sparc compat code which is
- * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#define __ASMS390_ELF_H
-
-#include <linux/time.h>
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS ELFCLASS32
-#define ELF_DATA ELFDATA2MSB
-#define ELF_ARCH EM_S390
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
- (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
- && (x)->e_ident[EI_CLASS] == ELF_CLASS)
-
-/* ELF register definitions */
-#define NUM_GPRS 16
-#define NUM_FPRS 16
-#define NUM_ACRS 16
-
-/* For SVR4/S390 the function pointer to be registered with `atexit` is
- passed in R14. */
-#define ELF_PLAT_INIT(_r, load_addr) \
- do { \
- _r->gprs[14] = 0; \
- } while(0)
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE 4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
- use of this is to invoke "./ld.so someprog" to test out a new version of
- the loader. We need to make sure that it is out of the way of the program
- that it will "exec", and that there is sufficient room for the brk. */
-
-#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
-
-/* Wow, the "main" arch needs arch dependent functions too.. :) */
-
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
- now struct_user_regs, they are different) */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs) dump_regs32(regs, &pr_reg);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, regs) dump_task_regs32(tsk, regs)
-
-#define ELF_CORE_COPY_FPREGS(tsk, fpregs) dump_task_fpu(tsk, fpregs)
-
-/* This yields a mask that user programs can use to figure out what
- instruction set this CPU supports. */
-
-#define ELF_HWCAP (0)
-
-/* This yields a string that ld.so will use to load implementation
- specific libraries for optimization. This is more specific in
- intent than poking at uname or /proc/cpuinfo.
-
- For the moment, we have only optimizations for the Intel generations,
- but that could change... */
-
-#define ELF_PLATFORM (NULL)
-
-#define SET_PERSONALITY(ex, ibcs2) \
-do { \
- if (ibcs2) \
- set_personality(PER_SVR4); \
- else if (current->personality != PER_LINUX32) \
- set_personality(PER_LINUX); \
- set_thread_flag(TIF_31BIT); \
-} while (0)
-
-#include "compat_linux.h"
-
-typedef _s390_fp_regs32 elf_fpregset_t;
-
-typedef struct
-{
-
- _psw_t32 psw;
- __u32 gprs[__NUM_GPRS];
- __u32 acrs[__NUM_ACRS];
- __u32 orig_gpr2;
-} s390_regs32;
-typedef s390_regs32 elf_gregset_t;
-
-static inline int dump_regs32(struct pt_regs *ptregs, elf_gregset_t *regs)
-{
- int i;
-
- memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
- memcpy(&regs->psw.addr, (char *)&ptregs->psw.addr + 4, 4);
- for (i = 0; i < NUM_GPRS; i++)
- regs->gprs[i] = ptregs->gprs[i];
- save_access_regs(regs->acrs);
- regs->orig_gpr2 = ptregs->orig_gpr2;
- return 1;
-}
-
-static inline int dump_task_regs32(struct task_struct *tsk, elf_gregset_t *regs)
-{
- struct pt_regs *ptregs = task_pt_regs(tsk);
- int i;
-
- memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
- memcpy(&regs->psw.addr, (char *)&ptregs->psw.addr + 4, 4);
- for (i = 0; i < NUM_GPRS; i++)
- regs->gprs[i] = ptregs->gprs[i];
- memcpy(regs->acrs, tsk->thread.acrs, sizeof(regs->acrs));
- regs->orig_gpr2 = ptregs->orig_gpr2;
- return 1;
-}
-
-static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
-{
- if (tsk == current)
- save_fp_regs((s390_fp_regs *) fpregs);
- else
- memcpy(fpregs, &tsk->thread.fp_regs, sizeof(elf_fpregset_t));
- return 1;
-}
-
-#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <linux/module.h>
-#include <linux/elfcore.h>
-#include <linux/binfmts.h>
-#include <linux/compat.h>
-
-#define elf_prstatus elf_prstatus32
-struct elf_prstatus32
-{
- struct elf_siginfo pr_info; /* Info associated with signal */
- short pr_cursig; /* Current signal */
- u32 pr_sigpend; /* Set of pending signals */
- u32 pr_sighold; /* Set of held signals */
- pid_t pr_pid;
- pid_t pr_ppid;
- pid_t pr_pgrp;
- pid_t pr_sid;
- struct compat_timeval pr_utime; /* User time */
- struct compat_timeval pr_stime; /* System time */
- struct compat_timeval pr_cutime; /* Cumulative user time */
- struct compat_timeval pr_cstime; /* Cumulative system time */
- elf_gregset_t pr_reg; /* GP registers */
- int pr_fpvalid; /* True if math co-processor being used. */
-};
-
-#define elf_prpsinfo elf_prpsinfo32
-struct elf_prpsinfo32
-{
- char pr_state; /* numeric process state */
- char pr_sname; /* char for pr_state */
- char pr_zomb; /* zombie */
- char pr_nice; /* nice val */
- u32 pr_flag; /* flags */
- u16 pr_uid;
- u16 pr_gid;
- pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
- /* Lots missing */
- char pr_fname[16]; /* filename of executable */
- char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
-};
-
-#include <linux/highuid.h>
-
-/*
-#define init_elf_binfmt init_elf32_binfmt
-*/
-
-#undef start_thread
-#define start_thread start_thread31
-
-static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
- unsigned long new_stackp)
-{
- set_fs(USER_DS);
- regs->psw.mask = psw_user32_bits;
- regs->psw.addr = new_psw;
- regs->gprs[15] = new_stackp;
- crst_table_downgrade(current->mm, 1UL << 31);
-}
-
-MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
- " Copyright 2000 IBM Corporation");
-MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static inline void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
- value->tv_usec = cputime % 1000000;
- value->tv_sec = cputime / 1000000;
-}
-
-#include "../../../fs/binfmt_elf.c"
-
diff --git a/arch/s390/kernel/bitmap.S b/arch/s390/kernel/bitmap.S
deleted file mode 100644
index dfb41f946e2..00000000000
--- a/arch/s390/kernel/bitmap.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * arch/s390/kernel/bitmap.S
- * Bitmaps for set_bit, clear_bit, test_and_set_bit, ...
- * See include/asm-s390/{bitops.h|posix_types.h} for details
- *
- * S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- */
-
- .globl _oi_bitmap
-_oi_bitmap:
- .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
-
- .globl _ni_bitmap
-_ni_bitmap:
- .byte 0xFE,0xFD,0xFB,0xF7,0xEF,0xDF,0xBF,0x7F
-
- .globl _zb_findmap
-_zb_findmap:
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4
- .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8
-
- .globl _sb_findmap
-_sb_findmap:
- .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
- .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
new file mode 100644
index 00000000000..c0b03c28d15
--- /dev/null
+++ b/arch/s390/kernel/cache.c
@@ -0,0 +1,389 @@
+/*
+ * Extract CPU cache information and expose them via sysfs.
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/notifier.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+
+struct cache {
+ unsigned long size;
+ unsigned int line_size;
+ unsigned int associativity;
+ unsigned int nr_sets;
+ unsigned int level : 3;
+ unsigned int type : 2;
+ unsigned int private : 1;
+ struct list_head list;
+};
+
+struct cache_dir {
+ struct kobject *kobj;
+ struct cache_index_dir *index;
+};
+
+struct cache_index_dir {
+ struct kobject kobj;
+ int cpu;
+ struct cache *cache;
+ struct cache_index_dir *next;
+};
+
+enum {
+ CACHE_SCOPE_NOTEXISTS,
+ CACHE_SCOPE_PRIVATE,
+ CACHE_SCOPE_SHARED,
+ CACHE_SCOPE_RESERVED,
+};
+
+enum {
+ CACHE_TYPE_SEPARATE,
+ CACHE_TYPE_DATA,
+ CACHE_TYPE_INSTRUCTION,
+ CACHE_TYPE_UNIFIED,
+};
+
+enum {
+ EXTRACT_TOPOLOGY,
+ EXTRACT_LINE_SIZE,
+ EXTRACT_SIZE,
+ EXTRACT_ASSOCIATIVITY,
+};
+
+enum {
+ CACHE_TI_UNIFIED = 0,
+ CACHE_TI_DATA = 0,
+ CACHE_TI_INSTRUCTION,
+};
+
+struct cache_info {
+ unsigned char : 4;
+ unsigned char scope : 2;
+ unsigned char type : 2;
+};
+
+#define CACHE_MAX_LEVEL 8
+
+union cache_topology {
+ struct cache_info ci[CACHE_MAX_LEVEL];
+ unsigned long long raw;
+};
+
+static const char * const cache_type_string[] = {
+ "Data",
+ "Instruction",
+ "Unified",
+};
+
+static struct cache_dir *cache_dir_cpu[NR_CPUS];
+static LIST_HEAD(cache_list);
+
+void show_cacheinfo(struct seq_file *m)
+{
+ struct cache *cache;
+ int index = 0;
+
+ list_for_each_entry(cache, &cache_list, list) {
+ seq_printf(m, "cache%-11d: ", index);
+ seq_printf(m, "level=%d ", cache->level);
+ seq_printf(m, "type=%s ", cache_type_string[cache->type]);
+ seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared");
+ seq_printf(m, "size=%luK ", cache->size >> 10);
+ seq_printf(m, "line_size=%u ", cache->line_size);
+ seq_printf(m, "associativity=%d", cache->associativity);
+ seq_puts(m, "\n");
+ index++;
+ }
+}
+
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+ unsigned long cmd, val;
+
+ cmd = ai << 4 | li << 1 | ti;
+ asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
+ : "=d" (val) : "a" (cmd));
+ return val;
+}
+
+static int __init cache_add(int level, int private, int type)
+{
+ struct cache *cache;
+ int ti;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ if (!cache)
+ return -ENOMEM;
+ if (type == CACHE_TYPE_INSTRUCTION)
+ ti = CACHE_TI_INSTRUCTION;
+ else
+ ti = CACHE_TI_UNIFIED;
+ cache->size = ecag(EXTRACT_SIZE, level, ti);
+ cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+ cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+ cache->nr_sets = cache->size / cache->associativity;
+ cache->nr_sets /= cache->line_size;
+ cache->private = private;
+ cache->level = level + 1;
+ cache->type = type - 1;
+ list_add_tail(&cache->list, &cache_list);
+ return 0;
+}
+
+static void __init cache_build_info(void)
+{
+ struct cache *cache, *next;
+ union cache_topology ct;
+ int level, private, rc;
+
+ ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+ for (level = 0; level < CACHE_MAX_LEVEL; level++) {
+ switch (ct.ci[level].scope) {
+ case CACHE_SCOPE_SHARED:
+ private = 0;
+ break;
+ case CACHE_SCOPE_PRIVATE:
+ private = 1;
+ break;
+ default:
+ return;
+ }
+ if (ct.ci[level].type == CACHE_TYPE_SEPARATE) {
+ rc = cache_add(level, private, CACHE_TYPE_DATA);
+ rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION);
+ } else {
+ rc = cache_add(level, private, ct.ci[level].type);
+ }
+ if (rc)
+ goto error;
+ }
+ return;
+error:
+ list_for_each_entry_safe(cache, next, &cache_list, list) {
+ list_del(&cache->list);
+ kfree(cache);
+ }
+}
+
+static struct cache_dir *cache_create_cache_dir(int cpu)
+{
+ struct cache_dir *cache_dir;
+ struct kobject *kobj = NULL;
+ struct device *dev;
+
+ dev = get_cpu_device(cpu);
+ if (!dev)
+ goto out;
+ kobj = kobject_create_and_add("cache", &dev->kobj);
+ if (!kobj)
+ goto out;
+ cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+ if (!cache_dir)
+ goto out;
+ cache_dir->kobj = kobj;
+ cache_dir_cpu[cpu] = cache_dir;
+ return cache_dir;
+out:
+ kobject_put(kobj);
+ return NULL;
+}
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj)
+{
+ return container_of(kobj, struct cache_index_dir, kobj);
+}
+
+static void cache_index_release(struct kobject *kobj)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(kobj);
+ kfree(index);
+}
+
+static ssize_t cache_index_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct kobj_attribute *kobj_attr;
+
+ kobj_attr = container_of(attr, struct kobj_attribute, attr);
+ return kobj_attr->show(kobj, kobj_attr, buf);
+}
+
+#define DEFINE_CACHE_ATTR(_name, _format, _value) \
+static ssize_t cache_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *buf) \
+{ \
+ struct cache_index_dir *index; \
+ \
+ index = kobj_to_cache_index_dir(kobj); \
+ return sprintf(buf, _format, _value); \
+} \
+static struct kobj_attribute cache_##_name##_attr = \
+ __ATTR(_name, 0444, cache_##_name##_show, NULL);
+
+DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10);
+DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size);
+DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets);
+DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity);
+DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]);
+DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level);
+
+static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf)
+{
+ struct cache_index_dir *index;
+ int len;
+
+ index = kobj_to_cache_index_dir(kobj);
+ len = type ?
+ cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) :
+ cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu));
+ len += sprintf(&buf[len], "\n");
+ return len;
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return shared_cpu_map_func(kobj, 0, buf);
+}
+static struct kobj_attribute cache_shared_cpu_map_attr =
+ __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+static ssize_t shared_cpu_list_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return shared_cpu_map_func(kobj, 1, buf);
+}
+static struct kobj_attribute cache_shared_cpu_list_attr =
+ __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
+static struct attribute *cache_index_default_attrs[] = {
+ &cache_type_attr.attr,
+ &cache_size_attr.attr,
+ &cache_number_of_sets_attr.attr,
+ &cache_ways_of_associativity_attr.attr,
+ &cache_level_attr.attr,
+ &cache_coherency_line_size_attr.attr,
+ &cache_shared_cpu_map_attr.attr,
+ &cache_shared_cpu_list_attr.attr,
+ NULL,
+};
+
+static const struct sysfs_ops cache_index_ops = {
+ .show = cache_index_show,
+};
+
+static struct kobj_type cache_index_type = {
+ .sysfs_ops = &cache_index_ops,
+ .release = cache_index_release,
+ .default_attrs = cache_index_default_attrs,
+};
+
+static int cache_create_index_dir(struct cache_dir *cache_dir,
+ struct cache *cache, int index, int cpu)
+{
+ struct cache_index_dir *index_dir;
+ int rc;
+
+ index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+ if (!index_dir)
+ return -ENOMEM;
+ index_dir->cache = cache;
+ index_dir->cpu = cpu;
+ rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+ cache_dir->kobj, "index%d", index);
+ if (rc)
+ goto out;
+ index_dir->next = cache_dir->index;
+ cache_dir->index = index_dir;
+ return 0;
+out:
+ kfree(index_dir);
+ return rc;
+}
+
+static int cache_add_cpu(int cpu)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+ int rc, index = 0;
+
+ if (list_empty(&cache_list))
+ return 0;
+ cache_dir = cache_create_cache_dir(cpu);
+ if (!cache_dir)
+ return -ENOMEM;
+ list_for_each_entry(cache, &cache_list, list) {
+ if (!cache->private)
+ break;
+ rc = cache_create_index_dir(cache_dir, cache, index, cpu);
+ if (rc)
+ return rc;
+ index++;
+ }
+ return 0;
+}
+
+static void cache_remove_cpu(int cpu)
+{
+ struct cache_index_dir *index, *next;
+ struct cache_dir *cache_dir;
+
+ cache_dir = cache_dir_cpu[cpu];
+ if (!cache_dir)
+ return;
+ index = cache_dir->index;
+ while (index) {
+ next = index->next;
+ kobject_put(&index->kobj);
+ index = next;
+ }
+ kobject_put(cache_dir->kobj);
+ kfree(cache_dir);
+ cache_dir_cpu[cpu] = NULL;
+}
+
+static int cache_hotplug(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+{
+ int cpu = (long)hcpu;
+ int rc = 0;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ rc = cache_add_cpu(cpu);
+ if (rc)
+ cache_remove_cpu(cpu);
+ break;
+ case CPU_DEAD:
+ cache_remove_cpu(cpu);
+ break;
+ }
+ return rc ? NOTIFY_BAD : NOTIFY_OK;
+}
+
+static int __init cache_init(void)
+{
+ int cpu;
+
+ if (!test_facility(34))
+ return 0;
+ cache_build_info();
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ cache_add_cpu(cpu);
+ __hotcpu_notifier(cache_hotplug, 0);
+ cpu_notifier_register_done();
+ return 0;
+}
+device_initcall(cache_init);
diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c
deleted file mode 100644
index 914d49444f9..00000000000
--- a/arch/s390/kernel/compat_exec_domain.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Support for 32-bit Linux for S390 personality.
- *
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Gerhard Tonn (ton@de.ibm.com)
- *
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/personality.h>
-#include <linux/sched.h>
-
-static struct exec_domain s390_exec_domain;
-
-static int __init s390_init (void)
-{
- s390_exec_domain.name = "Linux/s390";
- s390_exec_domain.handler = NULL;
- s390_exec_domain.pers_low = PER_LINUX32;
- s390_exec_domain.pers_high = PER_LINUX32;
- s390_exec_domain.signal_map = default_exec_domain.signal_map;
- s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
- register_exec_domain(&s390_exec_domain);
- return 0;
-}
-
-__initcall(s390_init);
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index d7f22226fc4..ca38139423a 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -1,8 +1,6 @@
/*
- * arch/s390x/kernel/linux32.c
- *
* S390 version
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Gerhard Tonn (ton@de.ibm.com)
* Thomas Spatzier (tspat@de.ibm.com)
@@ -24,22 +22,13 @@
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
-#include <linux/utsname.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
-#include <linux/slab.h>
#include <linux/uio.h>
-#include <linux/nfs_fs.h>
#include <linux/quota.h>
#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
@@ -59,6 +48,7 @@
#include <linux/ptrace.h>
#include <linux/fadvise.h>
#include <linux/ipc.h>
+#include <linux/slab.h>
#include <asm/types.h>
#include <asm/uaccess.h>
@@ -68,13 +58,6 @@
#include "compat_linux.h"
-long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
- PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
-long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME |
- PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
- PSW32_MASK_PSTATE);
-
/* For this source file, we want overflow handling. */
#undef high2lowuid
@@ -103,92 +86,111 @@ long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME |
#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
-asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group)
+COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename,
+ u16, user, u16, group)
{
return sys_chown(filename, low2highuid(user), low2highgid(group));
}
-asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group)
+COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *,
+ filename, u16, user, u16, group)
{
return sys_lchown(filename, low2highuid(user), low2highgid(group));
}
-asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group)
+COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group)
{
return sys_fchown(fd, low2highuid(user), low2highgid(group));
}
-asmlinkage long sys32_setregid16(u16 rgid, u16 egid)
+COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
{
return sys_setregid(low2highgid(rgid), low2highgid(egid));
}
-asmlinkage long sys32_setgid16(u16 gid)
+COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
{
return sys_setgid((gid_t)gid);
}
-asmlinkage long sys32_setreuid16(u16 ruid, u16 euid)
+COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
{
return sys_setreuid(low2highuid(ruid), low2highuid(euid));
}
-asmlinkage long sys32_setuid16(u16 uid)
+COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
{
return sys_setuid((uid_t)uid);
}
-asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid)
+COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
{
return sys_setresuid(low2highuid(ruid), low2highuid(euid),
- low2highuid(suid));
+ low2highuid(suid));
}
-asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid)
+COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp,
+ u16 __user *, euidp, u16 __user *, suidp)
{
+ const struct cred *cred = current_cred();
int retval;
+ u16 ruid, euid, suid;
- if (!(retval = put_user(high2lowuid(current->uid), ruid)) &&
- !(retval = put_user(high2lowuid(current->euid), euid)))
- retval = put_user(high2lowuid(current->suid), suid);
+ ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
+ euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
+ suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));
+
+ if (!(retval = put_user(ruid, ruidp)) &&
+ !(retval = put_user(euid, euidp)))
+ retval = put_user(suid, suidp);
return retval;
}
-asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid)
+COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid)
{
return sys_setresgid(low2highgid(rgid), low2highgid(egid),
- low2highgid(sgid));
+ low2highgid(sgid));
}
-asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid)
+COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
+ u16 __user *, egidp, u16 __user *, sgidp)
{
+ const struct cred *cred = current_cred();
int retval;
+ u16 rgid, egid, sgid;
+
+ rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
+ egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
+ sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));
- if (!(retval = put_user(high2lowgid(current->gid), rgid)) &&
- !(retval = put_user(high2lowgid(current->egid), egid)))
- retval = put_user(high2lowgid(current->sgid), sgid);
+ if (!(retval = put_user(rgid, rgidp)) &&
+ !(retval = put_user(egid, egidp)))
+ retval = put_user(sgid, sgidp);
return retval;
}
-asmlinkage long sys32_setfsuid16(u16 uid)
+COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
{
return sys_setfsuid((uid_t)uid);
}
-asmlinkage long sys32_setfsgid16(u16 gid)
+COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
{
return sys_setfsgid((gid_t)gid);
}
static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
{
+ struct user_namespace *user_ns = current_user_ns();
int i;
u16 group;
+ kgid_t kgid;
for (i = 0; i < group_info->ngroups; i++) {
- group = (u16)GROUP_AT(group_info, i);
+ kgid = GROUP_AT(group_info, i);
+ group = (u16)from_kgid_munged(user_ns, kgid);
if (put_user(group, grouplist+i))
return -EFAULT;
}
@@ -198,43 +200,51 @@ static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info
static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist)
{
+ struct user_namespace *user_ns = current_user_ns();
int i;
u16 group;
+ kgid_t kgid;
for (i = 0; i < group_info->ngroups; i++) {
if (get_user(group, grouplist+i))
return -EFAULT;
- GROUP_AT(group_info, i) = (gid_t)group;
+
+ kgid = make_kgid(user_ns, (gid_t)group);
+ if (!gid_valid(kgid))
+ return -EINVAL;
+
+ GROUP_AT(group_info, i) = kgid;
}
return 0;
}
-asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist)
+COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist)
{
+ const struct cred *cred = current_cred();
int i;
if (gidsetsize < 0)
return -EINVAL;
- get_group_info(current->group_info);
- i = current->group_info->ngroups;
+ get_group_info(cred->group_info);
+ i = cred->group_info->ngroups;
if (gidsetsize) {
if (i > gidsetsize) {
i = -EINVAL;
goto out;
}
- if (groups16_to_user(grouplist, current->group_info)) {
+ if (groups16_to_user(grouplist, cred->group_info)) {
i = -EFAULT;
goto out;
}
}
out:
- put_group_info(current->group_info);
+ put_group_info(cred->group_info);
return i;
}
-asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
+COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
{
struct group_info *group_info;
int retval;
@@ -259,479 +269,67 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
return retval;
}
-asmlinkage long sys32_getuid16(void)
-{
- return high2lowuid(current->uid);
-}
-
-asmlinkage long sys32_geteuid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_getuid16)
{
- return high2lowuid(current->euid);
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
}
-asmlinkage long sys32_getgid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_geteuid16)
{
- return high2lowgid(current->gid);
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
}
-asmlinkage long sys32_getegid16(void)
+COMPAT_SYSCALL_DEFINE0(s390_getgid16)
{
- return high2lowgid(current->egid);
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
}
-/* 32-bit timeval and related flotsam. */
-
-static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i)
+COMPAT_SYSCALL_DEFINE0(s390_getegid16)
{
- return (!access_ok(VERIFY_READ, o, sizeof(*o)) ||
- (__get_user(o->tv_sec, &i->tv_sec) ||
- __get_user(o->tv_usec, &i->tv_usec)));
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
}
-static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
- return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
- (__put_user(i->tv_sec, &o->tv_sec) ||
- __put_user(i->tv_usec, &o->tv_usec)));
-}
-
-/*
- * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation.
- *
- * This is really horribly ugly.
- */
#ifdef CONFIG_SYSVIPC
-asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr)
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+ compat_ulong_t, third, compat_uptr_t, ptr)
{
if (call >> 16) /* hack for backward compatibility */
return -EINVAL;
-
- call &= 0xffff;
-
- switch (call) {
- case SEMTIMEDOP:
- return compat_sys_semtimedop(first, compat_ptr(ptr),
- second, compat_ptr(third));
- case SEMOP:
- /* struct sembuf is the same on 32 and 64bit :)) */
- return sys_semtimedop(first, compat_ptr(ptr),
- second, NULL);
- case SEMGET:
- return sys_semget(first, second, third);
- case SEMCTL:
- return compat_sys_semctl(first, second, third,
- compat_ptr(ptr));
- case MSGSND:
- return compat_sys_msgsnd(first, second, third,
- compat_ptr(ptr));
- case MSGRCV:
- return compat_sys_msgrcv(first, second, 0, third,
- 0, compat_ptr(ptr));
- case MSGGET:
- return sys_msgget((key_t) first, second);
- case MSGCTL:
- return compat_sys_msgctl(first, second, compat_ptr(ptr));
- case SHMAT:
- return compat_sys_shmat(first, second, third,
- 0, compat_ptr(ptr));
- case SHMDT:
- return sys_shmdt(compat_ptr(ptr));
- case SHMGET:
- return sys_shmget(first, (unsigned)second, third);
- case SHMCTL:
- return compat_sys_shmctl(first, second, compat_ptr(ptr));
- }
-
- return -ENOSYS;
+ return compat_sys_ipc(call, first, second, third, ptr, third);
}
#endif
-asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low)
-{
- if ((int)high < 0)
- return -EINVAL;
- else
- return sys_truncate(path, (high << 32) | low);
-}
-
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low)
-{
- if ((int)high < 0)
- return -EINVAL;
- else
- return sys_ftruncate(fd, (high << 32) | low);
-}
-
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
- compat_ino_t ino;
- int err;
-
- if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
- return -EOVERFLOW;
-
- ino = stat->ino;
- if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
- return -EOVERFLOW;
-
- err = put_user(old_encode_dev(stat->dev), &statbuf->st_dev);
- err |= put_user(stat->ino, &statbuf->st_ino);
- err |= put_user(stat->mode, &statbuf->st_mode);
- err |= put_user(stat->nlink, &statbuf->st_nlink);
- err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid);
- err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid);
- err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev);
- err |= put_user(stat->size, &statbuf->st_size);
- err |= put_user(stat->atime.tv_sec, &statbuf->st_atime);
- err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
- err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
- err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
- err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
- err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
- err |= put_user(stat->blksize, &statbuf->st_blksize);
- err |= put_user(stat->blocks, &statbuf->st_blocks);
-/* fixme
- err |= put_user(0, &statbuf->__unused4[0]);
- err |= put_user(0, &statbuf->__unused4[1]);
-*/
- return err;
-}
-
-asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval)
-{
- struct timespec t;
- int ret;
- mm_segment_t old_fs = get_fs ();
-
- set_fs (KERNEL_DS);
- ret = sys_sched_rr_get_interval(pid,
- (struct timespec __force __user *) &t);
- set_fs (old_fs);
- if (put_compat_timespec(&t, interval))
- return -EFAULT;
- return ret;
-}
-
-asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, size_t sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (set) {
- if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
- return -EFAULT;
- switch (_NSIG_WORDS) {
- case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
- case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
- case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
- case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
- }
- }
- set_fs (KERNEL_DS);
- ret = sys_rt_sigprocmask(how,
- set ? (sigset_t __force __user *) &s : NULL,
- oset ? (sigset_t __force __user *) &s : NULL,
- sigsetsize);
- set_fs (old_fs);
- if (ret) return ret;
- if (oset) {
- switch (_NSIG_WORDS) {
- case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
- case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
- case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
- case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
- }
- if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return 0;
-}
-
-asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
- size_t sigsetsize)
-{
- sigset_t s;
- compat_sigset_t s32;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- set_fs (KERNEL_DS);
- ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize);
- set_fs (old_fs);
- if (!ret) {
- switch (_NSIG_WORDS) {
- case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
- case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
- case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
- case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
- }
- if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
- return -EFAULT;
- }
- return ret;
-}
-
-asmlinkage long
-sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
-{
- siginfo_t info;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (copy_siginfo_from_user32(&info, uinfo))
- return -EFAULT;
- set_fs (KERNEL_DS);
- ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *) &info);
- set_fs (old_fs);
- return ret;
-}
-
-/*
- * sys32_execve() executes a new program after the asm stub has set
- * things up for us. This should basically do what I want it to.
- */
-asmlinkage long sys32_execve(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- char *filename;
- unsigned long result;
- int rc;
-
- filename = getname(compat_ptr(regs->orig_gpr2));
- if (IS_ERR(filename)) {
- result = PTR_ERR(filename);
- goto out;
- }
- rc = compat_do_execve(filename, compat_ptr(regs->gprs[3]),
- compat_ptr(regs->gprs[4]), regs);
- if (rc) {
- result = rc;
- goto out_putname;
- }
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- current->thread.fp_regs.fpc=0;
- asm volatile("sfpc %0,0" : : "d" (0));
- result = regs->gprs[2];
-out_putname:
- putname(filename);
-out:
- return result;
-}
-
-
-#ifdef CONFIG_MODULES
-
-asmlinkage long
-sys32_init_module(void __user *umod, unsigned long len,
- const char __user *uargs)
-{
- return sys_init_module(umod, len, uargs);
-}
-
-asmlinkage long
-sys32_delete_module(const char __user *name_user, unsigned int flags)
-{
- return sys_delete_module(name_user, flags);
-}
-
-#else /* CONFIG_MODULES */
-
-asmlinkage long
-sys32_init_module(void __user *umod, unsigned long len,
- const char __user *uargs)
-{
- return -ENOSYS;
-}
-
-asmlinkage long
-sys32_delete_module(const char __user *name_user, unsigned int flags)
-{
- return -ENOSYS;
-}
-
-#endif /* CONFIG_MODULES */
-
-/* Translations due to time_t size differences. Which affects all
- sorts of things, like timeval and itimerval. */
-
-extern struct timezone sys_tz;
-
-asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
- if (tv) {
- struct timeval ktv;
- do_gettimeofday(&ktv);
- if (put_tv32(tv, &ktv))
- return -EFAULT;
- }
- if (tz) {
- if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
- return -EFAULT;
- }
- return 0;
-}
-
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
- long usec;
-
- if (!access_ok(VERIFY_READ, i, sizeof(*i)))
- return -EFAULT;
- if (__get_user(o->tv_sec, &i->tv_sec))
- return -EFAULT;
- if (__get_user(usec, &i->tv_usec))
- return -EFAULT;
- o->tv_nsec = usec * 1000;
- return 0;
-}
-
-asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
+COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
{
- struct timespec kts;
- struct timezone ktz;
-
- if (tv) {
- if (get_ts32(&kts, tv))
- return -EFAULT;
- }
- if (tz) {
- if (copy_from_user(&ktz, tz, sizeof(ktz)))
- return -EFAULT;
- }
-
- return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
+ return sys_truncate(path, (unsigned long)high << 32 | low);
}
-/* These are here just in case some old sparc32 binary calls it. */
-asmlinkage long sys32_pause(void)
+COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
{
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- return -ERESTARTNOHAND;
+ return sys_ftruncate(fd, (unsigned long)high << 32 | low);
}
-asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
- size_t count, u32 poshi, u32 poslo)
+COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
- return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo));
+ return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
}
-asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo)
+COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
- return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo));
-}
-
-asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count)
-{
- return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count);
-}
-
-asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count)
-{
- mm_segment_t old_fs = get_fs();
- int ret;
- off_t of;
-
- if (offset && get_user(of, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile(out_fd, in_fd,
- offset ? (off_t __force __user *) &of : NULL, count);
- set_fs(old_fs);
-
- if (offset && put_user(of, offset))
- return -EFAULT;
-
- return ret;
+ return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
}
-asmlinkage long sys32_sendfile64(int out_fd, int in_fd,
- compat_loff_t __user *offset, s32 count)
+COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
{
- mm_segment_t old_fs = get_fs();
- int ret;
- loff_t lof;
-
- if (offset && get_user(lof, offset))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- ret = sys_sendfile64(out_fd, in_fd,
- offset ? (loff_t __force __user *) &lof : NULL,
- count);
- set_fs(old_fs);
-
- if (offset && put_user(lof, offset))
- return -EFAULT;
-
- return ret;
+ return sys_readahead(fd, (unsigned long)high << 32 | low, count);
}
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct __sysctl_args32 {
- u32 name;
- int nlen;
- u32 oldval;
- u32 oldlenp;
- u32 newval;
- u32 newlen;
- u32 __unused[4];
-};
-
-asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
-{
- struct __sysctl_args32 tmp;
- int error;
- size_t oldlen;
- size_t __user *oldlenp = NULL;
- unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7;
-
- if (copy_from_user(&tmp, args, sizeof(tmp)))
- return -EFAULT;
-
- if (tmp.oldval && tmp.oldlenp) {
- /* Duh, this is ugly and might not work if sysctl_args
- is in read-only memory, but do_sysctl does indirectly
- a lot of uaccess in both directions and we'd have to
- basically copy the whole sysctl.c here, and
- glibc's __sysctl uses rw memory for the structure
- anyway. */
- if (get_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp)) ||
- put_user(oldlen, (size_t __user *)addr))
- return -EFAULT;
- oldlenp = (size_t __user *)addr;
- }
-
- lock_kernel();
- error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, compat_ptr(tmp.oldval),
- oldlenp, compat_ptr(tmp.newval), tmp.newlen);
- unlock_kernel();
- if (oldlenp) {
- if (!error) {
- if (get_user(oldlen, (size_t __user *)addr) ||
- put_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp)))
- error = -EFAULT;
- }
- if (copy_to_user(args->__unused, tmp.__unused,
- sizeof(tmp.__unused)))
- error = -EFAULT;
- }
- return error;
-}
-#endif
-
struct stat64_emu31 {
unsigned long long st_dev;
unsigned int __pad1;
@@ -768,8 +366,8 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
tmp.__st_ino = (u32)stat->ino;
tmp.st_mode = stat->mode;
tmp.st_nlink = (unsigned int)stat->nlink;
- tmp.st_uid = stat->uid;
- tmp.st_gid = stat->gid;
+ tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
+ tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
tmp.st_rdev = huge_encode_dev(stat->rdev);
tmp.st_size = stat->size;
tmp.st_blksize = (u32)stat->blksize;
@@ -781,7 +379,7 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
}
-asmlinkage long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
@@ -790,7 +388,7 @@ asmlinkage long sys32_stat64(char __user * filename, struct stat64_emu31 __user
return ret;
}
-asmlinkage long sys32_lstat64(char __user * filename, struct stat64_emu31 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
@@ -799,7 +397,7 @@ asmlinkage long sys32_lstat64(char __user * filename, struct stat64_emu31 __user
return ret;
}
-asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
@@ -808,24 +406,16 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta
return ret;
}
-asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
+ struct stat64_emu31 __user *, statbuf, int, flag)
{
struct kstat stat;
- int error = -EINVAL;
-
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
- goto out;
-
- if (flag & AT_SYMLINK_NOFOLLOW)
- error = vfs_lstat_fd(dfd, filename, &stat);
- else
- error = vfs_stat_fd(dfd, filename, &stat);
+ int error;
- if (!error)
- error = cp_stat64(statbuf, &stat);
-out:
- return error;
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_stat64(statbuf, &stat);
}
/*
@@ -835,78 +425,36 @@ out:
*/
struct mmap_arg_struct_emu31 {
- u32 addr;
- u32 len;
- u32 prot;
- u32 flags;
- u32 fd;
- u32 offset;
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_ulong_t prot;
+ compat_ulong_t flags;
+ compat_ulong_t fd;
+ compat_ulong_t offset;
};
-/* common code for old and new mmaps */
-static inline long do_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- struct file * file = NULL;
- unsigned long error = -EBADF;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
-
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- if (!IS_ERR((void *) error) && error + len >= 0x80000000ULL) {
- /* Result is out of bounds. */
- do_munmap(current->mm, addr, len);
- error = -ENOMEM;
- }
- up_write(&current->mm->mmap_sem);
-
- if (file)
- fput(file);
-out:
- return error;
-}
-
-
-asmlinkage unsigned long
-old32_mmap(struct mmap_arg_struct_emu31 __user *arg)
+COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
{
struct mmap_arg_struct_emu31 a;
- int error = -EFAULT;
if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
-
- error = -EINVAL;
+ return -EFAULT;
if (a.offset & ~PAGE_MASK)
- goto out;
-
- error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
-out:
- return error;
+ return -EINVAL;
+ return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ a.offset >> PAGE_SHIFT);
}
-asmlinkage long
-sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg)
+COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
{
struct mmap_arg_struct_emu31 a;
- int error = -EFAULT;
if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
+ return -EFAULT;
+ return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
}
-asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count)
+COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
@@ -914,7 +462,7 @@ asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count)
return sys_read(fd, buf, count);
}
-asmlinkage long sys32_write(unsigned int fd, char __user * buf, size_t count)
+COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
{
if ((compat_ssize_t) count < 0)
return -EINVAL;
@@ -922,37 +470,19 @@ asmlinkage long sys32_write(unsigned int fd, char __user * buf, size_t count)
return sys_write(fd, buf, count);
}
-asmlinkage long sys32_clone(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- unsigned long clone_flags;
- unsigned long newsp;
- int __user *parent_tidptr, *child_tidptr;
-
- clone_flags = regs->gprs[3] & 0xffffffffUL;
- newsp = regs->orig_gpr2 & 0x7fffffffUL;
- parent_tidptr = compat_ptr(regs->gprs[4]);
- child_tidptr = compat_ptr(regs->gprs[5]);
- if (!newsp)
- newsp = regs->gprs[15];
- return do_fork(clone_flags, newsp, regs, 0,
- parent_tidptr, child_tidptr);
-}
-
/*
* 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
* These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
* because the 31 bit values differ from the 64 bit values.
*/
-asmlinkage long
-sys32_fadvise64(int fd, loff_t offset, size_t len, int advise)
+COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
{
if (advise == 4)
advise = POSIX_FADV_DONTNEED;
else if (advise == 5)
advise = POSIX_FADV_NOREUSE;
- return sys_fadvise64(fd, offset, len, advise);
+ return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise);
}
struct fadvise64_64_args {
@@ -962,8 +492,7 @@ struct fadvise64_64_args {
int advice;
};
-asmlinkage long
-sys32_fadvise64_64(struct fadvise64_64_args __user *args)
+COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
{
struct fadvise64_64_args a;
@@ -975,3 +504,17 @@ sys32_fadvise64_64(struct fadvise64_64_args __user *args)
a.advice = POSIX_FADV_NOREUSE;
return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
}
+
+COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
+ u32, nhigh, u32, nlow, unsigned int, flags)
+{
+ return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
+ ((u64)nhigh << 32) + nlow, flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
+ u32, lenhigh, u32, lenlow)
+{
+ return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
+ ((u64)lenhigh << 32) + lenlow);
+}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 20723a06201..70d4b7c4bea 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -4,10 +4,6 @@
#include <linux/compat.h>
#include <linux/socket.h>
#include <linux/syscalls.h>
-#include <linux/nfs_fs.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/export.h>
/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
/* to a 64 bit pointer */
@@ -21,81 +17,6 @@ struct ipc_kludge_32 {
__s32 msgtyp;
};
-struct old_sigaction32 {
- __u32 sa_handler; /* Really a pointer, but need to deal with 32 bits */
- compat_old_sigset_t sa_mask; /* A 32 bit mask */
- __u32 sa_flags;
- __u32 sa_restorer; /* Another 32 bit pointer */
-};
-
-typedef struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[((128/sizeof(int)) - 3)];
-
- /* kill() */
- struct {
- pid_t _pid; /* sender's pid */
- uid_t _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- pid_t _pid; /* sender's pid */
- uid_t _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- pid_t _pid; /* which child */
- uid_t _uid; /* sender's uid */
- int _status;/* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
- struct {
- __u32 _addr; /* faulting insn/memory ref. - pointer */
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-} compat_siginfo_t;
-
-/*
- * How these fields are to be accessed.
- */
-#define si_pid _sifields._kill._pid
-#define si_uid _sifields._kill._uid
-#define si_status _sifields._sigchld._status
-#define si_utime _sifields._sigchld._utime
-#define si_stime _sifields._sigchld._stime
-#define si_value _sifields._rt._sigval
-#define si_int _sifields._rt._sigval.sival_int
-#define si_ptr _sifields._rt._sigval.sival_ptr
-#define si_addr _sifields._sigfault._addr
-#define si_band _sifields._sigpoll._band
-#define si_fd _sifields._sigpoll._fd
-#define si_tid _sifields._timer._tid
-#define si_overrun _sifields._timer._overrun
-
/* asm/sigcontext.h */
typedef union
{
@@ -106,6 +27,7 @@ typedef union
typedef struct
{
unsigned int fpc;
+ unsigned int pad;
freg_t32 fprs[__NUM_FPRS];
} _s390_fp_regs32;
@@ -140,99 +62,59 @@ struct sigcontext32
};
/* asm/signal.h */
-struct sigaction32 {
- __u32 sa_handler; /* pointer */
- __u32 sa_flags;
- __u32 sa_restorer; /* pointer */
- compat_sigset_t sa_mask; /* mask last for extensibility */
-};
-
-typedef struct {
- __u32 ss_sp; /* pointer */
- int ss_flags;
- compat_size_t ss_size;
-} stack_t32;
/* asm/ucontext.h */
struct ucontext32 {
__u32 uc_flags;
__u32 uc_link; /* pointer */
- stack_t32 uc_stack;
+ compat_stack_t uc_stack;
_sigregs32 uc_mcontext;
- compat_sigset_t uc_sigmask; /* mask last for extensibility */
+ compat_sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(compat_sigset_t)];
};
-struct __sysctl_args32;
struct stat64_emu31;
struct mmap_arg_struct_emu31;
struct fadvise64_64_args;
-struct old_sigaction32;
-struct old_sigaction32;
-long sys32_chown16(const char __user * filename, u16 user, u16 group);
-long sys32_lchown16(const char __user * filename, u16 user, u16 group);
-long sys32_fchown16(unsigned int fd, u16 user, u16 group);
-long sys32_setregid16(u16 rgid, u16 egid);
-long sys32_setgid16(u16 gid);
-long sys32_setreuid16(u16 ruid, u16 euid);
-long sys32_setuid16(u16 uid);
-long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
-long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long sys32_setfsuid16(u16 uid);
-long sys32_setfsgid16(u16 gid);
-long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
-long sys32_getuid16(void);
-long sys32_geteuid16(void);
-long sys32_getgid16(void);
-long sys32_getegid16(void);
-long sys32_ipc(u32 call, int first, int second, int third, u32 ptr);
-long sys32_truncate64(const char __user * path, unsigned long high,
- unsigned long low);
-long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
-long sys32_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval);
-long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
- compat_sigset_t __user *oset, size_t sigsetsize);
-long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
-long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
-long sys32_execve(void);
-long sys32_init_module(void __user *umod, unsigned long len,
- const char __user *uargs);
-long sys32_delete_module(const char __user *name_user, unsigned int flags);
-long sys32_gettimeofday(struct compat_timeval __user *tv,
- struct timezone __user *tz);
-long sys32_settimeofday(struct compat_timeval __user *tv,
- struct timezone __user *tz);
-long sys32_pause(void);
-long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
- u32 poshi, u32 poslo);
-long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
- size_t count, u32 poshi, u32 poslo);
-compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
-long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
- size_t count);
-long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
- s32 count);
-long sys32_sysctl(struct __sysctl_args32 __user *args);
-long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf);
-long sys32_lstat64(char __user * filename,
- struct stat64_emu31 __user * statbuf);
-long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
-long sys32_fstatat64(unsigned int dfd, char __user *filename,
- struct stat64_emu31 __user* statbuf, int flag);
-unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long sys32_read(unsigned int fd, char __user * buf, size_t count);
-long sys32_write(unsigned int fd, char __user * buf, size_t count);
-long sys32_clone(void);
-long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
-long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
-long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
- struct old_sigaction32 __user *oact);
-long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
- struct sigaction32 __user *oact, size_t sigsetsize);
-long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss);
+long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
+long compat_sys_s390_setregid16(u16 rgid, u16 egid);
+long compat_sys_s390_setgid16(u16 gid);
+long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
+long compat_sys_s390_setuid16(u16 uid);
+long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
+long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long compat_sys_s390_setfsuid16(u16 uid);
+long compat_sys_s390_setfsgid16(u16 gid);
+long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_getuid16(void);
+long compat_sys_s390_geteuid16(void);
+long compat_sys_s390_getgid16(void);
+long compat_sys_s390_getegid16(void);
+long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
+long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
+long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
+long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
+long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
+long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
+long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+
#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
index 419aef913ee..12b82383351 100644
--- a/arch/s390/kernel/compat_ptrace.h
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -1,62 +1,42 @@
#ifndef _PTRACE32_H
#define _PTRACE32_H
-#include "compat_linux.h" /* needed for _psw_t32 */
-
-typedef struct {
- __u32 cr[3];
-} per_cr_words32;
-
-typedef struct {
- __u16 perc_atmid; /* 0x096 */
- __u32 address; /* 0x098 */
- __u8 access_id; /* 0x0a1 */
-} per_lowcore_words32;
-
-typedef struct {
- union {
- per_cr_words32 words;
- } control_regs;
- /*
- * Use these flags instead of setting em_instruction_fetch
- * directly they are used so that single stepping can be
- * switched on & off while not affecting other tracing
- */
- unsigned single_step : 1;
- unsigned instruction_fetch : 1;
- unsigned : 30;
- /*
- * These addresses are copied into cr10 & cr11 if single
- * stepping is switched off
- */
- __u32 starting_addr;
- __u32 ending_addr;
- union {
- per_lowcore_words32 words;
- } lowcore;
-} per_struct32;
+#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */
+#include "compat_linux.h" /* needed for psw_compat_t */
+
+struct compat_per_struct_kernel {
+ __u32 cr9; /* PER control bits */
+ __u32 cr10; /* PER starting address */
+ __u32 cr11; /* PER ending address */
+ __u32 bits; /* Obsolete software bits */
+ __u32 starting_addr; /* User specified start address */
+ __u32 ending_addr; /* User specified end address */
+ __u16 perc_atmid; /* PER trap ATMID */
+ __u32 address; /* PER trap instruction address */
+ __u8 access_id; /* PER trap access identification */
+};
-struct user_regs_struct32
+struct compat_user_regs_struct
{
- _psw_t32 psw;
+ psw_compat_t psw;
u32 gprs[NUM_GPRS];
u32 acrs[NUM_ACRS];
u32 orig_gpr2;
+ /* nb: there's a 4-byte hole here */
s390_fp_regs fp_regs;
/*
* These per registers are in here so that gdb can modify them
* itself as there is no "official" ptrace interface for hardware
* watchpoints. This is the way intel does it.
*/
- per_struct32 per_info;
- u32 ieee_instruction_pointer;
- /* Used to give failing instruction back to user for ieee exceptions */
+ struct compat_per_struct_kernel per_info;
+ u32 ieee_instruction_pointer; /* obsolete, always 0 */
};
-struct user32 {
+struct compat_user {
/* We start with the registers, to mimic the way that "memory"
is returned from the ptrace(3,...) function. */
- struct user_regs_struct32 regs; /* Where the registers are actually stored */
+ struct compat_user_regs_struct regs;
/* The rest of this junk is to help gdb figure out what goes where */
u32 u_tsize; /* Text segment size (pages). */
u32 u_dsize; /* Data segment size (pages). */
@@ -78,6 +58,6 @@ typedef struct
__u32 len;
__u32 kernel_addr;
__u32 process_addr;
-} ptrace_area_emu31;
+} compat_ptrace_area;
#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index c7f02e777af..f204d692036 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -1,7 +1,5 @@
/*
- * arch/s390/kernel/compat_signal.c
- *
- * Copyright (C) IBM Corp. 2000,2006
+ * Copyright IBM Corp. 2000, 2006
* Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
* Gerhard Tonn (ton@de.ibm.com)
*
@@ -27,18 +25,18 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/lowcore.h>
+#include <asm/switch_to.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
#include "entry.h"
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
typedef struct
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
struct sigcontext32 sc;
_sigregs32 sregs;
int signo;
+ __u32 gprs_high[NUM_GPRS];
__u8 retcode[S390_SYSCALL_SIZE];
} sigframe32;
@@ -48,15 +46,13 @@ typedef struct
__u8 retcode[S390_SYSCALL_SIZE];
compat_siginfo_t info;
struct ucontext32 uc;
+ __u32 gprs_high[NUM_GPRS];
} rt_sigframe32;
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
{
int err;
- if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
- return -EFAULT;
-
/* If you change siginfo_t structure, please be sure
this code is fixed accordingly.
It should never copy any pad contained in the structure
@@ -103,7 +99,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
break;
}
}
- return err;
+ return err ? -EFAULT : 0;
}
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
@@ -111,9 +107,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
int err;
u32 tmp;
- if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t)))
- return -EFAULT;
-
err = __get_user(to->si_signo, &from->si_signo);
err |= __get_user(to->si_errno, &from->si_errno);
err |= __get_user(to->si_code, &from->si_code);
@@ -139,7 +132,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
break;
case __SI_FAULT >> 16:
err |= __get_user(tmp, &from->si_addr);
- to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN);
+ to->si_addr = (void __force __user *)
+ (u64) (tmp & PSW32_ADDR_INSN);
break;
case __SI_POLL >> 16:
err |= __get_user(to->si_band, &from->si_band);
@@ -154,260 +148,134 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
break;
}
}
- return err;
+ return err ? -EFAULT : 0;
}
-asmlinkage long
-sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
- struct old_sigaction32 __user *oact)
+static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
{
- struct k_sigaction new_ka, old_ka;
- unsigned long sa_handler, sa_restorer;
- int ret;
-
- if (act) {
- compat_old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(sa_handler, &act->sa_handler) ||
- __get_user(sa_restorer, &act->sa_restorer) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
- new_ka.sa.sa_handler = (__sighandler_t) sa_handler;
- new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- sa_handler = (unsigned long) old_ka.sa.sa_handler;
- sa_restorer = (unsigned long) old_ka.sa.sa_restorer;
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(sa_handler, &oact->sa_handler) ||
- __put_user(sa_restorer, &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
+ _sigregs32 user_sregs;
+ int i;
+
+ user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
+ user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
+ user_sregs.regs.psw.mask |= PSW32_USER_BITS;
+ user_sregs.regs.psw.addr = (__u32) regs->psw.addr |
+ (__u32)(regs->psw.mask & PSW_MASK_BA);
+ for (i = 0; i < NUM_GPRS; i++)
+ user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
+ save_access_regs(current->thread.acrs);
+ memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+ sizeof(user_sregs.regs.acrs));
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
+ memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
+ sizeof(user_sregs.fpregs));
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
+ return -EFAULT;
+ return 0;
}
-asmlinkage long
-sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
- struct sigaction32 __user *oact, size_t sigsetsize)
+static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
{
- struct k_sigaction new_ka, old_ka;
- unsigned long sa_handler;
- int ret;
- compat_sigset_t set32;
+ _sigregs32 user_sregs;
+ int i;
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
+ /* Always make any pending restarted system call return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
- if (act) {
- ret = get_user(sa_handler, &act->sa_handler);
- ret |= __copy_from_user(&set32, &act->sa_mask,
- sizeof(compat_sigset_t));
- switch (_NSIG_WORDS) {
- case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
- | (((long)set32.sig[7]) << 32);
- case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
- | (((long)set32.sig[5]) << 32);
- case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
- | (((long)set32.sig[3]) << 32);
- case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
- | (((long)set32.sig[1]) << 32);
- }
- ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
-
- if (ret)
- return -EFAULT;
- new_ka.sa.sa_handler = (__sighandler_t) sa_handler;
- }
+ if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
+ return -EFAULT;
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- switch (_NSIG_WORDS) {
- case 4:
- set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
- set32.sig[6] = old_ka.sa.sa_mask.sig[3];
- case 3:
- set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
- set32.sig[4] = old_ka.sa.sa_mask.sig[2];
- case 2:
- set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
- set32.sig[2] = old_ka.sa.sa_mask.sig[1];
- case 1:
- set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
- set32.sig[0] = old_ka.sa.sa_mask.sig[0];
- }
- ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler);
- ret |= __copy_to_user(&oact->sa_mask, &set32,
- sizeof(compat_sigset_t));
- ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
- }
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
+ return -EINVAL;
- return ret;
-}
+ /* Loading the floating-point-control word can fail. Do that first. */
+ if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ return -EINVAL;
-asmlinkage long
-sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss)
-{
- struct pt_regs *regs = task_pt_regs(current);
- stack_t kss, koss;
- unsigned long ss_sp;
- int ret, err = 0;
- mm_segment_t old_fs = get_fs();
-
- if (uss) {
- if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
- return -EFAULT;
- err |= __get_user(ss_sp, &uss->ss_sp);
- err |= __get_user(kss.ss_size, &uss->ss_size);
- err |= __get_user(kss.ss_flags, &uss->ss_flags);
- if (err)
- return -EFAULT;
- kss.ss_sp = (void __user *) ss_sp;
- }
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
+ (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
+ for (i = 0; i < NUM_GPRS; i++)
+ regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
+ memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+ sizeof(current->thread.acrs));
+ restore_access_regs(current->thread.acrs);
- set_fs (KERNEL_DS);
- ret = do_sigaltstack((stack_t __force __user *) (uss ? &kss : NULL),
- (stack_t __force __user *) (uoss ? &koss : NULL),
- regs->gprs[15]);
- set_fs (old_fs);
-
- if (!ret && uoss) {
- if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
- return -EFAULT;
- ss_sp = (unsigned long) koss.ss_sp;
- err |= __put_user(ss_sp, &uoss->ss_sp);
- err |= __put_user(koss.ss_size, &uoss->ss_size);
- err |= __put_user(koss.ss_flags, &uoss->ss_flags);
- if (err)
- return -EFAULT;
- }
- return ret;
+ memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+ sizeof(current->thread.fp_regs));
+
+ restore_fp_regs(current->thread.fp_regs.fprs);
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+ return 0;
}
-static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
+static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
{
- _s390_regs_common32 regs32;
- int err, i;
+ __u32 gprs_high[NUM_GPRS];
+ int i;
- regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits,
- (__u32)(regs->psw.mask >> 32));
- regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr;
for (i = 0; i < NUM_GPRS; i++)
- regs32.gprs[i] = (__u32) regs->gprs[i];
- save_access_regs(current->thread.acrs);
- memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs));
- err = __copy_to_user(&sregs->regs, &regs32, sizeof(regs32));
- if (err)
- return err;
- save_fp_regs(&current->thread.fp_regs);
- /* s390_fp_regs and _s390_fp_regs32 are the same ! */
- return __copy_to_user(&sregs->fpregs, &current->thread.fp_regs,
- sizeof(_s390_fp_regs32));
+ gprs_high[i] = regs->gprs[i] >> 32;
+ if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high)))
+ return -EFAULT;
+ return 0;
}
-static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
+static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
{
- _s390_regs_common32 regs32;
- int err, i;
-
- /* Alwys make any pending restarted system call return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ __u32 gprs_high[NUM_GPRS];
+ int i;
- err = __copy_from_user(&regs32, &sregs->regs, sizeof(regs32));
- if (err)
- return err;
- regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask,
- (__u64)regs32.psw.mask << 32);
- regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
+ if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high)))
+ return -EFAULT;
for (i = 0; i < NUM_GPRS; i++)
- regs->gprs[i] = (__u64) regs32.gprs[i];
- memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs));
- restore_access_regs(current->thread.acrs);
-
- err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
- sizeof(_s390_fp_regs32));
- current->thread.fp_regs.fpc &= FPC_VALID_MASK;
- if (err)
- return err;
-
- restore_fp_regs(&current->thread.fp_regs);
- regs->trap = -1; /* disable syscall checks */
+ *(__u32 *)&regs->gprs[i] = gprs_high[i];
return 0;
}
-asmlinkage long sys32_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
sigset_t set;
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
+ set_current_blocked(&set);
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
-
+ if (restore_sigregs_gprs_high(regs, frame->gprs_high))
+ goto badframe;
return regs->gprs[2];
-
badframe:
force_sig(SIGSEGV, current);
return 0;
}
-asmlinkage long sys32_rt_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
sigset_t set;
- stack_t st;
- __u32 ss_sp;
- int err;
- mm_segment_t old_fs = get_fs();
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
+ set_current_blocked(&set);
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
-
- err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp);
- st.ss_sp = compat_ptr(ss_sp);
- err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size);
- err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags);
- if (err)
+ if (restore_sigregs_gprs_high(regs, frame->gprs_high))
+ goto badframe;
+ if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
-
- set_fs (KERNEL_DS);
- do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]);
- set_fs (old_fs);
-
return regs->gprs[2];
-
badframe:
force_sig(SIGSEGV, current);
return 0;
@@ -439,13 +307,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
sp = current->sas_ss_sp + current->sas_ss_size;
}
- /* This is the legacy signal stack switching. */
- else if (!user_mode(regs) &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- sp = (unsigned long) ka->sa.sa_restorer;
- }
-
return (void __user *)((sp - frame_size) & -8ul);
}
@@ -463,8 +324,6 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs * regs)
{
sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -474,17 +333,19 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
if (save_sigregs32(regs, &frame->sregs))
goto give_sigsegv;
+ if (save_sigregs_gprs_high(regs, frame->gprs_high))
+ goto give_sigsegv;
if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs))
goto give_sigsegv;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer;
+ regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64) frame->retcode;
+ regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
- (u16 __user *)(frame->retcode)))
+ (u16 __force __user *)(frame->retcode)))
goto give_sigsegv;
}
@@ -493,19 +354,28 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->gprs[15] = (__u64) frame;
- regs->psw.addr = (__u64) ka->sa.sa_handler;
+ regs->gprs[15] = (__force __u64) frame;
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__force __u64) ka->sa.sa_handler;
regs->gprs[2] = map_signal(sig);
- regs->gprs[3] = (__u64) &frame->sc;
+ regs->gprs[3] = (__force __u64) &frame->sc;
/* We forgot to include these in the sigcontext.
To avoid breaking binary compatibility, they are passed as args. */
- regs->gprs[4] = current->thread.trap_no;
- regs->gprs[5] = current->thread.prot_addr;
+ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+ sig == SIGTRAP || sig == SIGFPE) {
+ /* set extra registers only for synchronous signals */
+ regs->gprs[4] = regs->int_code & 127;
+ regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = task_thread_info(current)->last_break;
+ }
/* Place signal number on stack to allow backtrace from handler. */
- if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
goto give_sigsegv;
return 0;
@@ -519,8 +389,6 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
{
int err = 0;
rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -529,13 +397,11 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->gprs[15]),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
err |= save_sigregs32(regs, &frame->uc.uc_mcontext);
+ err |= save_sigregs_gprs_high(regs, frame->gprs_high);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
@@ -543,24 +409,30 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64) ka->sa.sa_restorer;
+ regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64) frame->retcode;
- err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __user *)(frame->retcode));
+ regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+ (u16 __force __user *)(frame->retcode)))
+ goto give_sigsegv;
}
/* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+ if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->gprs[15] = (__u64) frame;
- regs->psw.addr = (__u64) ka->sa.sa_handler;
+ regs->gprs[15] = (__force __u64) frame;
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__u64 __force) ka->sa.sa_handler;
regs->gprs[2] = map_signal(sig);
- regs->gprs[3] = (__u64) &frame->info;
- regs->gprs[4] = (__u64) &frame->uc;
+ regs->gprs[3] = (__force __u64) &frame->info;
+ regs->gprs[4] = (__force __u64) &frame->uc;
+ regs->gprs[5] = task_thread_info(current)->last_break;
return 0;
give_sigsegv:
@@ -572,9 +444,8 @@ give_sigsegv:
* OK, we're invoking a handler
*/
-int
-handle_signal32(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
+void handle_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
{
int ret;
@@ -583,15 +454,9 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka,
ret = setup_rt_frame32(sig, ka, info, oldset, regs);
else
ret = setup_frame32(sig, ka, oldset, regs);
-
- if (ret == 0) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- }
- return ret;
+ if (ret)
+ return;
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLE_STEP));
}
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
deleted file mode 100644
index d003a6e16af..00000000000
--- a/arch/s390/kernel/compat_wrapper.S
+++ /dev/null
@@ -1,1734 +0,0 @@
-/*
-* arch/s390/kernel/compat_wrapper.S
-* wrapper for 31 bit compatible system calls.
-*
-* Copyright (C) IBM Corp. 2000,2006
-* Author(s): Gerhard Tonn (ton@de.ibm.com),
-* Thomas Spatzier (tspat@de.ibm.com)
-*/
-
- .globl sys32_exit_wrapper
-sys32_exit_wrapper:
- lgfr %r2,%r2 # int
- jg sys_exit # branch to sys_exit
-
- .globl sys32_read_wrapper
-sys32_read_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys32_read # branch to sys_read
-
- .globl sys32_write_wrapper
-sys32_write_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- jg sys32_write # branch to system call
-
- .globl sys32_open_wrapper
-sys32_open_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_open # branch to system call
-
- .globl sys32_close_wrapper
-sys32_close_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_close # branch to system call
-
- .globl sys32_creat_wrapper
-sys32_creat_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_creat # branch to system call
-
- .globl sys32_link_wrapper
-sys32_link_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_link # branch to system call
-
- .globl sys32_unlink_wrapper
-sys32_unlink_wrapper:
- llgtr %r2,%r2 # const char *
- jg sys_unlink # branch to system call
-
- .globl sys32_chdir_wrapper
-sys32_chdir_wrapper:
- llgtr %r2,%r2 # const char *
- jg sys_chdir # branch to system call
-
- .globl sys32_time_wrapper
-sys32_time_wrapper:
- llgtr %r2,%r2 # int *
- jg compat_sys_time # branch to system call
-
- .globl sys32_mknod_wrapper
-sys32_mknod_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # dev
- jg sys_mknod # branch to system call
-
- .globl sys32_chmod_wrapper
-sys32_chmod_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # mode_t
- jg sys_chmod # branch to system call
-
- .globl sys32_lchown16_wrapper
-sys32_lchown16_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_uid_emu31_t
- jg sys32_lchown16 # branch to system call
-
- .globl sys32_lseek_wrapper
-sys32_lseek_wrapper:
- llgfr %r2,%r2 # unsigned int
- lgfr %r3,%r3 # off_t
- llgfr %r4,%r4 # unsigned int
- jg sys_lseek # branch to system call
-
-#sys32_getpid_wrapper # void
-
- .globl sys32_mount_wrapper
-sys32_mount_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # char *
- llgfr %r5,%r5 # unsigned long
- llgtr %r6,%r6 # void *
- jg compat_sys_mount # branch to system call
-
- .globl sys32_oldumount_wrapper
-sys32_oldumount_wrapper:
- llgtr %r2,%r2 # char *
- jg sys_oldumount # branch to system call
-
- .globl sys32_setuid16_wrapper
-sys32_setuid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- jg sys32_setuid16 # branch to system call
-
-#sys32_getuid16_wrapper # void
-
- .globl sys32_ptrace_wrapper
-sys32_ptrace_wrapper:
- lgfr %r2,%r2 # long
- lgfr %r3,%r3 # long
- llgtr %r4,%r4 # long
- llgfr %r5,%r5 # long
- jg compat_sys_ptrace # branch to system call
-
- .globl sys32_alarm_wrapper
-sys32_alarm_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_alarm # branch to system call
-
-#sys32_pause_wrapper # void
-
- .globl compat_sys_utime_wrapper
-compat_sys_utime_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_utimbuf *
- jg compat_sys_utime # branch to system call
-
- .globl sys32_access_wrapper
-sys32_access_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_access # branch to system call
-
- .globl sys32_nice_wrapper
-sys32_nice_wrapper:
- lgfr %r2,%r2 # int
- jg sys_nice # branch to system call
-
-#sys32_sync_wrapper # void
-
- .globl sys32_kill_wrapper
-sys32_kill_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_kill # branch to system call
-
- .globl sys32_rename_wrapper
-sys32_rename_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_rename # branch to system call
-
- .globl sys32_mkdir_wrapper
-sys32_mkdir_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_mkdir # branch to system call
-
- .globl sys32_rmdir_wrapper
-sys32_rmdir_wrapper:
- llgtr %r2,%r2 # const char *
- jg sys_rmdir # branch to system call
-
- .globl sys32_dup_wrapper
-sys32_dup_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_dup # branch to system call
-
- .globl sys32_pipe_wrapper
-sys32_pipe_wrapper:
- llgtr %r2,%r2 # u32 *
- jg sys_pipe # branch to system call
-
- .globl compat_sys_times_wrapper
-compat_sys_times_wrapper:
- llgtr %r2,%r2 # struct compat_tms *
- jg compat_sys_times # branch to system call
-
- .globl sys32_brk_wrapper
-sys32_brk_wrapper:
- llgtr %r2,%r2 # unsigned long
- jg sys_brk # branch to system call
-
- .globl sys32_setgid16_wrapper
-sys32_setgid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- jg sys32_setgid16 # branch to system call
-
-#sys32_getgid16_wrapper # void
-
- .globl sys32_signal_wrapper
-sys32_signal_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __sighandler_t
- jg sys_signal
-
-#sys32_geteuid16_wrapper # void
-
-#sys32_getegid16_wrapper # void
-
- .globl sys32_acct_wrapper
-sys32_acct_wrapper:
- llgtr %r2,%r2 # char *
- jg sys_acct # branch to system call
-
- .globl sys32_umount_wrapper
-sys32_umount_wrapper:
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_umount # branch to system call
-
- .globl compat_sys_ioctl_wrapper
-compat_sys_ioctl_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_ioctl # branch to system call
-
- .globl compat_sys_fcntl_wrapper
-compat_sys_fcntl_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_fcntl # branch to system call
-
- .globl sys32_setpgid_wrapper
-sys32_setpgid_wrapper:
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # pid_t
- jg sys_setpgid # branch to system call
-
- .globl sys32_umask_wrapper
-sys32_umask_wrapper:
- lgfr %r2,%r2 # int
- jg sys_umask # branch to system call
-
- .globl sys32_chroot_wrapper
-sys32_chroot_wrapper:
- llgtr %r2,%r2 # char *
- jg sys_chroot # branch to system call
-
- .globl sys32_ustat_wrapper
-sys32_ustat_wrapper:
- llgfr %r2,%r2 # dev_t
- llgtr %r3,%r3 # struct ustat *
- jg sys_ustat
-
- .globl sys32_dup2_wrapper
-sys32_dup2_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_dup2 # branch to system call
-
-#sys32_getppid_wrapper # void
-
-#sys32_getpgrp_wrapper # void
-
-#sys32_setsid_wrapper # void
-
- .globl sys32_sigaction_wrapper
-sys32_sigaction_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct old_sigaction *
- llgtr %r4,%r4 # struct old_sigaction32 *
- jg sys32_sigaction # branch to system call
-
- .globl sys32_setreuid16_wrapper
-sys32_setreuid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- jg sys32_setreuid16 # branch to system call
-
- .globl sys32_setregid16_wrapper
-sys32_setregid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- llgfr %r3,%r3 # __kernel_old_gid_emu31_t
- jg sys32_setregid16 # branch to system call
-
- .globl sys_sigsuspend_wrapper
-sys_sigsuspend_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # old_sigset_t
- jg sys_sigsuspend
-
- .globl compat_sys_sigpending_wrapper
-compat_sys_sigpending_wrapper:
- llgtr %r2,%r2 # compat_old_sigset_t *
- jg compat_sys_sigpending # branch to system call
-
- .globl sys32_sethostname_wrapper
-sys32_sethostname_wrapper:
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_sethostname # branch to system call
-
- .globl compat_sys_setrlimit_wrapper
-compat_sys_setrlimit_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_setrlimit # branch to system call
-
- .globl compat_sys_old_getrlimit_wrapper
-compat_sys_old_getrlimit_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_old_getrlimit # branch to system call
-
- .globl compat_sys_getrlimit_wrapper
-compat_sys_getrlimit_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct rlimit_emu31 *
- jg compat_sys_getrlimit # branch to system call
-
- .globl sys32_mmap2_wrapper
-sys32_mmap2_wrapper:
- llgtr %r2,%r2 # struct mmap_arg_struct_emu31 *
- jg sys32_mmap2 # branch to system call
-
- .globl compat_sys_getrusage_wrapper
-compat_sys_getrusage_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct rusage_emu31 *
- jg compat_sys_getrusage # branch to system call
-
- .globl sys32_gettimeofday_wrapper
-sys32_gettimeofday_wrapper:
- llgtr %r2,%r2 # struct timeval_emu31 *
- llgtr %r3,%r3 # struct timezone *
- jg sys32_gettimeofday # branch to system call
-
- .globl sys32_settimeofday_wrapper
-sys32_settimeofday_wrapper:
- llgtr %r2,%r2 # struct timeval_emu31 *
- llgtr %r3,%r3 # struct timezone *
- jg sys32_settimeofday # branch to system call
-
- .globl sys32_getgroups16_wrapper
-sys32_getgroups16_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- jg sys32_getgroups16 # branch to system call
-
- .globl sys32_setgroups16_wrapper
-sys32_setgroups16_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- jg sys32_setgroups16 # branch to system call
-
- .globl sys32_symlink_wrapper
-sys32_symlink_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_symlink # branch to system call
-
- .globl sys32_readlink_wrapper
-sys32_readlink_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # char *
- lgfr %r4,%r4 # int
- jg sys_readlink # branch to system call
-
- .globl sys32_uselib_wrapper
-sys32_uselib_wrapper:
- llgtr %r2,%r2 # const char *
- jg sys_uselib # branch to system call
-
- .globl sys32_swapon_wrapper
-sys32_swapon_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- jg sys_swapon # branch to system call
-
- .globl sys32_reboot_wrapper
-sys32_reboot_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # unsigned int
- llgtr %r5,%r5 # void *
- jg sys_reboot # branch to system call
-
- .globl old32_readdir_wrapper
-old32_readdir_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_old_readdir # branch to system call
-
- .globl old32_mmap_wrapper
-old32_mmap_wrapper:
- llgtr %r2,%r2 # struct mmap_arg_struct_emu31 *
- jg old32_mmap # branch to system call
-
- .globl sys32_munmap_wrapper
-sys32_munmap_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_munmap # branch to system call
-
- .globl sys32_truncate_wrapper
-sys32_truncate_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned long
- jg sys_truncate # branch to system call
-
- .globl sys32_ftruncate_wrapper
-sys32_ftruncate_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- jg sys_ftruncate # branch to system call
-
- .globl sys32_fchmod_wrapper
-sys32_fchmod_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # mode_t
- jg sys_fchmod # branch to system call
-
- .globl sys32_fchown16_wrapper
-sys32_fchown16_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # compat_uid_t
- llgfr %r4,%r4 # compat_uid_t
- jg sys32_fchown16 # branch to system call
-
- .globl sys32_getpriority_wrapper
-sys32_getpriority_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_getpriority # branch to system call
-
- .globl sys32_setpriority_wrapper
-sys32_setpriority_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_setpriority # branch to system call
-
- .globl compat_sys_statfs_wrapper
-compat_sys_statfs_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_statfs *
- jg compat_sys_statfs # branch to system call
-
- .globl compat_sys_fstatfs_wrapper
-compat_sys_fstatfs_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct compat_statfs *
- jg compat_sys_fstatfs # branch to system call
-
- .globl compat_sys_socketcall_wrapper
-compat_sys_socketcall_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # u32 *
- jg compat_sys_socketcall # branch to system call
-
- .globl sys32_syslog_wrapper
-sys32_syslog_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- lgfr %r4,%r4 # int
- jg sys_syslog # branch to system call
-
- .globl compat_sys_setitimer_wrapper
-compat_sys_setitimer_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct itimerval_emu31 *
- llgtr %r4,%r4 # struct itimerval_emu31 *
- jg compat_sys_setitimer # branch to system call
-
- .globl compat_sys_getitimer_wrapper
-compat_sys_getitimer_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct itimerval_emu31 *
- jg compat_sys_getitimer # branch to system call
-
- .globl compat_sys_newstat_wrapper
-compat_sys_newstat_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newstat # branch to system call
-
- .globl compat_sys_newlstat_wrapper
-compat_sys_newlstat_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newlstat # branch to system call
-
- .globl compat_sys_newfstat_wrapper
-compat_sys_newfstat_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # struct stat_emu31 *
- jg compat_sys_newfstat # branch to system call
-
-#sys32_vhangup_wrapper # void
-
- .globl compat_sys_wait4_wrapper
-compat_sys_wait4_wrapper:
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # unsigned int *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct rusage *
- jg compat_sys_wait4 # branch to system call
-
- .globl sys32_swapoff_wrapper
-sys32_swapoff_wrapper:
- llgtr %r2,%r2 # const char *
- jg sys_swapoff # branch to system call
-
- .globl compat_sys_sysinfo_wrapper
-compat_sys_sysinfo_wrapper:
- llgtr %r2,%r2 # struct sysinfo_emu31 *
- jg compat_sys_sysinfo # branch to system call
-
- .globl sys32_ipc_wrapper
-sys32_ipc_wrapper:
- llgfr %r2,%r2 # uint
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- llgfr %r6,%r6 # u32
- jg sys32_ipc # branch to system call
-
- .globl sys32_fsync_wrapper
-sys32_fsync_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_fsync # branch to system call
-
-#sys32_sigreturn_wrapper # done in sigreturn_glue
-
-#sys32_clone_wrapper # done in clone_glue
-
- .globl sys32_setdomainname_wrapper
-sys32_setdomainname_wrapper:
- llgtr %r2,%r2 # char *
- lgfr %r3,%r3 # int
- jg sys_setdomainname # branch to system call
-
- .globl sys32_newuname_wrapper
-sys32_newuname_wrapper:
- llgtr %r2,%r2 # struct new_utsname *
- jg s390x_newuname # branch to system call
-
- .globl compat_sys_adjtimex_wrapper
-compat_sys_adjtimex_wrapper:
- llgtr %r2,%r2 # struct compat_timex *
- jg compat_sys_adjtimex # branch to system call
-
- .globl sys32_mprotect_wrapper
-sys32_mprotect_wrapper:
- llgtr %r2,%r2 # unsigned long (actually pointer
- llgfr %r3,%r3 # size_t
- llgfr %r4,%r4 # unsigned long
- jg sys_mprotect # branch to system call
-
- .globl compat_sys_sigprocmask_wrapper
-compat_sys_sigprocmask_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_old_sigset_t *
- llgtr %r4,%r4 # compat_old_sigset_t *
- jg compat_sys_sigprocmask # branch to system call
-
- .globl sys32_init_module_wrapper
-sys32_init_module_wrapper:
- llgtr %r2,%r2 # void *
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # char *
- jg sys32_init_module # branch to system call
-
- .globl sys32_delete_module_wrapper
-sys32_delete_module_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned int
- jg sys32_delete_module # branch to system call
-
- .globl sys32_quotactl_wrapper
-sys32_quotactl_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # qid_t
- llgtr %r5,%r5 # caddr_t
- jg sys_quotactl # branch to system call
-
- .globl sys32_getpgid_wrapper
-sys32_getpgid_wrapper:
- lgfr %r2,%r2 # pid_t
- jg sys_getpgid # branch to system call
-
- .globl sys32_fchdir_wrapper
-sys32_fchdir_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_fchdir # branch to system call
-
- .globl sys32_bdflush_wrapper
-sys32_bdflush_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # long
- jg sys_bdflush # branch to system call
-
- .globl sys32_sysfs_wrapper
-sys32_sysfs_wrapper:
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys_sysfs # branch to system call
-
- .globl sys32_personality_wrapper
-sys32_personality_wrapper:
- llgfr %r2,%r2 # unsigned long
- jg s390x_personality # branch to system call
-
- .globl sys32_setfsuid16_wrapper
-sys32_setfsuid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- jg sys32_setfsuid16 # branch to system call
-
- .globl sys32_setfsgid16_wrapper
-sys32_setfsgid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- jg sys32_setfsgid16 # branch to system call
-
- .globl sys32_llseek_wrapper
-sys32_llseek_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgtr %r5,%r5 # loff_t *
- llgfr %r6,%r6 # unsigned int
- jg sys_llseek # branch to system call
-
- .globl sys32_getdents_wrapper
-sys32_getdents_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg compat_sys_getdents # branch to system call
-
- .globl compat_sys_select_wrapper
-compat_sys_select_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_fd_set *
- llgtr %r4,%r4 # compat_fd_set *
- llgtr %r5,%r5 # compat_fd_set *
- llgtr %r6,%r6 # struct compat_timeval *
- jg compat_sys_select # branch to system call
-
- .globl sys32_flock_wrapper
-sys32_flock_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- jg sys_flock # branch to system call
-
- .globl sys32_msync_wrapper
-sys32_msync_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- lgfr %r4,%r4 # int
- jg sys_msync # branch to system call
-
- .globl compat_sys_readv_wrapper
-compat_sys_readv_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct compat_iovec *
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_readv # branch to system call
-
- .globl compat_sys_writev_wrapper
-compat_sys_writev_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct compat_iovec *
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_writev # branch to system call
-
- .globl sys32_getsid_wrapper
-sys32_getsid_wrapper:
- lgfr %r2,%r2 # pid_t
- jg sys_getsid # branch to system call
-
- .globl sys32_fdatasync_wrapper
-sys32_fdatasync_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_fdatasync # branch to system call
-
-#sys32_sysctl_wrapper # tbd
-
- .globl sys32_mlock_wrapper
-sys32_mlock_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_mlock # branch to system call
-
- .globl sys32_munlock_wrapper
-sys32_munlock_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- jg sys_munlock # branch to system call
-
- .globl sys32_mlockall_wrapper
-sys32_mlockall_wrapper:
- lgfr %r2,%r2 # int
- jg sys_mlockall # branch to system call
-
-#sys32_munlockall_wrapper # void
-
- .globl sys32_sched_setparam_wrapper
-sys32_sched_setparam_wrapper:
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct sched_param *
- jg sys_sched_setparam # branch to system call
-
- .globl sys32_sched_getparam_wrapper
-sys32_sched_getparam_wrapper:
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct sched_param *
- jg sys_sched_getparam # branch to system call
-
- .globl sys32_sched_setscheduler_wrapper
-sys32_sched_setscheduler_wrapper:
- lgfr %r2,%r2 # pid_t
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct sched_param *
- jg sys_sched_setscheduler # branch to system call
-
- .globl sys32_sched_getscheduler_wrapper
-sys32_sched_getscheduler_wrapper:
- lgfr %r2,%r2 # pid_t
- jg sys_sched_getscheduler # branch to system call
-
-#sys32_sched_yield_wrapper # void
-
- .globl sys32_sched_get_priority_max_wrapper
-sys32_sched_get_priority_max_wrapper:
- lgfr %r2,%r2 # int
- jg sys_sched_get_priority_max # branch to system call
-
- .globl sys32_sched_get_priority_min_wrapper
-sys32_sched_get_priority_min_wrapper:
- lgfr %r2,%r2 # int
- jg sys_sched_get_priority_min # branch to system call
-
- .globl sys32_sched_rr_get_interval_wrapper
-sys32_sched_rr_get_interval_wrapper:
- lgfr %r2,%r2 # pid_t
- llgtr %r3,%r3 # struct compat_timespec *
- jg sys32_sched_rr_get_interval # branch to system call
-
- .globl compat_sys_nanosleep_wrapper
-compat_sys_nanosleep_wrapper:
- llgtr %r2,%r2 # struct compat_timespec *
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_nanosleep # branch to system call
-
- .globl sys32_mremap_wrapper
-sys32_mremap_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_mremap # branch to system call
-
- .globl sys32_setresuid16_wrapper
-sys32_setresuid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_uid_emu31_t
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_uid_emu31_t
- jg sys32_setresuid16 # branch to system call
-
- .globl sys32_getresuid16_wrapper
-sys32_getresuid16_wrapper:
- llgtr %r2,%r2 # __kernel_old_uid_emu31_t *
- llgtr %r3,%r3 # __kernel_old_uid_emu31_t *
- llgtr %r4,%r4 # __kernel_old_uid_emu31_t *
- jg sys32_getresuid16 # branch to system call
-
- .globl sys32_poll_wrapper
-sys32_poll_wrapper:
- llgtr %r2,%r2 # struct pollfd *
- llgfr %r3,%r3 # unsigned int
- lgfr %r4,%r4 # long
- jg sys_poll # branch to system call
-
- .globl compat_sys_nfsservctl_wrapper
-compat_sys_nfsservctl_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct compat_nfsctl_arg*
- llgtr %r4,%r4 # union compat_nfsctl_res*
- jg compat_sys_nfsservctl # branch to system call
-
- .globl sys32_setresgid16_wrapper
-sys32_setresgid16_wrapper:
- llgfr %r2,%r2 # __kernel_old_gid_emu31_t
- llgfr %r3,%r3 # __kernel_old_gid_emu31_t
- llgfr %r4,%r4 # __kernel_old_gid_emu31_t
- jg sys32_setresgid16 # branch to system call
-
- .globl sys32_getresgid16_wrapper
-sys32_getresgid16_wrapper:
- llgtr %r2,%r2 # __kernel_old_gid_emu31_t *
- llgtr %r3,%r3 # __kernel_old_gid_emu31_t *
- llgtr %r4,%r4 # __kernel_old_gid_emu31_t *
- jg sys32_getresgid16 # branch to system call
-
- .globl sys32_prctl_wrapper
-sys32_prctl_wrapper:
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_prctl # branch to system call
-
-#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue
-
- .globl sys32_rt_sigaction_wrapper
-sys32_rt_sigaction_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const struct sigaction_emu31 *
- llgtr %r4,%r4 # const struct sigaction_emu31 *
- llgfr %r5,%r5 # size_t
- jg sys32_rt_sigaction # branch to system call
-
- .globl sys32_rt_sigprocmask_wrapper
-sys32_rt_sigprocmask_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # old_sigset_emu31 *
- llgtr %r4,%r4 # old_sigset_emu31 *
- llgfr %r5,%r5 # size_t
- jg sys32_rt_sigprocmask # branch to system call
-
- .globl sys32_rt_sigpending_wrapper
-sys32_rt_sigpending_wrapper:
- llgtr %r2,%r2 # sigset_emu31 *
- llgfr %r3,%r3 # size_t
- jg sys32_rt_sigpending # branch to system call
-
- .globl compat_sys_rt_sigtimedwait_wrapper
-compat_sys_rt_sigtimedwait_wrapper:
- llgtr %r2,%r2 # const sigset_emu31_t *
- llgtr %r3,%r3 # siginfo_emu31_t *
- llgtr %r4,%r4 # const struct compat_timespec *
- llgfr %r5,%r5 # size_t
- jg compat_sys_rt_sigtimedwait # branch to system call
-
- .globl sys32_rt_sigqueueinfo_wrapper
-sys32_rt_sigqueueinfo_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # siginfo_emu31_t *
- jg sys32_rt_sigqueueinfo # branch to system call
-
- .globl compat_sys_rt_sigsuspend_wrapper
-compat_sys_rt_sigsuspend_wrapper:
- llgtr %r2,%r2 # compat_sigset_t *
- llgfr %r3,%r3 # compat_size_t
- jg compat_sys_rt_sigsuspend
-
- .globl sys32_pread64_wrapper
-sys32_pread64_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg sys32_pread64 # branch to system call
-
- .globl sys32_pwrite64_wrapper
-sys32_pwrite64_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # u32
- llgfr %r6,%r6 # u32
- jg sys32_pwrite64 # branch to system call
-
- .globl sys32_chown16_wrapper
-sys32_chown16_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # __kernel_old_uid_emu31_t
- llgfr %r4,%r4 # __kernel_old_gid_emu31_t
- jg sys32_chown16 # branch to system call
-
- .globl sys32_getcwd_wrapper
-sys32_getcwd_wrapper:
- llgtr %r2,%r2 # char *
- llgfr %r3,%r3 # unsigned long
- jg sys_getcwd # branch to system call
-
- .globl sys32_capget_wrapper
-sys32_capget_wrapper:
- llgtr %r2,%r2 # cap_user_header_t
- llgtr %r3,%r3 # cap_user_data_t
- jg sys_capget # branch to system call
-
- .globl sys32_capset_wrapper
-sys32_capset_wrapper:
- llgtr %r2,%r2 # cap_user_header_t
- llgtr %r3,%r3 # const cap_user_data_t
- jg sys_capset # branch to system call
-
- .globl sys32_sigaltstack_wrapper
-sys32_sigaltstack_wrapper:
- llgtr %r2,%r2 # const stack_emu31_t *
- llgtr %r3,%r3 # stack_emu31_t *
- jg sys32_sigaltstack
-
- .globl sys32_sendfile_wrapper
-sys32_sendfile_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # __kernel_off_emu31_t *
- llgfr %r5,%r5 # size_t
- jg sys32_sendfile # branch to system call
-
-#sys32_vfork_wrapper # done in vfork_glue
-
- .globl sys32_truncate64_wrapper
-sys32_truncate64_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys32_truncate64 # branch to system call
-
- .globl sys32_ftruncate64_wrapper
-sys32_ftruncate64_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- jg sys32_ftruncate64 # branch to system call
-
- .globl sys32_lchown_wrapper
-sys32_lchown_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_lchown # branch to system call
-
-#sys32_getuid_wrapper # void
-#sys32_getgid_wrapper # void
-#sys32_geteuid_wrapper # void
-#sys32_getegid_wrapper # void
-
- .globl sys32_setreuid_wrapper
-sys32_setreuid_wrapper:
- llgfr %r2,%r2 # uid_t
- llgfr %r3,%r3 # uid_t
- jg sys_setreuid # branch to system call
-
- .globl sys32_setregid_wrapper
-sys32_setregid_wrapper:
- llgfr %r2,%r2 # gid_t
- llgfr %r3,%r3 # gid_t
- jg sys_setregid # branch to system call
-
- .globl sys32_getgroups_wrapper
-sys32_getgroups_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # gid_t *
- jg sys_getgroups # branch to system call
-
- .globl sys32_setgroups_wrapper
-sys32_setgroups_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # gid_t *
- jg sys_setgroups # branch to system call
-
- .globl sys32_fchown_wrapper
-sys32_fchown_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_fchown # branch to system call
-
- .globl sys32_setresuid_wrapper
-sys32_setresuid_wrapper:
- llgfr %r2,%r2 # uid_t
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # uid_t
- jg sys_setresuid # branch to system call
-
- .globl sys32_getresuid_wrapper
-sys32_getresuid_wrapper:
- llgtr %r2,%r2 # uid_t *
- llgtr %r3,%r3 # uid_t *
- llgtr %r4,%r4 # uid_t *
- jg sys_getresuid # branch to system call
-
- .globl sys32_setresgid_wrapper
-sys32_setresgid_wrapper:
- llgfr %r2,%r2 # gid_t
- llgfr %r3,%r3 # gid_t
- llgfr %r4,%r4 # gid_t
- jg sys_setresgid # branch to system call
-
- .globl sys32_getresgid_wrapper
-sys32_getresgid_wrapper:
- llgtr %r2,%r2 # gid_t *
- llgtr %r3,%r3 # gid_t *
- llgtr %r4,%r4 # gid_t *
- jg sys_getresgid # branch to system call
-
- .globl sys32_chown_wrapper
-sys32_chown_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # uid_t
- llgfr %r4,%r4 # gid_t
- jg sys_chown # branch to system call
-
- .globl sys32_setuid_wrapper
-sys32_setuid_wrapper:
- llgfr %r2,%r2 # uid_t
- jg sys_setuid # branch to system call
-
- .globl sys32_setgid_wrapper
-sys32_setgid_wrapper:
- llgfr %r2,%r2 # gid_t
- jg sys_setgid # branch to system call
-
- .globl sys32_setfsuid_wrapper
-sys32_setfsuid_wrapper:
- llgfr %r2,%r2 # uid_t
- jg sys_setfsuid # branch to system call
-
- .globl sys32_setfsgid_wrapper
-sys32_setfsgid_wrapper:
- llgfr %r2,%r2 # gid_t
- jg sys_setfsgid # branch to system call
-
- .globl sys32_pivot_root_wrapper
-sys32_pivot_root_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- jg sys_pivot_root # branch to system call
-
- .globl sys32_mincore_wrapper
-sys32_mincore_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- llgtr %r4,%r4 # unsigned char *
- jg sys_mincore # branch to system call
-
- .globl sys32_madvise_wrapper
-sys32_madvise_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # size_t
- lgfr %r4,%r4 # int
- jg sys_madvise # branch to system call
-
- .globl sys32_getdents64_wrapper
-sys32_getdents64_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # void *
- llgfr %r4,%r4 # unsigned int
- jg sys_getdents64 # branch to system call
-
- .globl compat_sys_fcntl64_wrapper
-compat_sys_fcntl64_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgfr %r3,%r3 # unsigned int
- llgfr %r4,%r4 # unsigned long
- jg compat_sys_fcntl64 # branch to system call
-
- .globl sys32_stat64_wrapper
-sys32_stat64_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_stat64 # branch to system call
-
- .globl sys32_lstat64_wrapper
-sys32_lstat64_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_lstat64 # branch to system call
-
- .globl sys32_stime_wrapper
-sys32_stime_wrapper:
- llgtr %r2,%r2 # long *
- jg compat_sys_stime # branch to system call
-
- .globl sys32_sysctl_wrapper
-sys32_sysctl_wrapper:
- llgtr %r2,%r2 # struct __sysctl_args32 *
- jg sys32_sysctl
-
- .globl sys32_fstat64_wrapper
-sys32_fstat64_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgtr %r3,%r3 # struct stat64 *
- jg sys32_fstat64 # branch to system call
-
- .globl compat_sys_futex_wrapper
-compat_sys_futex_wrapper:
- llgtr %r2,%r2 # u32 *
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct compat_timespec *
- llgtr %r6,%r6 # u32 *
- lgf %r0,164(%r15) # int
- stg %r0,160(%r15)
- jg compat_sys_futex # branch to system call
-
- .globl sys32_setxattr_wrapper
-sys32_setxattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_setxattr
-
- .globl sys32_lsetxattr_wrapper
-sys32_lsetxattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_lsetxattr
-
- .globl sys32_fsetxattr_wrapper
-sys32_fsetxattr_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- lgfr %r6,%r6 # int
- jg sys_fsetxattr
-
- .globl sys32_getxattr_wrapper
-sys32_getxattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_getxattr
-
- .globl sys32_lgetxattr_wrapper
-sys32_lgetxattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_lgetxattr
-
- .globl sys32_fgetxattr_wrapper
-sys32_fgetxattr_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # void *
- llgfr %r5,%r5 # size_t
- jg sys_fgetxattr
-
- .globl sys32_listxattr_wrapper
-sys32_listxattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_listxattr
-
- .globl sys32_llistxattr_wrapper
-sys32_llistxattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_llistxattr
-
- .globl sys32_flistxattr_wrapper
-sys32_flistxattr_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- jg sys_flistxattr
-
- .globl sys32_removexattr_wrapper
-sys32_removexattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- jg sys_removexattr
-
- .globl sys32_lremovexattr_wrapper
-sys32_lremovexattr_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # char *
- jg sys_lremovexattr
-
- .globl sys32_fremovexattr_wrapper
-sys32_fremovexattr_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # char *
- jg sys_fremovexattr
-
- .globl sys32_sched_setaffinity_wrapper
-sys32_sched_setaffinity_wrapper:
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # unsigned long *
- jg compat_sys_sched_setaffinity
-
- .globl sys32_sched_getaffinity_wrapper
-sys32_sched_getaffinity_wrapper:
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # unsigned long *
- jg compat_sys_sched_getaffinity
-
- .globl sys32_exit_group_wrapper
-sys32_exit_group_wrapper:
- lgfr %r2,%r2 # int
- jg sys_exit_group # branch to system call
-
- .globl sys32_set_tid_address_wrapper
-sys32_set_tid_address_wrapper:
- llgtr %r2,%r2 # int *
- jg sys_set_tid_address # branch to system call
-
- .globl sys_epoll_create_wrapper
-sys_epoll_create_wrapper:
- lgfr %r2,%r2 # int
- jg sys_epoll_create # branch to system call
-
- .globl sys_epoll_ctl_wrapper
-sys_epoll_ctl_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # struct epoll_event *
- jg sys_epoll_ctl # branch to system call
-
- .globl sys_epoll_wait_wrapper
-sys_epoll_wait_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct epoll_event *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- jg sys_epoll_wait # branch to system call
-
- .globl sys32_lookup_dcookie_wrapper
-sys32_lookup_dcookie_wrapper:
- sllg %r2,%r2,32 # get high word of 64bit dcookie
- or %r2,%r3 # get low word of 64bit dcookie
- llgtr %r3,%r4 # char *
- llgfr %r4,%r5 # size_t
- jg sys_lookup_dcookie
-
- .globl sys32_fadvise64_wrapper
-sys32_fadvise64_wrapper:
- lgfr %r2,%r2 # int
- sllg %r3,%r3,32 # get high word of 64bit loff_t
- or %r3,%r4 # get low word of 64bit loff_t
- llgfr %r4,%r5 # size_t (unsigned long)
- lgfr %r5,%r6 # int
- jg sys32_fadvise64
-
- .globl sys32_fadvise64_64_wrapper
-sys32_fadvise64_64_wrapper:
- llgtr %r2,%r2 # struct fadvise64_64_args *
- jg sys32_fadvise64_64
-
- .globl sys32_clock_settime_wrapper
-sys32_clock_settime_wrapper:
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_settime
-
- .globl sys32_clock_gettime_wrapper
-sys32_clock_gettime_wrapper:
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_gettime
-
- .globl sys32_clock_getres_wrapper
-sys32_clock_getres_wrapper:
- lgfr %r2,%r2 # clockid_t (int)
- llgtr %r3,%r3 # struct compat_timespec *
- jg compat_sys_clock_getres
-
- .globl sys32_clock_nanosleep_wrapper
-sys32_clock_nanosleep_wrapper:
- lgfr %r2,%r2 # clockid_t (int)
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_timespec *
- llgtr %r5,%r5 # struct compat_timespec *
- jg compat_sys_clock_nanosleep
-
- .globl sys32_timer_create_wrapper
-sys32_timer_create_wrapper:
- lgfr %r2,%r2 # timer_t (int)
- llgtr %r3,%r3 # struct compat_sigevent *
- llgtr %r4,%r4 # timer_t *
- jg compat_sys_timer_create
-
- .globl sys32_timer_settime_wrapper
-sys32_timer_settime_wrapper:
- lgfr %r2,%r2 # timer_t (int)
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_itimerspec *
- llgtr %r5,%r5 # struct compat_itimerspec *
- jg compat_sys_timer_settime
-
- .globl sys32_timer_gettime_wrapper
-sys32_timer_gettime_wrapper:
- lgfr %r2,%r2 # timer_t (int)
- llgtr %r3,%r3 # struct compat_itimerspec *
- jg compat_sys_timer_gettime
-
- .globl sys32_timer_getoverrun_wrapper
-sys32_timer_getoverrun_wrapper:
- lgfr %r2,%r2 # timer_t (int)
- jg sys_timer_getoverrun
-
- .globl sys32_timer_delete_wrapper
-sys32_timer_delete_wrapper:
- lgfr %r2,%r2 # timer_t (int)
- jg sys_timer_delete
-
- .globl sys32_io_setup_wrapper
-sys32_io_setup_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # u32 *
- jg compat_sys_io_setup
-
- .globl sys32_io_destroy_wrapper
-sys32_io_destroy_wrapper:
- llgfr %r2,%r2 # (aio_context_t) u32
- jg sys_io_destroy
-
- .globl sys32_io_getevents_wrapper
-sys32_io_getevents_wrapper:
- llgfr %r2,%r2 # (aio_context_t) u32
- lgfr %r3,%r3 # long
- lgfr %r4,%r4 # long
- llgtr %r5,%r5 # struct io_event *
- llgtr %r6,%r6 # struct compat_timespec *
- jg compat_sys_io_getevents
-
- .globl sys32_io_submit_wrapper
-sys32_io_submit_wrapper:
- llgfr %r2,%r2 # (aio_context_t) u32
- lgfr %r3,%r3 # long
- llgtr %r4,%r4 # struct iocb **
- jg compat_sys_io_submit
-
- .globl sys32_io_cancel_wrapper
-sys32_io_cancel_wrapper:
- llgfr %r2,%r2 # (aio_context_t) u32
- llgtr %r3,%r3 # struct iocb *
- llgtr %r4,%r4 # struct io_event *
- jg sys_io_cancel
-
- .globl compat_sys_statfs64_wrapper
-compat_sys_statfs64_wrapper:
- llgtr %r2,%r2 # const char *
- llgfr %r3,%r3 # compat_size_t
- llgtr %r4,%r4 # struct compat_statfs64 *
- jg compat_sys_statfs64
-
- .globl compat_sys_fstatfs64_wrapper
-compat_sys_fstatfs64_wrapper:
- llgfr %r2,%r2 # unsigned int fd
- llgfr %r3,%r3 # compat_size_t
- llgtr %r4,%r4 # struct compat_statfs64 *
- jg compat_sys_fstatfs64
-
- .globl compat_sys_mq_open_wrapper
-compat_sys_mq_open_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # mode_t
- llgtr %r5,%r5 # struct compat_mq_attr *
- jg compat_sys_mq_open
-
- .globl sys32_mq_unlink_wrapper
-sys32_mq_unlink_wrapper:
- llgtr %r2,%r2 # const char *
- jg sys_mq_unlink
-
- .globl compat_sys_mq_timedsend_wrapper
-compat_sys_mq_timedsend_wrapper:
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # unsigned int
- llgtr %r6,%r6 # const struct compat_timespec *
- jg compat_sys_mq_timedsend
-
- .globl compat_sys_mq_timedreceive_wrapper
-compat_sys_mq_timedreceive_wrapper:
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # char *
- llgfr %r4,%r4 # size_t
- llgtr %r5,%r5 # unsigned int *
- llgtr %r6,%r6 # const struct compat_timespec *
- jg compat_sys_mq_timedreceive
-
- .globl compat_sys_mq_notify_wrapper
-compat_sys_mq_notify_wrapper:
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # struct compat_sigevent *
- jg compat_sys_mq_notify
-
- .globl compat_sys_mq_getsetattr_wrapper
-compat_sys_mq_getsetattr_wrapper:
- lgfr %r2,%r2 # mqd_t
- llgtr %r3,%r3 # struct compat_mq_attr *
- llgtr %r4,%r4 # struct compat_mq_attr *
- jg compat_sys_mq_getsetattr
-
- .globl compat_sys_add_key_wrapper
-compat_sys_add_key_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # const void *
- llgfr %r5,%r5 # size_t
- llgfr %r6,%r6 # (key_serial_t) u32
- jg sys_add_key
-
- .globl compat_sys_request_key_wrapper
-compat_sys_request_key_wrapper:
- llgtr %r2,%r2 # const char *
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # const void *
- llgfr %r5,%r5 # (key_serial_t) u32
- jg sys_request_key
-
- .globl sys32_remap_file_pages_wrapper
-sys32_remap_file_pages_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgfr %r4,%r4 # unsigned long
- llgfr %r5,%r5 # unsigned long
- llgfr %r6,%r6 # unsigned long
- jg sys_remap_file_pages
-
- .globl compat_sys_waitid_wrapper
-compat_sys_waitid_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # pid_t
- llgtr %r4,%r4 # siginfo_emu31_t *
- lgfr %r5,%r5 # int
- llgtr %r6,%r6 # struct rusage_emu31 *
- jg compat_sys_waitid
-
- .globl compat_sys_kexec_load_wrapper
-compat_sys_kexec_load_wrapper:
- llgfr %r2,%r2 # unsigned long
- llgfr %r3,%r3 # unsigned long
- llgtr %r4,%r4 # struct kexec_segment *
- llgfr %r5,%r5 # unsigned long
- jg compat_sys_kexec_load
-
- .globl sys_ioprio_set_wrapper
-sys_ioprio_set_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- lgfr %r4,%r4 # int
- jg sys_ioprio_set
-
- .globl sys_ioprio_get_wrapper
-sys_ioprio_get_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_ioprio_get
-
- .globl sys_inotify_add_watch_wrapper
-sys_inotify_add_watch_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # u32
- jg sys_inotify_add_watch
-
- .globl sys_inotify_rm_watch_wrapper
-sys_inotify_rm_watch_wrapper:
- lgfr %r2,%r2 # int
- llgfr %r3,%r3 # u32
- jg sys_inotify_rm_watch
-
- .globl compat_sys_openat_wrapper
-compat_sys_openat_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- jg compat_sys_openat
-
- .globl sys_mkdirat_wrapper
-sys_mkdirat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_mkdirat
-
- .globl sys_mknodat_wrapper
-sys_mknodat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgfr %r5,%r5 # unsigned int
- jg sys_mknodat
-
- .globl sys_fchownat_wrapper
-sys_fchownat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # uid_t
- llgfr %r5,%r5 # gid_t
- lgfr %r6,%r6 # int
- jg sys_fchownat
-
- .globl compat_sys_futimesat_wrapper
-compat_sys_futimesat_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct timeval *
- jg compat_sys_futimesat
-
- .globl sys32_fstatat64_wrapper
-sys32_fstatat64_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct stat64 *
- lgfr %r5,%r5 # int
- jg sys32_fstatat64
-
- .globl sys_unlinkat_wrapper
-sys_unlinkat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_unlinkat
-
- .globl sys_renameat_wrapper
-sys_renameat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # const char *
- jg sys_renameat
-
- .globl sys_linkat_wrapper
-sys_linkat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # const char *
- lgfr %r6,%r6 # int
- jg sys_linkat
-
- .globl sys_symlinkat_wrapper
-sys_symlinkat_wrapper:
- llgtr %r2,%r2 # const char *
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # const char *
- jg sys_symlinkat
-
- .globl sys_readlinkat_wrapper
-sys_readlinkat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgtr %r4,%r4 # char *
- lgfr %r5,%r5 # int
- jg sys_readlinkat
-
- .globl sys_fchmodat_wrapper
-sys_fchmodat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- llgfr %r4,%r4 # mode_t
- jg sys_fchmodat
-
- .globl sys_faccessat_wrapper
-sys_faccessat_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # const char *
- lgfr %r4,%r4 # int
- jg sys_faccessat
-
- .globl compat_sys_pselect6_wrapper
-compat_sys_pselect6_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # fd_set *
- llgtr %r4,%r4 # fd_set *
- llgtr %r5,%r5 # fd_set *
- llgtr %r6,%r6 # struct timespec *
- llgt %r0,164(%r15) # void *
- stg %r0,160(%r15)
- jg compat_sys_pselect6
-
- .globl compat_sys_ppoll_wrapper
-compat_sys_ppoll_wrapper:
- llgtr %r2,%r2 # struct pollfd *
- llgfr %r3,%r3 # unsigned int
- llgtr %r4,%r4 # struct timespec *
- llgtr %r5,%r5 # const sigset_t *
- llgfr %r6,%r6 # size_t
- jg compat_sys_ppoll
-
- .globl sys_unshare_wrapper
-sys_unshare_wrapper:
- llgfr %r2,%r2 # unsigned long
- jg sys_unshare
-
- .globl compat_sys_set_robust_list_wrapper
-compat_sys_set_robust_list_wrapper:
- llgtr %r2,%r2 # struct compat_robust_list_head *
- llgfr %r3,%r3 # size_t
- jg compat_sys_set_robust_list
-
- .globl compat_sys_get_robust_list_wrapper
-compat_sys_get_robust_list_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_uptr_t_t *
- llgtr %r4,%r4 # compat_size_t *
- jg compat_sys_get_robust_list
-
- .globl sys_splice_wrapper
-sys_splice_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # loff_t *
- lgfr %r4,%r4 # int
- llgtr %r5,%r5 # loff_t *
- llgfr %r6,%r6 # size_t
- llgf %r0,164(%r15) # unsigned int
- stg %r0,160(%r15)
- jg sys_splice
-
- .globl sys_sync_file_range_wrapper
-sys_sync_file_range_wrapper:
- lgfr %r2,%r2 # int
- sllg %r3,%r3,32 # get high word of 64bit loff_t
- or %r3,%r4 # get low word of 64bit loff_t
- sllg %r4,%r5,32 # get high word of 64bit loff_t
- or %r4,%r6 # get low word of 64bit loff_t
- llgf %r5,164(%r15) # unsigned int
- jg sys_sync_file_range
-
- .globl sys_tee_wrapper
-sys_tee_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgfr %r4,%r4 # size_t
- llgfr %r5,%r5 # unsigned int
- jg sys_tee
-
- .globl compat_sys_vmsplice_wrapper
-compat_sys_vmsplice_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_iovec *
- llgfr %r4,%r4 # unsigned int
- llgfr %r5,%r5 # unsigned int
- jg compat_sys_vmsplice
-
- .globl sys_getcpu_wrapper
-sys_getcpu_wrapper:
- llgtr %r2,%r2 # unsigned *
- llgtr %r3,%r3 # unsigned *
- llgtr %r4,%r4 # struct getcpu_cache *
- jg sys_getcpu
-
- .globl compat_sys_epoll_pwait_wrapper
-compat_sys_epoll_pwait_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct compat_epoll_event *
- lgfr %r4,%r4 # int
- lgfr %r5,%r5 # int
- llgtr %r6,%r6 # compat_sigset_t *
- llgf %r0,164(%r15) # compat_size_t
- stg %r0,160(%r15)
- jg compat_sys_epoll_pwait
-
- .globl compat_sys_utimes_wrapper
-compat_sys_utimes_wrapper:
- llgtr %r2,%r2 # char *
- llgtr %r3,%r3 # struct compat_timeval *
- jg compat_sys_utimes
-
- .globl compat_sys_utimensat_wrapper
-compat_sys_utimensat_wrapper:
- llgfr %r2,%r2 # unsigned int
- llgtr %r3,%r3 # char *
- llgtr %r4,%r4 # struct compat_timespec *
- lgfr %r5,%r5 # int
- jg compat_sys_utimensat
-
- .globl compat_sys_signalfd_wrapper
-compat_sys_signalfd_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # compat_sigset_t *
- llgfr %r4,%r4 # compat_size_t
- jg compat_sys_signalfd
-
- .globl sys_eventfd_wrapper
-sys_eventfd_wrapper:
- llgfr %r2,%r2 # unsigned int
- jg sys_eventfd
-
- .globl sys_fallocate_wrapper
-sys_fallocate_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- sllg %r4,%r4,32 # get high word of 64bit loff_t
- lr %r4,%r5 # get low word of 64bit loff_t
- sllg %r5,%r6,32 # get high word of 64bit loff_t
- l %r5,164(%r15) # get low word of 64bit loff_t
- jg sys_fallocate
-
- .globl sys_timerfd_create_wrapper
-sys_timerfd_create_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- jg sys_timerfd_create
-
- .globl compat_sys_timerfd_settime_wrapper
-compat_sys_timerfd_settime_wrapper:
- lgfr %r2,%r2 # int
- lgfr %r3,%r3 # int
- llgtr %r4,%r4 # struct compat_itimerspec *
- llgtr %r5,%r5 # struct compat_itimerspec *
- jg compat_sys_timerfd_settime
-
- .globl compat_sys_timerfd_gettime_wrapper
-compat_sys_timerfd_gettime_wrapper:
- lgfr %r2,%r2 # int
- llgtr %r3,%r3 # struct compat_itimerspec *
- jg compat_sys_timerfd_gettime
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
new file mode 100644
index 00000000000..45cdb37aa6f
--- /dev/null
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -0,0 +1,216 @@
+/*
+ * Compat system call wrappers.
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include "entry.h"
+
+/*
+ * Arity-specific front ends. COMPAT_SYSCALL_WRAPn(name, type1, arg1, ...)
+ * prefixes the name with an underscore and forwards it, together with the
+ * n (type, argument) pairs, to COMPAT_SYSCALL_WRAPx.
+ */
+#define COMPAT_SYSCALL_WRAP1(name, ...) \
+ COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP2(name, ...) \
+ COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP3(name, ...) \
+ COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP4(name, ...) \
+ COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP5(name, ...) \
+ COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP6(name, ...) \
+ COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__)
+
+/*
+ * Parameter declaration for the compat wrapper: argument a is declared as
+ * long when type t is wider than four bytes, and as t itself otherwise.
+ */
+#define __SC_COMPAT_TYPE(t, a) \
+ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
+
+/*
+ * Convert wrapper argument a into a value of type t:
+ * - t is long: the value is sign extended from its low 32 bits,
+ * - t is unsigned long: the value is zero extended from its low 32 bits,
+ * - t is a pointer: only the low 31 bits are kept (mask 0x7fffffff),
+ * - anything else: the value is passed on unchanged.
+ * The build fails for any type wider than four bytes that is neither long,
+ * unsigned long nor a pointer.
+ * NOTE(review): the __TYPE_IS_L/UL/PTR helpers are defined outside this
+ * file; the description above assumes they test for exactly these types.
+ */
+#define __SC_COMPAT_CAST(t, a) \
+({ \
+ long __ReS = a; \
+ \
+ BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \
+ !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \
+ if (__TYPE_IS_L(t)) \
+ __ReS = (s32)a; \
+ if (__TYPE_IS_UL(t)) \
+ __ReS = (u32)a; \
+ if (__TYPE_IS_PTR(t)) \
+ __ReS = a & 0x7fffffff; \
+ (t)__ReS; \
+})
+
+/*
+ * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by
+ * compat tasks. These wrappers are only used for system calls where nothing
+ * but sign or zero extension of the arguments, or zeroing of the upper
+ * 33 bits of pointers, is required.
+ *
+ * For a given name the macro emits a prototype for sys<name>, a prototype
+ * for compat_sys<name>, and the definition of compat_sys<name>, which calls
+ * sys<name> with every argument passed through __SC_COMPAT_CAST.
+ *
+ * Note: the system call invoked by the wrapper performs zero and sign
+ * extension itself for all arguments smaller than eight bytes. Therefore
+ * these compat wrappers only touch arguments with a size of eight bytes
+ * ((unsigned) long and pointers). Zero and sign extension for e.g. int
+ * parameters is done by the regular system call wrappers.
+ */
+#define COMPAT_SYSCALL_WRAPx(x, name, ...) \
+ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
+ asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \
+ { \
+ return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \
+ }
+
+/*
+ * Wrapper instances, one line per system call. Each line lists the system
+ * call name without the sys_ prefix, followed by its (type, name) argument
+ * pairs, and expands to a compat_sys_<name> function.
+ */
+COMPAT_SYSCALL_WRAP1(exit, int, error_code);
+COMPAT_SYSCALL_WRAP1(close, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename);
+COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name);
+COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds);
+COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(nice, int, increment);
+COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes);
+COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes);
+COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk);
+COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler);
+COMPAT_SYSCALL_WRAP1(acct, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags);
+COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid);
+COMPAT_SYSCALL_WRAP1(umask, int, mask);
+COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename);
+COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd);
+COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask);
+COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new);
+COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library);
+COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags);
+COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg);
+COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len);
+COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval);
+COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len);
+COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile);
+COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name);
+COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot);
+COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs);
+COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr);
+COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data);
+COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2);
+COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality);
+COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence);
+COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd);
+COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP1(mlockall, int, flags);
+COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy);
+COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr);
+COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout);
+COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5);
+COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size);
+COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr);
+COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data);
+COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid);
+COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid);
+COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid);
+COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid);
+COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid);
+COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid);
+COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old);
+COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec);
+COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior);
+COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count);
+COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name);
+COMPAT_SYSCALL_WRAP1(exit_group, int, error_code);
+COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr);
+COMPAT_SYSCALL_WRAP1(epoll_create, int, size);
+COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event);
+COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout);
+COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id);
+COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id);
+COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx);
+COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result);
+COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name);
+COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id);
+COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id);
+COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags);
+COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio);
+COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask);
+COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd);
+COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag);
+COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag);
+COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags);
+COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags);
+COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache);
+COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count);
+COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags);
+COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags);
+COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags);
+COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags);
+COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags);
+COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
+COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
+COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
+COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
+COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
+COMPAT_SYSCALL_WRAP1(syncfs, int, fd);
+COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype);
+COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum);
+COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2);
+COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
+COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index d8c1131e081..d7b0c4d2788 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -1,12 +1,13 @@
/*
- * arch/s390/kernel/cpcmd.c
- *
* S390 version
- * Copyright IBM Corp. 1999,2007
+ * Copyright IBM Corp. 1999, 2007
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Christian Borntraeger (cborntra@de.ibm.com),
*/
+#define KMSG_COMPONENT "cpcmd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -15,7 +16,6 @@
#include <linux/string.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
-#include <asm/system.h>
#include <asm/io.h>
static DEFINE_SPINLOCK(cpcmd_lock);
@@ -104,8 +104,8 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
(((unsigned long)response + rlen) >> 31)) {
lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
if (!lowbuf) {
- printk(KERN_WARNING
- "cpcmd: could not allocate response buffer\n");
+ pr_warning("The cpcmd kernel function failed to "
+ "allocate a response buffer\n");
return -ENOMEM;
}
spin_lock_irqsave(&cpcmd_lock, flags);
diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c
deleted file mode 100644
index 8cc7c9fa64f..00000000000
--- a/arch/s390/kernel/crash.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * arch/s390/kernel/crash.c
- *
- * (C) Copyright IBM Corp. 2005
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- *
- */
-
-#include <linux/threads.h>
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644
index 00000000000..a3b9150e680
--- /dev/null
+++ b/arch/s390/kernel/crash_dump.c
@@ -0,0 +1,647 @@
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/memblock.h>
+#include <asm/os_info.h>
+#include <asm/elf.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+
+/*
+ * Byte-granular pointer helpers.
+ * PTR_ADD/PTR_SUB: move pointer x forward/backward by y bytes; the result
+ * is a char pointer.
+ * PTR_DIFF: address of x minus the address value of y, as unsigned long.
+ */
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
+#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
+#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
+/*
+ * Single-region memblock type that for_each_dump_mem_range() passes to
+ * __next_mem_range() next to memblock.physmem. elfcorehdr_alloc() fills the
+ * region with OLDMEM_BASE/OLDMEM_SIZE when OLDMEM_BASE is set; otherwise
+ * the region keeps its zero-initialized base and size.
+ */
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+ .cnt = 1,
+ .max = 1,
+ .total_size = 0,
+ .regions = &oldmem_region,
+};
+
+/*
+ * Walk memory ranges with __next_mem_range(), using memblock.physmem and
+ * oldmem_type as the two range sets. i is the u64 iteration cursor; the
+ * loop ends once it becomes ULLONG_MAX. p_start, p_end and p_nid receive
+ * the current range and may be NULL.
+ * NOTE(review): presumably this yields the physmem ranges that are not
+ * covered by oldmem_type - confirm against __next_mem_range().
+ */
+#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \
+ for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \
+ &oldmem_type, p_start, \
+ p_end, p_nid); \
+ i != (u64)ULLONG_MAX; \
+ __next_mem_range(&i, nid, &memblock.physmem, \
+ &oldmem_type, \
+ p_start, p_end, p_nid))
+
+/* Save area pointers indexed by CPU; grown by dump_save_area_create() */
+struct dump_save_areas dump_save_areas;
+
+/*
+ * Allocate and add a save area for a CPU
+ *
+ * The save area is stored in dump_save_areas.areas[cpu]; the array is grown
+ * (new slots zeroed) when cpu lies beyond its current size. Returns the new
+ * save area or NULL if an allocation failed. On failure dump_save_areas is
+ * left unchanged.
+ */
+struct save_area *dump_save_area_create(int cpu)
+{
+	struct save_area **save_areas, *save_area;
+	int count = cpu + 1;
+
+	save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
+	if (!save_area)
+		return NULL;
+	if (count > dump_save_areas.count) {
+		save_areas = krealloc(dump_save_areas.areas,
+				      count * sizeof(void *),
+				      GFP_KERNEL | __GFP_ZERO);
+		if (!save_areas) {
+			kfree(save_area);
+			return NULL;
+		}
+		dump_save_areas.areas = save_areas;
+		/*
+		 * Raise the count only after the resize succeeded, so that
+		 * count never exceeds the real size of the array.
+		 */
+		dump_save_areas.count = count;
+	}
+	dump_save_areas.areas[cpu] = save_area;
+	return save_area;
+}
+
+/*
+ * Return physical address for virtual address
+ *
+ * The translation is done with the LRA instruction. When LRA leaves a
+ * non-zero condition code the result is replaced by 0, so the function
+ * returns NULL in that case.
+ */
+static inline void *load_real_addr(void *addr)
+{
+ unsigned long real_addr;
+
+ /* "jz 0f" skips the "la %0,0" that zeroes the result */
+ asm volatile(
+ " lra %0,0(%1)\n"
+ " jz 0f\n"
+ " la %0,0\n"
+ "0:"
+ : "=a" (real_addr) : "a" (addr) : "cc");
+ return (void *)real_addr;
+}
+
+/*
+ * Copy count bytes from real memory at src to dest
+ *
+ * A destination outside the vmalloc/module area is handed to memcpy_real()
+ * in one go. Otherwise the copy is split at the page boundaries of dest and
+ * each piece is written through the address returned by load_real_addr().
+ * Returns 0 on success, the memcpy_real() result for the direct copy, or
+ * -EFAULT if a piece of the split copy fails.
+ */
+static int copy_from_realmem(void *dest, void *src, size_t count)
+{
+	unsigned long chunk, page_off;
+
+	if (count && !is_vmalloc_or_module_addr(dest))
+		return memcpy_real(dest, src, count);
+	while (count) {
+		/* Never let a single piece cross a page boundary of dest */
+		page_off = __pa(dest) & ~PAGE_MASK;
+		chunk = min(count, PAGE_SIZE - page_off);
+		if (memcpy_real(load_real_addr(dest), src, chunk))
+			return -EFAULT;
+		dest += chunk;
+		src += chunk;
+		count -= chunk;
+	}
+	return 0;
+}
+
+/*
+ * Pointer to ELF header in new kernel
+ *
+ * Set by elfcorehdr_alloc() to the header it allocated. The elfcorehdr
+ * read and free functions test it to tell that header apart from one that
+ * was not created here.
+ */
+static void *elfcorehdr_newmem;
+
+/*
+ * Copy one page from zfcpdump "oldmem"
+ *
+ * Addresses below the HSA size are read with memcpy_hsa(); everything else
+ * is copied from real memory, to user space if userbuf is set. Returns
+ * csize on success or the non-zero result of the failing copy helper.
+ */
+static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
+					 unsigned long src, int userbuf)
+{
+	int rc;
+
+	if (src < sclp_get_hsa_size())
+		rc = memcpy_hsa(buf, src, csize, userbuf);
+	else if (userbuf)
+		rc = copy_to_user_real((void __force __user *) buf,
+				       (void *) src, csize);
+	else
+		rc = memcpy_real(buf, (void *) src, csize);
+	if (rc)
+		return rc;
+	return csize;
+}
+
+/*
+ * Copy one page from kdump "oldmem"
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
+ * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ *
+ * The data is copied to user space if userbuf is set, otherwise to the
+ * kernel buffer buf. Returns csize on success or the non-zero result of
+ * the failing copy helper, matching copy_oldmem_page_zfcpdump().
+ */
+static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
+				      unsigned long src, int userbuf)
+
+{
+	int rc;
+
+	/*
+	 * NOTE(review): the lower bound of the second test is exclusive, so
+	 * src == OLDMEM_BASE is not translated although the comment above
+	 * describes the range as starting at OLDMEM_BASE - confirm whether
+	 * ">=" is intended.
+	 */
+	if (src < OLDMEM_SIZE)
+		src += OLDMEM_BASE;
+	else if (src > OLDMEM_BASE &&
+		 src < OLDMEM_BASE + OLDMEM_SIZE)
+		src -= OLDMEM_BASE;
+	if (userbuf)
+		rc = copy_to_user_real((void __force __user *) buf,
+				       (void *) src, csize);
+	else
+		rc = copy_from_realmem(buf, (void *) src, csize);
+	/* Report the error if there is one, the number of bytes otherwise */
+	return rc ? rc : csize;
+}
+
+/*
+ * Copy one page from "oldmem"
+ *
+ * The source address is built from pfn and offset. The kdump variant is
+ * used when OLDMEM_BASE is set, the zfcpdump variant otherwise. A zero
+ * csize returns 0 without copying anything.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+			 unsigned long offset, int userbuf)
+{
+	unsigned long src = (pfn << PAGE_SHIFT) + offset;
+
+	if (!csize)
+		return 0;
+	if (!OLDMEM_BASE)
+		return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf);
+	return copy_oldmem_page_kdump(buf, csize, src, userbuf);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ *
+ * The part of the request that starts below OLDMEM_SIZE is mapped with the
+ * page frame number shifted up by OLDMEM_BASE; whatever remains is mapped
+ * unchanged. Returns the result of remap_pfn_range().
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+					unsigned long from, unsigned long pfn,
+					unsigned long size, pgprot_t prot)
+{
+	unsigned long swapped;
+	int rc;
+
+	/* Nothing to swap: the request starts at or above OLDMEM_SIZE */
+	if (pfn >= OLDMEM_SIZE >> PAGE_SHIFT)
+		return remap_pfn_range(vma, from, pfn, size, prot);
+
+	swapped = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+	rc = remap_pfn_range(vma, from, pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+			     swapped, prot);
+	if (rc || size == swapped)
+		return rc;
+	return remap_pfn_range(vma, from + swapped,
+			       pfn + (swapped >> PAGE_SHIFT),
+			       size - swapped, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * Only memory above the HSA size is mapped. The part of the request below
+ * the HSA size is skipped; it is read on demand using the
+ * copy_oldmem_page() function. Returns 0 if the whole request lies below
+ * the HSA size, the result of remap_pfn_range() otherwise.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+					   unsigned long from,
+					   unsigned long pfn,
+					   unsigned long size, pgprot_t prot)
+{
+	unsigned long hsa_end = sclp_get_hsa_size();
+	unsigned long skipped;
+
+	if (pfn >= hsa_end >> PAGE_SHIFT)
+		return remap_pfn_range(vma, from, pfn, size, prot);
+
+	skipped = min(size, hsa_end - (pfn << PAGE_SHIFT));
+	if (skipped == size)
+		return 0;
+	return remap_pfn_range(vma, from + skipped,
+			       pfn + (skipped >> PAGE_SHIFT),
+			       size - skipped, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump
+ *
+ * Dispatches to the kdump variant when OLDMEM_BASE is set and to the
+ * zfcpdump variant otherwise.
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+			   unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	if (!OLDMEM_BASE)
+		return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+						       prot);
+	return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+}
+
+/*
+ * Copy memory from old kernel
+ *
+ * The leading part of the request is treated specially: with OLDMEM_BASE
+ * set, bytes below OLDMEM_SIZE are read from src + OLDMEM_BASE; without
+ * it, bytes below the HSA size are read with memcpy_hsa(). The rest is
+ * copied from real memory at the unmodified address. Returns 0 on success
+ * or the result of the failing copy helper.
+ */
+int copy_from_oldmem(void *dest, void *src, size_t count)
+{
+	unsigned long from = (unsigned long) src;
+	unsigned long hsa_end, done = 0;
+	int rc;
+
+	if (OLDMEM_BASE) {
+		if (from < OLDMEM_SIZE) {
+			done = min(count, OLDMEM_SIZE - from);
+			rc = copy_from_realmem(dest, src + OLDMEM_BASE, done);
+			if (rc)
+				return rc;
+		}
+	} else {
+		hsa_end = sclp_get_hsa_size();
+		if (from < hsa_end) {
+			done = min(count, hsa_end - from);
+			rc = memcpy_hsa(dest, from, done, 0);
+			if (rc)
+				return rc;
+		}
+	}
+	return copy_from_realmem(dest + done, src + done, count - done);
+}
+
+/*
+ * Allocate len bytes of zeroed memory; panic instead of returning NULL
+ */
+static void *kzalloc_panic(int len)
+{
+	void *mem = kzalloc(len, GFP_KERNEL);
+
+	if (!mem)
+		panic("s390 kdump kzalloc (%d) failed", len);
+	return mem;
+}
+
+/*
+ * Initialize ELF note
+ *
+ * Writes a note header at buf, followed by the NUL-terminated name and
+ * d_len bytes of desc. Name and descriptor each end on a multiple of four
+ * bytes counted from buf. Returns the position right behind the note.
+ */
+static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
+		     const char *name)
+{
+	Elf64_Nhdr *note = buf;
+	u64 off = sizeof(Elf64_Nhdr);
+
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = d_len;
+	note->n_type = type;
+
+	/* Name follows the header */
+	memcpy(buf + off, name, note->n_namesz);
+	off = roundup(off + note->n_namesz, 4);
+
+	/* Descriptor follows the padded name */
+	memcpy(buf + off, desc, note->n_descsz);
+	off = roundup(off + note->n_descsz, 4);
+
+	return PTR_ADD(buf, off);
+}
+
+/*
+ * Initialize prstatus note
+ *
+ * General purpose registers, PSW and access registers are taken from the
+ * save area. pr_pid is a running number that starts at 1 and grows by one
+ * with every call. Returns the position right behind the note.
+ */
+static void *nt_prstatus(void *ptr, struct save_area *sa)
+{
+	static int cpu_nr = 1;
+	struct elf_prstatus prstatus;
+
+	memset(&prstatus, 0, sizeof(prstatus));
+	memcpy(&prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
+	memcpy(&prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+	memcpy(&prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
+	prstatus.pr_pid = cpu_nr++;
+
+	return nt_init(ptr, NT_PRSTATUS, &prstatus, sizeof(prstatus), "CORE");
+}
+
+/*
+ * Initialize fpregset (floating point) note
+ *
+ * Floating point control register and floating point registers are taken
+ * from the save area. Returns the position right behind the note.
+ */
+static void *nt_fpregset(void *ptr, struct save_area *sa)
+{
+	elf_fpregset_t fpregs;
+
+	memset(&fpregs, 0, sizeof(fpregs));
+	memcpy(&fpregs.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
+	memcpy(&fpregs.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
+
+	return nt_init(ptr, NT_PRFPREG, &fpregs, sizeof(fpregs), "CORE");
+}
+
+/*
+ * Initialize timer note from sa->timer; returns the position behind it
+ */
+static void *nt_s390_timer(void *ptr, struct save_area *sa)
+{
+	void *next;
+
+	next = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
+		       KEXEC_CORE_NOTE_NAME);
+	return next;
+}
+
+/*
+ * Initialize TOD clock comparator note from sa->clk_cmp; returns the
+ * position behind it
+ */
+static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
+{
+	void *next;
+
+	next = nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, sizeof(sa->clk_cmp),
+		       KEXEC_CORE_NOTE_NAME);
+	return next;
+}
+
+/*
+ * Initialize TOD programmable register note from sa->tod_reg; returns the
+ * position behind it
+ */
+static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
+{
+	void *next;
+
+	next = nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, sizeof(sa->tod_reg),
+		       KEXEC_CORE_NOTE_NAME);
+	return next;
+}
+
+/*
+ * Initialize control register note from sa->ctrl_regs; returns the
+ * position behind it
+ */
+static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
+{
+	void *next;
+
+	next = nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
+		       sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
+	return next;
+}
+
+/*
+ * Initialize prefix register note from sa->pref_reg; returns the position
+ * behind it
+ */
+static void *nt_s390_prefix(void *ptr, struct save_area *sa)
+{
+	void *next;
+
+	next = nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
+		       sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
+	return next;
+}
+
+/*
+ * Fill ELF notes for one CPU with save area registers
+ *
+ * The notes are written back to back starting at ptr, in this order:
+ * prstatus, fpregset, timer, TOD comparator, TOD programmable register,
+ * control registers, prefix register. Returns the position behind the
+ * last note.
+ */
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa)
+{
+	void *pos;
+
+	pos = nt_prstatus(ptr, sa);
+	pos = nt_fpregset(pos, sa);
+	pos = nt_s390_timer(pos, sa);
+	pos = nt_s390_tod_cmp(pos, sa);
+	pos = nt_s390_tod_preg(pos, sa);
+	pos = nt_s390_ctrs(pos, sa);
+	return nt_s390_prefix(pos, sa);
+}
+
+/*
+ * Initialize prpsinfo note (new kernel)
+ *
+ * All fields are zero except for the state character 'R' and the file
+ * name "vmlinux". Returns the position right behind the note.
+ */
+static void *nt_prpsinfo(void *ptr)
+{
+	struct elf_prpsinfo info;
+
+	memset(&info, 0, sizeof(info));
+	strcpy(info.pr_fname, "vmlinux");
+	info.pr_sname = 'R';
+	return nt_init(ptr, NT_PRPSINFO, &info, sizeof(info),
+		       KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Get vmcoreinfo using lowcore->vmcore_info (new kernel)
+ *
+ * Reads the note address from the old kernel's lowcore, checks that the
+ * note is named "VMCOREINFO" and copies its descriptor into a newly
+ * allocated buffer. Returns the buffer and stores its length in *size, or
+ * returns NULL if reading old memory fails or the note name does not
+ * match. No memory stays allocated when NULL is returned.
+ */
+static void *get_vmcoreinfo_old(unsigned long *size)
+{
+	char nt_name[11], *vmcoreinfo;
+	Elf64_Nhdr note;
+	void *addr;
+
+	if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+		return NULL;
+	memset(nt_name, 0, sizeof(nt_name));
+	if (copy_from_oldmem(&note, addr, sizeof(note)))
+		return NULL;
+	if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1))
+		return NULL;
+	if (strcmp(nt_name, "VMCOREINFO") != 0)
+		return NULL;
+	vmcoreinfo = kzalloc_panic(note.n_descsz);
+	/*
+	 * The descriptor is read from offset 24 - presumably the note header
+	 * plus the name padded to a multiple of four bytes; TODO confirm
+	 * against sizeof(Elf64_Nhdr).
+	 */
+	if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) {
+		/* Do not leak the buffer when the copy fails */
+		kfree(vmcoreinfo);
+		return NULL;
+	}
+	*size = note.n_descsz;
+	return vmcoreinfo;
+}
+
+/*
+ * Initialize vmcoreinfo note (new kernel)
+ *
+ * The vmcoreinfo data is taken from the os_info entry of the old kernel
+ * and, if that yields nothing, from get_vmcoreinfo_old(). Without any
+ * vmcoreinfo no note is written and ptr is returned unchanged.
+ */
+static void *nt_vmcoreinfo(void *ptr)
+{
+	unsigned long size;
+	void *info;
+
+	info = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+	if (!info)
+		info = get_vmcoreinfo_old(&size);
+	return info ? nt_init(ptr, 0, info, size, "VMCOREINFO") : ptr;
+}
+
+/*
+ * Initialize ELF header (new kernel)
+ *
+ * Describes a 64 bit, big endian ET_CORE file for EM_S390 whose program
+ * headers directly follow the ELF header. The program header count is
+ * mem_chunk_cnt plus one. Returns the position right behind the header.
+ */
+static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+{
+	unsigned char *ident;
+
+	memset(ehdr, 0, sizeof(*ehdr));
+
+	/* Identification bytes */
+	ident = ehdr->e_ident;
+	memcpy(ident, ELFMAG, SELFMAG);
+	ident[EI_CLASS] = ELFCLASS64;
+	ident[EI_DATA] = ELFDATA2MSB;
+	ident[EI_VERSION] = EV_CURRENT;
+	memset(ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+
+	/* File type and layout */
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = EM_S390;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	ehdr->e_phnum = mem_chunk_cnt + 1;
+	return ehdr + 1;
+}
+
+/*
+ * Return CPU count for ELF header (new kernel)
+ *
+ * Counts the save areas whose prefix register is not zero.
+ */
+static int get_cpu_cnt(void)
+{
+	int i, cpus = 0;
+
+	for (i = 0; i < dump_save_areas.count; i++) {
+		if (dump_save_areas.areas[i]->pref_reg != 0)
+			cpus++;
+	}
+	return cpus;
+}
+
+/*
+ * Return memory chunk count for ELF header (new kernel)
+ *
+ * Counts the ranges visited by for_each_dump_mem_range().
+ */
+static int get_mem_chunk_cnt(void)
+{
+	u64 idx;
+	int chunks = 0;
+
+	for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) {
+		chunks++;
+	}
+	return chunks;
+}
+
+/*
+ * Initialize ELF loads (new kernel)
+ *
+ * Fills one PT_LOAD program header per range visited by
+ * for_each_dump_mem_range(), starting at phdr. File offset, virtual and
+ * physical address of a header are all set to the start of its range.
+ * The loads_offset argument is not used.
+ */
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+{
+	phys_addr_t start, end, len;
+	u64 idx;
+
+	for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+		len = end - start;
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R | PF_W | PF_X;
+		phdr->p_offset = start;
+		phdr->p_vaddr = start;
+		phdr->p_paddr = start;
+		phdr->p_filesz = len;
+		phdr->p_memsz = len;
+		phdr->p_align = PAGE_SIZE;
+		phdr++;
+	}
+}
+
+/*
+ * Initialize notes (new kernel)
+ *
+ * Writes the prpsinfo note, the notes of every CPU whose save area has a
+ * non-zero prefix register, and the vmcoreinfo note, starting at ptr.
+ * phdr is set up as the PT_NOTE program header covering these notes, with
+ * notes_offset as file offset. Returns the position behind the last note.
+ */
+static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
+{
+	void *notes_start = ptr;
+	struct save_area *sa;
+	int i;
+
+	ptr = nt_prpsinfo(ptr);
+
+	for (i = 0; i < dump_save_areas.count; i++) {
+		sa = dump_save_areas.areas[i];
+		if (sa->pref_reg != 0)
+			ptr = fill_cpu_elf_notes(ptr, sa);
+	}
+	ptr = nt_vmcoreinfo(ptr);
+
+	memset(phdr, 0, sizeof(*phdr));
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = notes_offset;
+	phdr->p_filesz = PTR_DIFF(ptr, notes_start);
+	phdr->p_memsz = phdr->p_filesz;
+	return ptr;
+}
+
+/*
+ * Create ELF core header (new kernel)
+ *
+ * Builds, in one allocation, the ELF header, one PT_NOTE program header,
+ * one PT_LOAD program header per memory chunk, and the notes. On success
+ * *addr is set to the address of the header and *size to the number of
+ * bytes used. Returns 0 on success and also when no header is created
+ * (neither kdump nor zfcpdump, or elfcorehdr= was given); returns -ENODEV
+ * if the HSA size is not available for zfcpdump.
+ */
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+ Elf64_Phdr *phdr_notes, *phdr_loads;
+ int mem_chunk_cnt;
+ void *ptr, *hdr;
+ u32 alloc_size;
+ u64 hdr_off;
+
+ /* If we are not in kdump or zfcpdump mode return */
+ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return 0;
+ /* If elfcorehdr= has been passed via cmdline, we use that one */
+ if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
+ return 0;
+ /* If we cannot get HSA size for zfcpdump return error */
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
+ return -ENODEV;
+
+ /* For kdump, exclude previous crashkernel memory */
+ if (OLDMEM_BASE) {
+ oldmem_region.base = OLDMEM_BASE;
+ oldmem_region.size = OLDMEM_SIZE;
+ oldmem_type.total_size = OLDMEM_SIZE;
+ }
+
+ mem_chunk_cnt = get_mem_chunk_cnt();
+
+ /*
+ * Size estimate: 0x1000 bytes, plus 0x300 bytes per CPU, plus one
+ * program header per memory chunk. The notes are written without
+ * bounds checks; the BUG_ON at the end is the only size check.
+ */
+ alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
+ mem_chunk_cnt * sizeof(Elf64_Phdr);
+ hdr = kzalloc_panic(alloc_size);
+ /* Init elf header */
+ ptr = ehdr_init(hdr, mem_chunk_cnt);
+ /* Init program headers */
+ phdr_notes = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+ phdr_loads = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ /* Init notes */
+ hdr_off = PTR_DIFF(ptr, hdr);
+ /* The notes offset handed over is the address of the notes in memory */
+ ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init loads */
+ hdr_off = PTR_DIFF(ptr, hdr);
+ loads_init(phdr_loads, hdr_off);
+ *addr = (unsigned long long) hdr;
+ elfcorehdr_newmem = hdr;
+ *size = (unsigned long long) hdr_off;
+ /*
+ * NOTE(review): this tests the global elfcorehdr_size rather than
+ * hdr_off, so it only checks the size computed here if the caller
+ * passed &elfcorehdr_size as size - confirm against the caller.
+ */
+ BUG_ON(elfcorehdr_size > alloc_size);
+ return 0;
+}
+
+/*
+ * Free ELF core header (new kernel)
+ *
+ * Does nothing unless elfcorehdr_alloc() created a header.
+ */
+void elfcorehdr_free(unsigned long long addr)
+{
+	if (elfcorehdr_newmem)
+		kfree((void *)(unsigned long)addr);
+}
+
+/*
+ * Read from ELF header
+ *
+ * Copies count bytes from the address given by *ppos into buf and advances
+ * *ppos. If the header was not created by elfcorehdr_alloc(), OLDMEM_BASE
+ * is subtracted from the address first. Always returns count.
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	void *src = (void *)(unsigned long)*ppos;
+
+	if (!elfcorehdr_newmem)
+		src -= OLDMEM_BASE;
+	memcpy(buf, src, count);
+	*ppos += count;
+	return count;
+}
+
+/*
+ * Read from ELF notes data
+ *
+ * Notes of a header created by elfcorehdr_alloc() are copied directly;
+ * otherwise they are fetched with copy_from_oldmem(). Returns count and
+ * advances *ppos on success, or returns the copy_from_oldmem() error and
+ * leaves *ppos untouched.
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+	void *src = (void *)(unsigned long)*ppos;
+	int rc;
+
+	if (!elfcorehdr_newmem) {
+		rc = copy_from_oldmem(buf, src, count);
+		if (rc)
+			return rc;
+	} else {
+		memcpy(buf, src, count);
+	}
+	*ppos += count;
+	return count;
+}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c93d1296cc0..ee8390da6ea 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1,20 +1,23 @@
/*
- * arch/s390/kernel/debug.c
* S/390 debug facility
*
- * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH,
- * IBM Corporation
+ * Copyright IBM Corp. 1999, 2012
+ *
* Author(s): Michael Holzheu (holzheu@de.ibm.com),
* Holger Smolinski (Holger.Smolinski@de.ibm.com)
*
* Bugreports to: <Linux390@de.ibm.com>
*/
+#define KMSG_COMPONENT "s390dbf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/ctype.h>
+#include <linux/string.h>
#include <linux/sysctl.h>
#include <asm/uaccess.h>
#include <linux/module.h>
@@ -60,8 +63,6 @@ typedef struct
} debug_sprintf_entry_t;
-extern void tod_to_timeval(uint64_t todval, struct timespec *xtime);
-
/* internal function prototyes */
static int debug_init(void);
@@ -72,7 +73,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
static int debug_open(struct inode *inode, struct file *file);
static int debug_close(struct inode *inode, struct file *file);
static debug_info_t *debug_info_create(const char *name, int pages_per_area,
- int nr_areas, int buf_size, mode_t mode);
+ int nr_areas, int buf_size, umode_t mode);
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
static int debug_prolog_level_fn(debug_info_t * id,
@@ -109,6 +110,7 @@ struct debug_view debug_raw_view = {
NULL,
NULL
};
+EXPORT_SYMBOL(debug_raw_view);
struct debug_view debug_hex_ascii_view = {
"hex_ascii",
@@ -118,6 +120,7 @@ struct debug_view debug_hex_ascii_view = {
NULL,
NULL
};
+EXPORT_SYMBOL(debug_hex_ascii_view);
static struct debug_view debug_level_view = {
"level",
@@ -154,6 +157,7 @@ struct debug_view debug_sprintf_view = {
NULL,
NULL
};
+EXPORT_SYMBOL(debug_sprintf_view);
/* used by dump analysis tools to determine version of debug feature */
static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
@@ -165,6 +169,7 @@ static debug_info_t *debug_area_last = NULL;
static DEFINE_MUTEX(debug_mutex);
static int initialized;
+static int debug_critical;
static const struct file_operations debug_file_ops = {
.owner = THIS_MODULE,
@@ -172,6 +177,7 @@ static const struct file_operations debug_file_ops = {
.write = debug_input,
.open = debug_open,
.release = debug_close,
+ .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
@@ -327,7 +333,7 @@ debug_info_free(debug_info_t* db_info){
static debug_info_t*
debug_info_create(const char *name, int pages_per_area, int nr_areas,
- int buf_size, mode_t mode)
+ int buf_size, umode_t mode)
{
debug_info_t* rc;
@@ -388,7 +394,7 @@ debug_info_copy(debug_info_t* in, int mode)
debug_info_free(rc);
} while (1);
- if(!rc || (mode == NO_AREAS))
+ if (mode == NO_AREAS)
goto out;
for(i = 0; i < in->nr_areas; i++){
@@ -600,12 +606,12 @@ debug_input(struct file *file, const char __user *user_buf, size_t length,
static int
debug_open(struct inode *inode, struct file *file)
{
- int i = 0, rc = 0;
+ int i, rc = 0;
file_private_info_t *p_info;
debug_info_t *debug_info, *debug_info_snapshot;
mutex_lock(&debug_mutex);
- debug_info = file->f_path.dentry->d_inode->i_private;
+ debug_info = file_inode(file)->i_private;
/* find debug view */
for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
if (!debug_info->views[i])
@@ -639,8 +645,7 @@ found:
p_info = kmalloc(sizeof(file_private_info_t),
GFP_KERNEL);
if(!p_info){
- if(debug_info_snapshot)
- debug_info_free(debug_info_snapshot);
+ debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
@@ -654,6 +659,7 @@ found:
p_info->act_entry_offset = 0;
file->private_data = p_info;
debug_info_get(debug_info);
+ nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
@@ -685,7 +691,7 @@ debug_close(struct inode *inode, struct file *file)
*/
debug_info_t *debug_register_mode(const char *name, int pages_per_area,
- int nr_areas, int buf_size, mode_t mode,
+ int nr_areas, int buf_size, umode_t mode,
uid_t uid, gid_t gid)
{
debug_info_t *rc = NULL;
@@ -693,10 +699,9 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
/* Since debugfs currently does not support uid/gid other than root, */
/* we do not allow gid/uid != 0 until we get support for that. */
if ((uid != 0) || (gid != 0))
- printk(KERN_WARNING "debug: Warning - Currently only uid/gid "
- "= 0 are supported. Using root as owner now!");
- if (!initialized)
- BUG();
+ pr_warning("Root becomes the owner of all s390dbf files "
+ "in sysfs\n");
+ BUG_ON(!initialized);
mutex_lock(&debug_mutex);
/* create new debug_info */
@@ -709,7 +714,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
debug_register_view(rc, &debug_pages_view);
out:
if (!rc){
- printk(KERN_ERR "debug: debug_register failed for %s\n",name);
+ pr_err("Registering debug feature %s failed\n", name);
}
mutex_unlock(&debug_mutex);
return rc;
@@ -728,6 +733,7 @@ debug_info_t *debug_register(const char *name, int pages_per_area,
return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
S_IRUSR | S_IWUSR, 0, 0);
}
+EXPORT_SYMBOL(debug_register);
/*
* debug_unregister:
@@ -746,6 +752,7 @@ debug_unregister(debug_info_t * id)
out:
return;
}
+EXPORT_SYMBOL(debug_unregister);
/*
* debug_set_size:
@@ -763,8 +770,8 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area)
if(pages_per_area > 0){
new_areas = debug_areas_alloc(pages_per_area, nr_areas);
if(!new_areas) {
- printk(KERN_WARNING "debug: could not allocate memory "\
- "for pagenumber: %i\n",pages_per_area);
+ pr_info("Allocating memory for %i pages failed\n",
+ pages_per_area);
rc = -ENOMEM;
goto out;
}
@@ -780,8 +787,7 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area)
memset(id->active_entries,0,sizeof(int)*id->nr_areas);
memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
spin_unlock_irqrestore(&id->lock,flags);
- printk(KERN_INFO "debug: %s: set new size (%i pages)\n"\
- ,id->name, pages_per_area);
+ pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area);
out:
return rc;
}
@@ -800,17 +806,16 @@ debug_set_level(debug_info_t* id, int new_level)
spin_lock_irqsave(&id->lock,flags);
if(new_level == DEBUG_OFF_LEVEL){
id->level = DEBUG_OFF_LEVEL;
- printk(KERN_INFO "debug: %s: switched off\n",id->name);
+ pr_info("%s: switched off\n",id->name);
} else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
- printk(KERN_INFO
- "debug: %s: level %i is out of range (%i - %i)\n",
+ pr_info("%s: level %i is out of range (%i - %i)\n",
id->name, new_level, 0, DEBUG_MAX_LEVEL);
} else {
id->level = new_level;
}
spin_unlock_irqrestore(&id->lock,flags);
}
-
+EXPORT_SYMBOL(debug_set_level);
/*
* proceed_active_entry:
@@ -862,7 +867,7 @@ static inline void
debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
int exception)
{
- active->id.stck = get_clock();
+ active->id.stck = get_tod_clock_fast();
active->id.fields.cpuid = smp_processor_id();
active->caller = __builtin_return_address(0);
active->id.fields.exception = exception;
@@ -884,11 +889,11 @@ static int debug_active=1;
* if debug_active is already off
*/
static int
-s390dbf_procactive(ctl_table *table, int write, struct file *filp,
+s390dbf_procactive(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
- return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+ return proc_dointvec(table, write, buffer, lenp, ppos);
else
return 0;
}
@@ -896,35 +901,30 @@ s390dbf_procactive(ctl_table *table, int write, struct file *filp,
static struct ctl_table s390dbf_table[] = {
{
- .ctl_name = CTL_S390DBF_STOPPABLE,
.procname = "debug_stoppable",
.data = &debug_stoppable,
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_S390DBF_ACTIVE,
.procname = "debug_active",
.data = &debug_active,
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = &s390dbf_procactive,
- .strategy = &sysctl_intvec,
+ .proc_handler = s390dbf_procactive,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table s390dbf_dir_table[] = {
{
- .ctl_name = CTL_S390DBF,
.procname = "s390dbf",
.maxlen = 0,
.mode = S_IRUGO | S_IXUGO,
.child = s390dbf_table,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table_header *s390dbf_sysctl_header;
@@ -935,7 +935,12 @@ debug_stop_all(void)
if (debug_stoppable)
debug_active = 0;
}
+EXPORT_SYMBOL(debug_stop_all);
+void debug_set_critical(void)
+{
+ debug_critical = 1; /* writers that test this flag use spin_trylock_irqsave() on id->lock and return NULL instead of spinning */
+}
/*
* debug_event_common:
@@ -950,7 +955,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len)
if (!debug_active || !id->areas)
return NULL;
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
memset(DEBUG_DATA(active), 0, id->buf_size);
memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
@@ -959,6 +968,7 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len)
return active;
}
+EXPORT_SYMBOL(debug_event_common);
/*
* debug_exception_common:
@@ -973,7 +983,11 @@ debug_entry_t
if (!debug_active || !id->areas)
return NULL;
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
memset(DEBUG_DATA(active), 0, id->buf_size);
memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
@@ -982,6 +996,7 @@ debug_entry_t
return active;
}
+EXPORT_SYMBOL(debug_exception_common);
/*
* counts arguments in format string for sprintf view
@@ -1018,7 +1033,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
return NULL;
numargs=debug_count_numargs(string);
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active);
va_start(ap,string);
@@ -1031,6 +1050,7 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
return active;
}
+EXPORT_SYMBOL(debug_sprintf_event);
/*
* debug_sprintf_exception:
@@ -1052,7 +1072,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
numargs=debug_count_numargs(string);
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active);
va_start(ap,string);
@@ -1065,26 +1089,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
return active;
}
-
-/*
- * debug_init:
- * - is called exactly once to initialize the debug feature
- */
-
-static int
-__init debug_init(void)
-{
- int rc = 0;
-
- s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
- mutex_lock(&debug_mutex);
- debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL);
- printk(KERN_INFO "debug: Initialization complete\n");
- initialized = 1;
- mutex_unlock(&debug_mutex);
-
- return rc;
-}
+EXPORT_SYMBOL(debug_sprintf_exception);
/*
* debug_register_view:
@@ -1096,7 +1101,7 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
int rc = 0;
int i;
unsigned long flags;
- mode_t mode;
+ umode_t mode;
struct dentry *pde;
if (!id)
@@ -1109,8 +1114,8 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
id , &debug_file_ops);
if (!pde){
- printk(KERN_WARNING "debug: debugfs_create_file() failed!"\
- " Cannot register view %s/%s\n", id->name,view->name);
+ pr_err("Registering view %s/%s failed due to out of "
+ "memory\n", id->name,view->name);
rc = -1;
goto out;
}
@@ -1120,20 +1125,20 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
break;
}
if (i == DEBUG_MAX_VIEWS) {
- printk(KERN_WARNING "debug: cannot register view %s/%s\n",
- id->name,view->name);
- printk(KERN_WARNING
- "debug: maximum number of views reached (%i)!\n", i);
- debugfs_remove(pde);
+ pr_err("Registering view %s/%s would exceed the maximum "
+ "number of views %i\n", id->name, view->name, i);
rc = -1;
} else {
id->views[i] = view;
id->debugfs_entries[i] = pde;
}
spin_unlock_irqrestore(&id->lock, flags);
+ if (rc)
+ debugfs_remove(pde);
out:
return rc;
}
+EXPORT_SYMBOL(debug_register_view);
/*
* debug_unregister_view:
@@ -1142,9 +1147,9 @@ out:
int
debug_unregister_view(debug_info_t * id, struct debug_view *view)
{
- int rc = 0;
- int i;
+ struct dentry *dentry = NULL;
unsigned long flags;
+ int i, rc = 0;
if (!id)
goto out;
@@ -1156,14 +1161,16 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view)
if (i == DEBUG_MAX_VIEWS)
rc = -1;
else {
- debugfs_remove(id->debugfs_entries[i]);
+ dentry = id->debugfs_entries[i];
id->views[i] = NULL;
- rc = 0;
+ id->debugfs_entries[i] = NULL;
}
spin_unlock_irqrestore(&id->lock, flags);
+ debugfs_remove(dentry);
out:
return rc;
}
+EXPORT_SYMBOL(debug_unregister_view);
static inline char *
debug_get_user_string(const char __user *user_buf, size_t user_len)
@@ -1190,10 +1197,9 @@ debug_get_uint(char *buf)
{
int rc;
- for(; isspace(*buf); buf++);
+ buf = skip_spaces(buf);
rc = simple_strtoul(buf, &buf, 10);
if(*buf){
- printk("debug: no integer specified!\n");
rc = -EINVAL;
}
return rc;
@@ -1305,7 +1311,8 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view,
new_level = debug_get_uint(str);
}
if(new_level < 0) {
- printk(KERN_INFO "debug: level `%s` is not valid\n", str);
+ pr_warning("%s is not a valid level for a debug "
+ "feature\n", str);
rc = -EINVAL;
} else {
debug_set_level(id, new_level);
@@ -1340,19 +1347,12 @@ static void debug_flush(debug_info_t* id, int area)
memset(id->areas[i][j], 0, PAGE_SIZE);
}
}
- printk(KERN_INFO "debug: %s: all areas flushed\n",id->name);
} else if(area >= 0 && area < id->nr_areas) {
id->active_entries[area] = 0;
id->active_pages[area] = 0;
for(i = 0; i < id->pages_per_area; i++) {
memset(id->areas[area][i],0,PAGE_SIZE);
}
- printk(KERN_INFO "debug: %s: area %i has been flushed\n",
- id->name, area);
- } else {
- printk(KERN_INFO
- "debug: %s: area %i cannot be flushed (range: %i - %i)\n",
- id->name, area, 0, id->nr_areas-1);
}
spin_unlock_irqrestore(&id->lock,flags);
}
@@ -1389,7 +1389,8 @@ debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
goto out;
}
- printk(KERN_INFO "debug: area `%c` is not valid\n", input_buf[0]);
+ pr_info("Flushing debug data failed because %c is not a valid "
+ "area\n", input_buf[0]);
out:
*offset += user_len;
@@ -1443,10 +1444,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
rc += sprintf(out_buf + rc, "| ");
for (i = 0; i < id->buf_size; i++) {
unsigned char c = in_buf[i];
- if (!isprint(c))
- rc += sprintf(out_buf + rc, ".");
- else
+ if (isascii(c) && isprint(c))
rc += sprintf(out_buf + rc, "%c", c);
+ else
+ rc += sprintf(out_buf + rc, ".");
}
rc += sprintf(out_buf + rc, "\n");
return rc;
@@ -1461,17 +1462,13 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
int area, debug_entry_t * entry, char *out_buf)
{
struct timespec time_spec;
- unsigned long long time;
char *except_str;
unsigned long caller;
int rc = 0;
unsigned int level;
level = entry->id.fields.level;
- time = entry->id.stck;
- /* adjust todclock to 1970 */
- time -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096);
- tod_to_timeval(time, &time_spec);
+ stck_to_timespec(entry->id.stck, &time_spec);
if (entry->id.fields.exception)
except_str = "*";
@@ -1483,6 +1480,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
except_str, entry->id.fields.cpuid, (void *) caller);
return rc;
}
+EXPORT_SYMBOL(debug_dflt_header_fn);
/*
* prints debug data sprintf-formated:
@@ -1531,33 +1529,16 @@ out:
}
/*
- * clean up module
+ * debug_init:
+ * - is called exactly once to initialize the debug feature
*/
-static void __exit debug_exit(void)
+static int __init debug_init(void)
{
- debugfs_remove(debug_debugfs_root_entry);
- unregister_sysctl_table(s390dbf_sysctl_header);
- return;
+ s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); /* result is stored but not checked */
+ mutex_lock(&debug_mutex);
+ debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); /* result is stored but not checked */
+ initialized = 1; /* debug_register_mode() does BUG_ON(!initialized) */
+ mutex_unlock(&debug_mutex);
+ return 0;
}
-
-/*
- * module definitions
- */
postcore_initcall(debug_init);
-module_exit(debug_exit);
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(debug_register);
-EXPORT_SYMBOL(debug_unregister);
-EXPORT_SYMBOL(debug_set_level);
-EXPORT_SYMBOL(debug_stop_all);
-EXPORT_SYMBOL(debug_register_view);
-EXPORT_SYMBOL(debug_unregister_view);
-EXPORT_SYMBOL(debug_event_common);
-EXPORT_SYMBOL(debug_exception_common);
-EXPORT_SYMBOL(debug_hex_ascii_view);
-EXPORT_SYMBOL(debug_raw_view);
-EXPORT_SYMBOL(debug_dflt_header_fn);
-EXPORT_SYMBOL(debug_sprintf_view);
-EXPORT_SYMBOL(debug_sprintf_exception);
-EXPORT_SYMBOL(debug_sprintf_event);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index c032d11da8a..8237fc07ac7 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -9,27 +9,6 @@
#include <asm/diag.h>
/*
- * Diagnose 10: Release pages
- */
-void diag10(unsigned long addr)
-{
- if (addr >= 0x7ff00000)
- return;
- asm volatile(
-#ifdef CONFIG_64BIT
- " sam31\n"
- " diag %0,%0,0x10\n"
- "0: sam64\n"
-#else
- " diag %0,%0,0x10\n"
- "0:\n"
-#endif
- EX_TABLE(0b, 0b)
- : : "a" (addr));
-}
-EXPORT_SYMBOL(diag10);
-
-/*
* Diagnose 14: Input spool file manipulation
*/
int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index d2f270c995d..993efe6a887 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1,6 +1,4 @@
/*
- * arch/s390/kernel/dis.c
- *
* Disassemble s390 instructions.
*
* Copyright IBM Corp. 2007
@@ -15,7 +13,6 @@
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
@@ -25,15 +22,15 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
+#include <asm/dis.h>
#include <asm/io.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/mathemu.h>
#include <asm/cpcmd.h>
-#include <asm/s390_ext.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
+#include <asm/irq.h>
#ifndef CONFIG_64BIT
#define ONELONG "%08lx: "
@@ -41,17 +38,6 @@
#define ONELONG "%016lx: "
#endif /* CONFIG_64BIT */
-#define OPERAND_GPR 0x1 /* Operand printed as %rx */
-#define OPERAND_FPR 0x2 /* Operand printed as %fx */
-#define OPERAND_AR 0x4 /* Operand printed as %ax */
-#define OPERAND_CR 0x8 /* Operand printed as %cx */
-#define OPERAND_DISP 0x10 /* Operand printed as displacement */
-#define OPERAND_BASE 0x20 /* Operand printed as base register */
-#define OPERAND_INDEX 0x40 /* Operand printed as index register */
-#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */
-#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */
-#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */
-
enum {
UNUSED, /* Indicates the end of the operand list */
R_8, /* GPR starting at position 8 */
@@ -87,15 +73,29 @@ enum {
U4_12, /* 4 bit unsigned value starting at 12 */
U4_16, /* 4 bit unsigned value starting at 16 */
U4_20, /* 4 bit unsigned value starting at 20 */
+ U4_24, /* 4 bit unsigned value starting at 24 */
+ U4_28, /* 4 bit unsigned value starting at 28 */
+ U4_32, /* 4 bit unsigned value starting at 32 */
+ U4_36, /* 4 bit unsigned value starting at 36 */
U8_8, /* 8 bit unsigned value starting at 8 */
U8_16, /* 8 bit unsigned value starting at 16 */
+ U8_24, /* 8 bit unsigned value starting at 24 */
+ U8_32, /* 8 bit unsigned value starting at 32 */
+ I8_8, /* 8 bit signed value starting at 8 */
+ I8_32, /* 8 bit signed value starting at 32 */
+ J12_12, /* PC relative offset at 12 */
I16_16, /* 16 bit signed value starting at 16 */
+ I16_32, /* 16 bit signed value starting at 32 */
U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_32, /* 16 bit unsigned value starting at 32 */
J16_16, /* PC relative jump offset at 16 */
+ J16_32, /* PC relative offset at 32 */
+ I24_24, /* 24 bit signed value starting at 24 */
J32_16, /* PC relative long offset at 16 */
I32_16, /* 32 bit signed value starting at 16 */
U32_16, /* 32 bit unsigned value starting at 16 */
M_16, /* 4 bit optional mask starting at 16 */
+ M_20, /* 4 bit optional mask starting at 20 */
RO_28, /* optional GPR starting at position 28 */
};
@@ -105,37 +105,47 @@ enum {
*/
enum {
INSTR_INVALID,
- INSTR_E, INSTR_RIE_RRP, INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU,
- INSTR_RIL_UP, INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP,
+ INSTR_E,
+ INSTR_IE_UU,
+ INSTR_MII_UPI,
+ INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU,
+ INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0,
+ INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP,
+ INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU,
+ INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP,
INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0,
- INSTR_RRE_FF, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, INSTR_RRE_RR,
- INSTR_RRE_RR_OPT, INSTR_RRF_F0FF, INSTR_RRF_FUFF, INSTR_RRF_M0RR,
- INSTR_RRF_R0RR, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF,
+ INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF,
+ INSTR_RRE_RR, INSTR_RRE_RR_OPT,
+ INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR,
+ INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR,
+ INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR,
+ INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF,
+ INSTR_RRF_UUFR, INSTR_RRF_UURF,
+ INSTR_RRR_F0FF, INSTR_RRS_RRRDU,
INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR,
- INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, INSTR_RSI_RRP,
- INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD,
- INSTR_RSY_RURD, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD,
- INSTR_RS_RRRD, INSTR_RS_RURD, INSTR_RXE_FRRD, INSTR_RXE_RRRD,
- INSTR_RXF_FRRDF, INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RX_FRRD,
- INSTR_RX_RRRD, INSTR_RX_URRD, INSTR_SIY_URD, INSTR_SI_URD,
- INSTR_SSE_RDRD, INSTR_SSF_RRDRD, INSTR_SS_L0RDRD, INSTR_SS_LIRDRD,
- INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3,
+ INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD,
+ INSTR_RSI_RRP,
+ INSTR_RSL_LRDFU, INSTR_RSL_R0RD,
+ INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD,
+ INSTR_RSY_RDRM,
+ INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD,
+ INSTR_RS_RURD,
+ INSTR_RXE_FRRD, INSTR_RXE_RRRD,
+ INSTR_RXF_FRRDF,
+ INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD,
+ INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD,
+ INSTR_SIL_RDI, INSTR_SIL_RDU,
+ INSTR_SIY_IRD, INSTR_SIY_URD,
+ INSTR_SI_URD,
+ INSTR_SMI_U0RDP,
+ INSTR_SSE_RDRD,
+ INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2,
+ INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD,
+ INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3,
INSTR_S_00, INSTR_S_RD,
};
-struct operand {
- int bits; /* The number of bits in the operand. */
- int shift; /* The number of bits to shift. */
- int flags; /* One bit syntax flags. */
-};
-
-struct insn {
- const char name[5];
- unsigned char opfrag;
- unsigned char format;
-};
-
-static const struct operand operands[] =
+static const struct s390_operand operands[] =
{
[UNUSED] = { 0, 0, 0 },
[R_8] = { 4, 8, OPERAND_GPR },
@@ -171,99 +181,287 @@ static const struct operand operands[] =
[U4_12] = { 4, 12, 0 },
[U4_16] = { 4, 16, 0 },
[U4_20] = { 4, 20, 0 },
+ [U4_24] = { 4, 24, 0 },
+ [U4_28] = { 4, 28, 0 },
+ [U4_32] = { 4, 32, 0 },
+ [U4_36] = { 4, 36, 0 },
[U8_8] = { 8, 8, 0 },
[U8_16] = { 8, 16, 0 },
+ [U8_24] = { 8, 24, 0 },
+ [U8_32] = { 8, 32, 0 },
+ [J12_12] = { 12, 12, OPERAND_PCREL },
[I16_16] = { 16, 16, OPERAND_SIGNED },
[U16_16] = { 16, 16, 0 },
+ [U16_32] = { 16, 32, 0 },
[J16_16] = { 16, 16, OPERAND_PCREL },
+ [J16_32] = { 16, 32, OPERAND_PCREL },
+ [I16_32] = { 16, 32, OPERAND_SIGNED },
+ [I24_24] = { 24, 24, OPERAND_SIGNED },
[J32_16] = { 32, 16, OPERAND_PCREL },
[I32_16] = { 32, 16, OPERAND_SIGNED },
[U32_16] = { 32, 16, 0 },
[M_16] = { 4, 16, 0 },
+ [M_20] = { 4, 20, 0 },
[RO_28] = { 4, 28, OPERAND_GPR }
};
static const unsigned char formats[][7] = {
- [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, /* e.g. pr */
- [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, /* e.g. brxhg */
- [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, /* e.g. brasl */
- [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, /* e.g. brcl */
- [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, /* e.g. afi */
- [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, /* e.g. alfi */
- [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, /* e.g. ahi */
- [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, /* e.g. brct */
- [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, /* e.g. tml */
- [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, /* e.g. brc */
- [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, /* e.g. palb */
- [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, /* e.g. tb */
- [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, /* e.g. cpya */
- [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, /* e.g. sar */
- [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, /* e.g. sqer */
- [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, /* e.g. debr */
- [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, /* e.g. ipm */
- [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, /* e.g. ear */
- [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, /* e.g. cefbr */
- [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, /* e.g. lura */
- [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, /* efpc, sfpc */
- [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, /* e.g. madbr */
- [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 },/* e.g. didbr */
- [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 },/* e.g. .insn */
- [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, /* e.g. idte */
- [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, /* e.g. fixr */
- [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, /* e.g. cfebr */
- [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, /* e.g. sske */
- [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, /* e.g. adr */
- [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, /* e.g. spm */
- [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, /* e.g. lr */
- [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, /* e.g. svc */
- [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, /* e.g. bcr */
- [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, /* e.g. lmh */
- [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, /* e.g. lmh */
- [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, /* e.g. icmh */
- [INSTR_RSL_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, /* e.g. tp */
- [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, /* e.g. brxh */
- [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 },/* e.g. stmy */
+ [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, /* row = one leading byte (0xff or 0x0f) + up to six operands[] indices */
+ [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 },
+ [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 }, /* remaining three slots are implicitly 0 */
+ [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 },
+ [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 },
+ [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 },
+ [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 },
+ [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
+ [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 },
+ [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 },
+ [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 },
+ [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, /* NOTE(review): leading byte is presumably a mask applied to the opcode fragment - confirm in the lookup code */
+ [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 },
+ [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 },
+ [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 },
+ [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 },
+ [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 },
+ [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 },
+ [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 },
+ [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 },
+ [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 },
+ [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 },
+ [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 },
+ [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 },
+ [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 },
+ [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 },
+ [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 },
+ [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 },
+ [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 },
+ [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 },
+ [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 },
+ [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 },
+ [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 },
+ [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 },
+ [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 },
+ [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 },
+ [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 },
+ [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 },
+ [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 },
+ [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 },
+ [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 },
+ [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 },
+ [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 },
+ [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 },
+ [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 },
+ [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 },
+ [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 },
+ [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 },
+ [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 },
+ [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 },
+ [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 },
+ [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 },
+ [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 },
+ [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 },
+ [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 },
+ [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 },
+ [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 },
+ [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 },
+ [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+ [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+ [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+ [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
+ [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 },
+ [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 },
+ [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 },
+ [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 },
[INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
- /* e.g. icmh */
- [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 },/* e.g. lamy */
- [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 },/* e.g. lamy */
- [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, /* e.g. lam */
- [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, /* e.g. lctl */
- [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, /* e.g. sll */
- [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, /* e.g. cs */
- [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, /* e.g. icm */
- [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, /* e.g. axbr */
- [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, /* e.g. lg */
+ [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 },
+ [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+ [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 },
+ [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+ [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+ [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
[INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 },
- /* e.g. madb */
- [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },/* e.g. ly */
- [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },/* e.g. ley */
- [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, /* e.g. ae */
- [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, /* e.g. l */
- [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, /* e.g. bc */
- [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, /* e.g. cli */
- [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, /* e.g. tmy */
- [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, /* e.g. mvsdk */
+ [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 },
+ [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 },
+ [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 },
+ [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 },
+ [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 },
+ [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 },
+ [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 },
+ [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 },
+ [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 },
+ [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 },
[INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 },
- /* e.g. mvc */
[INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 },
- /* e.g. srp */
[INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 },
- /* e.g. pack */
- [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
- /* e.g. mvck */
[INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 },
- /* e.g. plo */
[INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 },
- /* e.g. lmd */
- [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, /* e.g. hsch */
- [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, /* e.g. lpsw */
- [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 },
- /* e.g. mvcos */
+ [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
+ [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 },
};
-static struct insn opcode[] = {
+enum { /* one index per entry of long_insn_name[]; the enumerator spells the mnemonic in upper case */
+ LONG_INSN_ALGHSIK, /* opcode tables reference these as { 0, LONG_INSN_x } in place of a name string */
+ LONG_INSN_ALHHHR,
+ LONG_INSN_ALHHLR,
+ LONG_INSN_ALHSIK,
+ LONG_INSN_ALSIHN,
+ LONG_INSN_CDFBRA,
+ LONG_INSN_CDGBRA,
+ LONG_INSN_CDGTRA,
+ LONG_INSN_CDLFBR,
+ LONG_INSN_CDLFTR,
+ LONG_INSN_CDLGBR,
+ LONG_INSN_CDLGTR,
+ LONG_INSN_CEFBRA,
+ LONG_INSN_CEGBRA,
+ LONG_INSN_CELFBR,
+ LONG_INSN_CELGBR,
+ LONG_INSN_CFDBRA,
+ LONG_INSN_CFEBRA,
+ LONG_INSN_CFXBRA,
+ LONG_INSN_CGDBRA,
+ LONG_INSN_CGDTRA,
+ LONG_INSN_CGEBRA,
+ LONG_INSN_CGXBRA,
+ LONG_INSN_CGXTRA,
+ LONG_INSN_CLFDBR,
+ LONG_INSN_CLFDTR,
+ LONG_INSN_CLFEBR,
+ LONG_INSN_CLFHSI,
+ LONG_INSN_CLFXBR,
+ LONG_INSN_CLFXTR,
+ LONG_INSN_CLGDBR,
+ LONG_INSN_CLGDTR,
+ LONG_INSN_CLGEBR,
+ LONG_INSN_CLGFRL,
+ LONG_INSN_CLGHRL,
+ LONG_INSN_CLGHSI,
+ LONG_INSN_CLGXBR,
+ LONG_INSN_CLGXTR,
+ LONG_INSN_CLHHSI,
+ LONG_INSN_CXFBRA,
+ LONG_INSN_CXGBRA,
+ LONG_INSN_CXGTRA,
+ LONG_INSN_CXLFBR,
+ LONG_INSN_CXLFTR,
+ LONG_INSN_CXLGBR,
+ LONG_INSN_CXLGTR,
+ LONG_INSN_FIDBRA,
+ LONG_INSN_FIEBRA,
+ LONG_INSN_FIXBRA,
+ LONG_INSN_LDXBRA,
+ LONG_INSN_LEDBRA,
+ LONG_INSN_LEXBRA,
+ LONG_INSN_LLGFAT,
+ LONG_INSN_LLGFRL,
+ LONG_INSN_LLGHRL,
+ LONG_INSN_LLGTAT,
+ LONG_INSN_POPCNT,
+ LONG_INSN_RIEMIT,
+ LONG_INSN_RINEXT,
+ LONG_INSN_RISBGN,
+ LONG_INSN_RISBHG,
+ LONG_INSN_RISBLG,
+ LONG_INSN_SLHHHR,
+ LONG_INSN_SLHHLR,
+ LONG_INSN_TABORT,
+ LONG_INSN_TBEGIN,
+ LONG_INSN_TBEGINC,
+ LONG_INSN_PCISTG, /* the last four enumerators are appended outside the alphabetical order used above */
+ LONG_INSN_MPCIFC,
+ LONG_INSN_STPCIFC,
+ LONG_INSN_PCISTB,
+};
+
+static char *long_insn_name[] = { /* lower-case mnemonic text for every LONG_INSN_* index */
+ [LONG_INSN_ALGHSIK] = "alghsik", /* NOTE(review): all names here are 6-7 chars; presumably too long for the inline name field of struct s390_insn - confirm */
+ [LONG_INSN_ALHHHR] = "alhhhr",
+ [LONG_INSN_ALHHLR] = "alhhlr",
+ [LONG_INSN_ALHSIK] = "alhsik",
+ [LONG_INSN_ALSIHN] = "alsihn",
+ [LONG_INSN_CDFBRA] = "cdfbra",
+ [LONG_INSN_CDGBRA] = "cdgbra",
+ [LONG_INSN_CDGTRA] = "cdgtra",
+ [LONG_INSN_CDLFBR] = "cdlfbr",
+ [LONG_INSN_CDLFTR] = "cdlftr",
+ [LONG_INSN_CDLGBR] = "cdlgbr",
+ [LONG_INSN_CDLGTR] = "cdlgtr",
+ [LONG_INSN_CEFBRA] = "cefbra",
+ [LONG_INSN_CEGBRA] = "cegbra",
+ [LONG_INSN_CELFBR] = "celfbr",
+ [LONG_INSN_CELGBR] = "celgbr",
+ [LONG_INSN_CFDBRA] = "cfdbra",
+ [LONG_INSN_CFEBRA] = "cfebra",
+ [LONG_INSN_CFXBRA] = "cfxbra",
+ [LONG_INSN_CGDBRA] = "cgdbra",
+ [LONG_INSN_CGDTRA] = "cgdtra",
+ [LONG_INSN_CGEBRA] = "cgebra",
+ [LONG_INSN_CGXBRA] = "cgxbra",
+ [LONG_INSN_CGXTRA] = "cgxtra",
+ [LONG_INSN_CLFDBR] = "clfdbr",
+ [LONG_INSN_CLFDTR] = "clfdtr",
+ [LONG_INSN_CLFEBR] = "clfebr",
+ [LONG_INSN_CLFHSI] = "clfhsi",
+ [LONG_INSN_CLFXBR] = "clfxbr",
+ [LONG_INSN_CLFXTR] = "clfxtr",
+ [LONG_INSN_CLGDBR] = "clgdbr",
+ [LONG_INSN_CLGDTR] = "clgdtr",
+ [LONG_INSN_CLGEBR] = "clgebr",
+ [LONG_INSN_CLGFRL] = "clgfrl",
+ [LONG_INSN_CLGHRL] = "clghrl",
+ [LONG_INSN_CLGHSI] = "clghsi",
+ [LONG_INSN_CLGXBR] = "clgxbr",
+ [LONG_INSN_CLGXTR] = "clgxtr",
+ [LONG_INSN_CLHHSI] = "clhhsi",
+ [LONG_INSN_CXFBRA] = "cxfbra",
+ [LONG_INSN_CXGBRA] = "cxgbra",
+ [LONG_INSN_CXGTRA] = "cxgtra",
+ [LONG_INSN_CXLFBR] = "cxlfbr",
+ [LONG_INSN_CXLFTR] = "cxlftr",
+ [LONG_INSN_CXLGBR] = "cxlgbr",
+ [LONG_INSN_CXLGTR] = "cxlgtr",
+ [LONG_INSN_FIDBRA] = "fidbra",
+ [LONG_INSN_FIEBRA] = "fiebra",
+ [LONG_INSN_FIXBRA] = "fixbra",
+ [LONG_INSN_LDXBRA] = "ldxbra",
+ [LONG_INSN_LEDBRA] = "ledbra",
+ [LONG_INSN_LEXBRA] = "lexbra",
+ [LONG_INSN_LLGFAT] = "llgfat",
+ [LONG_INSN_LLGFRL] = "llgfrl",
+ [LONG_INSN_LLGHRL] = "llghrl",
+ [LONG_INSN_LLGTAT] = "llgtat",
+ [LONG_INSN_POPCNT] = "popcnt",
+ [LONG_INSN_RIEMIT] = "riemit",
+ [LONG_INSN_RINEXT] = "rinext",
+ [LONG_INSN_RISBGN] = "risbgn",
+ [LONG_INSN_RISBHG] = "risbhg",
+ [LONG_INSN_RISBLG] = "risblg",
+ [LONG_INSN_SLHHHR] = "slhhhr",
+ [LONG_INSN_SLHHLR] = "slhhlr",
+ [LONG_INSN_TABORT] = "tabort",
+ [LONG_INSN_TBEGIN] = "tbegin",
+ [LONG_INSN_TBEGINC] = "tbeginc",
+ [LONG_INSN_PCISTG] = "pcistg",
+ [LONG_INSN_MPCIFC] = "mpcifc",
+ [LONG_INSN_STPCIFC] = "stpcifc",
+ [LONG_INSN_PCISTB] = "pcistb",
+};
+
+static struct s390_insn opcode[] = {
#ifdef CONFIG_64BIT
+ { "bprp", 0xc5, INSTR_MII_UPI },
+ { "bpp", 0xc7, INSTR_SMI_U0RDP },
+ { "trtr", 0xd0, INSTR_SS_L0RDRD },
{ "lmd", 0xef, INSTR_SS_RRRDRD3 },
#endif
{ "spm", 0x04, INSTR_RR_R0 },
@@ -298,7 +496,6 @@ static struct insn opcode[] = {
{ "lcdr", 0x23, INSTR_RR_FF },
{ "hdr", 0x24, INSTR_RR_FF },
{ "ldxr", 0x25, INSTR_RR_FF },
- { "lrdr", 0x25, INSTR_RR_FF },
{ "mxr", 0x26, INSTR_RR_FF },
{ "mxdr", 0x27, INSTR_RR_FF },
{ "ldr", 0x28, INSTR_RR_FF },
@@ -315,7 +512,6 @@ static struct insn opcode[] = {
{ "lcer", 0x33, INSTR_RR_FF },
{ "her", 0x34, INSTR_RR_FF },
{ "ledr", 0x35, INSTR_RR_FF },
- { "lrer", 0x35, INSTR_RR_FF },
{ "axr", 0x36, INSTR_RR_FF },
{ "sxr", 0x37, INSTR_RR_FF },
{ "ler", 0x38, INSTR_RR_FF },
@@ -323,7 +519,6 @@ static struct insn opcode[] = {
{ "aer", 0x3a, INSTR_RR_FF },
{ "ser", 0x3b, INSTR_RR_FF },
{ "mder", 0x3c, INSTR_RR_FF },
- { "mer", 0x3c, INSTR_RR_FF },
{ "der", 0x3d, INSTR_RR_FF },
{ "aur", 0x3e, INSTR_RR_FF },
{ "sur", 0x3f, INSTR_RR_FF },
@@ -374,7 +569,6 @@ static struct insn opcode[] = {
{ "ae", 0x7a, INSTR_RX_FRRD },
{ "se", 0x7b, INSTR_RX_FRRD },
{ "mde", 0x7c, INSTR_RX_FRRD },
- { "me", 0x7c, INSTR_RX_FRRD },
{ "de", 0x7d, INSTR_RX_FRRD },
{ "au", 0x7e, INSTR_RX_FRRD },
{ "su", 0x7f, INSTR_RX_FRRD },
@@ -452,8 +646,10 @@ static struct insn opcode[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_01[] = {
+static struct s390_insn opcode_01[] = {
#ifdef CONFIG_64BIT
+ { "ptff", 0x04, INSTR_E },
+ { "pfpo", 0x0a, INSTR_E },
{ "sam64", 0x0e, INSTR_E },
#endif
{ "pr", 0x01, INSTR_E },
@@ -466,7 +662,7 @@ static struct insn opcode_01[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_a5[] = {
+static struct s390_insn opcode_a5[] = {
#ifdef CONFIG_64BIT
{ "iihh", 0x00, INSTR_RI_RU },
{ "iihl", 0x01, INSTR_RI_RU },
@@ -488,7 +684,7 @@ static struct insn opcode_a5[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_a7[] = {
+static struct s390_insn opcode_a7[] = {
#ifdef CONFIG_64BIT
{ "tmhh", 0x02, INSTR_RI_RU },
{ "tmhl", 0x03, INSTR_RI_RU },
@@ -510,16 +706,41 @@ static struct insn opcode_a7[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_b2[] = {
+static struct s390_insn opcode_aa[] = { /* rows: name, opcode fragment (presumably; lookup code not shown), INSTR_* index into formats[] */
+#ifdef CONFIG_64BIT
+ { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, /* name given as { 0, index into long_insn_name[] } */
+ { "rion", 0x01, INSTR_RI_RI },
+ { "tric", 0x02, INSTR_RI_RI },
+ { "rioff", 0x03, INSTR_RI_RI },
+ { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
+#endif
+ { "", 0, INSTR_INVALID } /* only row left without CONFIG_64BIT; NOTE(review): presumably the end-of-table marker - confirm */
+};
+
+static struct s390_insn opcode_b2[] = {
#ifdef CONFIG_64BIT
- { "sske", 0x2b, INSTR_RRF_M0RR },
{ "stckf", 0x7c, INSTR_S_RD },
- { "cu21", 0xa6, INSTR_RRF_M0RR },
- { "cuutf", 0xa6, INSTR_RRF_M0RR },
- { "cu12", 0xa7, INSTR_RRF_M0RR },
- { "cutfu", 0xa7, INSTR_RRF_M0RR },
+ { "lpp", 0x80, INSTR_S_RD },
+ { "lcctl", 0x84, INSTR_S_RD },
+ { "lpctl", 0x85, INSTR_S_RD },
+ { "qsi", 0x86, INSTR_S_RD },
+ { "lsctl", 0x87, INSTR_S_RD },
+ { "qctri", 0x8e, INSTR_S_RD },
{ "stfle", 0xb0, INSTR_S_RD },
{ "lpswe", 0xb2, INSTR_S_RD },
+ { "srnmb", 0xb8, INSTR_S_RD },
+ { "srnmt", 0xb9, INSTR_S_RD },
+ { "lfas", 0xbd, INSTR_S_RD },
+ { "scctr", 0xe0, INSTR_RRE_RR },
+ { "spctr", 0xe1, INSTR_RRE_RR },
+ { "ecctr", 0xe4, INSTR_RRE_RR },
+ { "epctr", 0xe5, INSTR_RRE_RR },
+ { "ppa", 0xe8, INSTR_RRF_U0RR },
+ { "etnd", 0xec, INSTR_RRE_R0 },
+ { "ecpga", 0xed, INSTR_RRE_RR },
+ { "tend", 0xf8, INSTR_S_00 },
+ { "niai", 0xfa, INSTR_IE_UU },
+ { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD },
#endif
{ "stidp", 0x02, INSTR_S_RD },
{ "sck", 0x04, INSTR_S_RD },
@@ -538,6 +759,7 @@ static struct insn opcode_b2[] = {
{ "pc", 0x18, INSTR_S_RD },
{ "sac", 0x19, INSTR_S_RD },
{ "cfc", 0x1a, INSTR_S_RD },
+ { "servc", 0x20, INSTR_RRE_RR },
{ "ipte", 0x21, INSTR_RRE_RR },
{ "ipm", 0x22, INSTR_RRE_R0 },
{ "ivsk", 0x23, INSTR_RRE_RR },
@@ -548,9 +770,9 @@ static struct insn opcode_b2[] = {
{ "pt", 0x28, INSTR_RRE_RR },
{ "iske", 0x29, INSTR_RRE_RR },
{ "rrbe", 0x2a, INSTR_RRE_RR },
- { "sske", 0x2b, INSTR_RRE_RR },
+ { "sske", 0x2b, INSTR_RRF_M0RR },
{ "tb", 0x2c, INSTR_RRE_0R },
- { "dxr", 0x2d, INSTR_RRE_F0 },
+ { "dxr", 0x2d, INSTR_RRE_FF },
{ "pgin", 0x2e, INSTR_RRE_RR },
{ "pgout", 0x2f, INSTR_RRE_RR },
{ "csch", 0x30, INSTR_S_00 },
@@ -568,8 +790,8 @@ static struct insn opcode_b2[] = {
{ "schm", 0x3c, INSTR_S_00 },
{ "bakr", 0x40, INSTR_RRE_RR },
{ "cksm", 0x41, INSTR_RRE_RR },
- { "sqdr", 0x44, INSTR_RRE_F0 },
- { "sqer", 0x45, INSTR_RRE_F0 },
+ { "sqdr", 0x44, INSTR_RRE_FF },
+ { "sqer", 0x45, INSTR_RRE_FF },
{ "stura", 0x46, INSTR_RRE_RR },
{ "msta", 0x47, INSTR_RRE_R0 },
{ "palb", 0x48, INSTR_RRE_00 },
@@ -590,7 +812,6 @@ static struct insn opcode_b2[] = {
{ "clst", 0x5d, INSTR_RRE_RR },
{ "srst", 0x5e, INSTR_RRE_RR },
{ "cmpsc", 0x63, INSTR_RRE_RR },
- { "cmpsc", 0x63, INSTR_RRE_RR },
{ "siga", 0x74, INSTR_S_RD },
{ "xsch", 0x76, INSTR_S_00 },
{ "rp", 0x77, INSTR_S_RD },
@@ -601,14 +822,14 @@ static struct insn opcode_b2[] = {
{ "stfpc", 0x9c, INSTR_S_RD },
{ "lfpc", 0x9d, INSTR_S_RD },
{ "tre", 0xa5, INSTR_RRE_RR },
- { "cuutf", 0xa6, INSTR_RRE_RR },
- { "cutfu", 0xa7, INSTR_RRE_RR },
+ { "cuutf", 0xa6, INSTR_RRF_M0RR },
+ { "cutfu", 0xa7, INSTR_RRF_M0RR },
{ "stfl", 0xb1, INSTR_S_RD },
{ "trap4", 0xff, INSTR_S_RD },
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_b3[] = {
+static struct s390_insn opcode_b3[] = {
#ifdef CONFIG_64BIT
{ "maylr", 0x38, INSTR_RRF_F0FF },
{ "mylr", 0x39, INSTR_RRF_F0FF },
@@ -616,21 +837,87 @@ static struct insn opcode_b3[] = {
{ "myr", 0x3b, INSTR_RRF_F0FF },
{ "mayhr", 0x3c, INSTR_RRF_F0FF },
{ "myhr", 0x3d, INSTR_RRF_F0FF },
- { "cegbr", 0xa4, INSTR_RRE_RR },
- { "cdgbr", 0xa5, INSTR_RRE_RR },
- { "cxgbr", 0xa6, INSTR_RRE_RR },
- { "cgebr", 0xa8, INSTR_RRF_U0RF },
- { "cgdbr", 0xa9, INSTR_RRF_U0RF },
- { "cgxbr", 0xaa, INSTR_RRF_U0RF },
- { "cfer", 0xb8, INSTR_RRF_U0RF },
- { "cfdr", 0xb9, INSTR_RRF_U0RF },
- { "cfxr", 0xba, INSTR_RRF_U0RF },
- { "cegr", 0xc4, INSTR_RRE_RR },
- { "cdgr", 0xc5, INSTR_RRE_RR },
- { "cxgr", 0xc6, INSTR_RRE_RR },
+ { "lpdfr", 0x70, INSTR_RRE_FF },
+ { "lndfr", 0x71, INSTR_RRE_FF },
+ { "cpsdr", 0x72, INSTR_RRF_F0FF2 },
+ { "lcdfr", 0x73, INSTR_RRE_FF },
+ { "sfasr", 0x85, INSTR_RRE_R0 },
+ { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR },
+ { "ldgr", 0xc1, INSTR_RRE_FR },
+ { "cegr", 0xc4, INSTR_RRE_FR },
+ { "cdgr", 0xc5, INSTR_RRE_FR },
+ { "cxgr", 0xc6, INSTR_RRE_FR },
{ "cger", 0xc8, INSTR_RRF_U0RF },
{ "cgdr", 0xc9, INSTR_RRF_U0RF },
{ "cgxr", 0xca, INSTR_RRF_U0RF },
+ { "lgdr", 0xcd, INSTR_RRE_RF },
+ { "mdtra", 0xd0, INSTR_RRF_FUFF2 },
+ { "ddtra", 0xd1, INSTR_RRF_FUFF2 },
+ { "adtra", 0xd2, INSTR_RRF_FUFF2 },
+ { "sdtra", 0xd3, INSTR_RRF_FUFF2 },
+ { "ldetr", 0xd4, INSTR_RRF_0UFF },
+ { "ledtr", 0xd5, INSTR_RRF_UUFF },
+ { "ltdtr", 0xd6, INSTR_RRE_FF },
+ { "fidtr", 0xd7, INSTR_RRF_UUFF },
+ { "mxtra", 0xd8, INSTR_RRF_FUFF2 },
+ { "dxtra", 0xd9, INSTR_RRF_FUFF2 },
+ { "axtra", 0xda, INSTR_RRF_FUFF2 },
+ { "sxtra", 0xdb, INSTR_RRF_FUFF2 },
+ { "lxdtr", 0xdc, INSTR_RRF_0UFF },
+ { "ldxtr", 0xdd, INSTR_RRF_UUFF },
+ { "ltxtr", 0xde, INSTR_RRE_FF },
+ { "fixtr", 0xdf, INSTR_RRF_UUFF },
+ { "kdtr", 0xe0, INSTR_RRE_FF },
+ { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF },
+ { "cudtr", 0xe2, INSTR_RRE_RF },
+ { "csdtr", 0xe3, INSTR_RRE_RF },
+ { "cdtr", 0xe4, INSTR_RRE_FF },
+ { "eedtr", 0xe5, INSTR_RRE_RF },
+ { "esdtr", 0xe7, INSTR_RRE_RF },
+ { "kxtr", 0xe8, INSTR_RRE_FF },
+ { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR },
+ { "cuxtr", 0xea, INSTR_RRE_RF },
+ { "csxtr", 0xeb, INSTR_RRE_RF },
+ { "cxtr", 0xec, INSTR_RRE_FF },
+ { "eextr", 0xed, INSTR_RRE_RF },
+ { "esxtr", 0xef, INSTR_RRE_RF },
+ { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR },
+ { "cdutr", 0xf2, INSTR_RRE_FR },
+ { "cdstr", 0xf3, INSTR_RRE_FR },
+ { "cedtr", 0xf4, INSTR_RRE_FF },
+ { "qadtr", 0xf5, INSTR_RRF_FUFF },
+ { "iedtr", 0xf6, INSTR_RRF_F0FR },
+ { "rrdtr", 0xf7, INSTR_RRF_FFRU },
+ { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF },
+ { "cxutr", 0xfa, INSTR_RRE_FR },
+ { "cxstr", 0xfb, INSTR_RRE_FR },
+ { "cextr", 0xfc, INSTR_RRE_FF },
+ { "qaxtr", 0xfd, INSTR_RRF_FUFF },
+ { "iextr", 0xfe, INSTR_RRF_F0FR },
+ { "rrxtr", 0xff, INSTR_RRF_FFRU },
#endif
{ "lpebr", 0x00, INSTR_RRE_FF },
{ "lnebr", 0x01, INSTR_RRE_FF },
@@ -677,10 +964,10 @@ static struct insn opcode_b3[] = {
{ "lnxbr", 0x41, INSTR_RRE_FF },
{ "ltxbr", 0x42, INSTR_RRE_FF },
{ "lcxbr", 0x43, INSTR_RRE_FF },
- { "ledbr", 0x44, INSTR_RRE_FF },
- { "ldxbr", 0x45, INSTR_RRE_FF },
- { "lexbr", 0x46, INSTR_RRE_FF },
- { "fixbr", 0x47, INSTR_RRF_U0FF },
+ { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF },
+ { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF },
+ { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF },
+ { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF },
{ "kxbr", 0x48, INSTR_RRE_FF },
{ "cxbr", 0x49, INSTR_RRE_FF },
{ "axbr", 0x4a, INSTR_RRE_FF },
@@ -690,24 +977,24 @@ static struct insn opcode_b3[] = {
{ "tbedr", 0x50, INSTR_RRF_U0FF },
{ "tbdr", 0x51, INSTR_RRF_U0FF },
{ "diebr", 0x53, INSTR_RRF_FUFF },
- { "fiebr", 0x57, INSTR_RRF_U0FF },
- { "thder", 0x58, INSTR_RRE_RR },
- { "thdr", 0x59, INSTR_RRE_RR },
+ { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF },
+ { "thder", 0x58, INSTR_RRE_FF },
+ { "thdr", 0x59, INSTR_RRE_FF },
{ "didbr", 0x5b, INSTR_RRF_FUFF },
- { "fidbr", 0x5f, INSTR_RRF_U0FF },
+ { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF },
{ "lpxr", 0x60, INSTR_RRE_FF },
{ "lnxr", 0x61, INSTR_RRE_FF },
{ "ltxr", 0x62, INSTR_RRE_FF },
{ "lcxr", 0x63, INSTR_RRE_FF },
- { "lxr", 0x65, INSTR_RRE_RR },
+ { "lxr", 0x65, INSTR_RRE_FF },
{ "lexr", 0x66, INSTR_RRE_FF },
- { "fixr", 0x67, INSTR_RRF_U0FF },
+ { "fixr", 0x67, INSTR_RRE_FF },
{ "cxr", 0x69, INSTR_RRE_FF },
- { "lzer", 0x74, INSTR_RRE_R0 },
- { "lzdr", 0x75, INSTR_RRE_R0 },
- { "lzxr", 0x76, INSTR_RRE_R0 },
- { "fier", 0x77, INSTR_RRF_U0FF },
- { "fidr", 0x7f, INSTR_RRF_U0FF },
+ { "lzer", 0x74, INSTR_RRE_F0 },
+ { "lzdr", 0x75, INSTR_RRE_F0 },
+ { "lzxr", 0x76, INSTR_RRE_F0 },
+ { "fier", 0x77, INSTR_RRE_FF },
+ { "fidr", 0x7f, INSTR_RRE_FF },
{ "sfpc", 0x84, INSTR_RRE_RR_OPT },
{ "efpc", 0x8c, INSTR_RRE_RR_OPT },
{ "cefbr", 0x94, INSTR_RRE_RF },
@@ -716,13 +1003,16 @@ static struct insn opcode_b3[] = {
{ "cfebr", 0x98, INSTR_RRF_U0RF },
{ "cfdbr", 0x99, INSTR_RRF_U0RF },
{ "cfxbr", 0x9a, INSTR_RRF_U0RF },
- { "cefr", 0xb4, INSTR_RRE_RF },
- { "cdfr", 0xb5, INSTR_RRE_RF },
- { "cxfr", 0xb6, INSTR_RRE_RF },
+ { "cefr", 0xb4, INSTR_RRE_FR },
+ { "cdfr", 0xb5, INSTR_RRE_FR },
+ { "cxfr", 0xb6, INSTR_RRE_FR },
+ { "cfer", 0xb8, INSTR_RRF_U0RF },
+ { "cfdr", 0xb9, INSTR_RRF_U0RF },
+ { "cfxr", 0xba, INSTR_RRF_U0RF },
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_b9[] = {
+static struct s390_insn opcode_b9[] = {
#ifdef CONFIG_64BIT
{ "lpgr", 0x00, INSTR_RRE_RR },
{ "lngr", 0x01, INSTR_RRE_RR },
@@ -760,7 +1050,23 @@ static struct insn opcode_b9[] = {
{ "lhr", 0x27, INSTR_RRE_RR },
{ "cgfr", 0x30, INSTR_RRE_RR },
{ "clgfr", 0x31, INSTR_RRE_RR },
+ { "cfdtr", 0x41, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF },
{ "bctgr", 0x46, INSTR_RRE_RR },
+ { "cfxtr", 0x49, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR },
+ { "cdftr", 0x51, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR },
+ { "cxftr", 0x59, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR },
+ { "cgrt", 0x60, INSTR_RRF_U0RR },
+ { "clgrt", 0x61, INSTR_RRF_U0RR },
+ { "crt", 0x72, INSTR_RRF_U0RR },
+ { "clrt", 0x73, INSTR_RRF_U0RR },
{ "ngr", 0x80, INSTR_RRE_RR },
{ "ogr", 0x81, INSTR_RRE_RR },
{ "xgr", 0x82, INSTR_RRE_RR },
@@ -773,14 +1079,53 @@ static struct insn opcode_b9[] = {
{ "slbgr", 0x89, INSTR_RRE_RR },
{ "cspg", 0x8a, INSTR_RRE_RR },
{ "idte", 0x8e, INSTR_RRF_R0RR },
+ { "crdte", 0x8f, INSTR_RRF_RMRR },
{ "llcr", 0x94, INSTR_RRE_RR },
{ "llhr", 0x95, INSTR_RRE_RR },
{ "esea", 0x9d, INSTR_RRE_R0 },
+ { "ptf", 0xa2, INSTR_RRE_R0 },
{ "lptea", 0xaa, INSTR_RRF_RURR },
+ { "rrbm", 0xae, INSTR_RRE_RR },
+ { "pfmf", 0xaf, INSTR_RRE_RR },
{ "cu14", 0xb0, INSTR_RRF_M0RR },
{ "cu24", 0xb1, INSTR_RRF_M0RR },
- { "cu41", 0xb2, INSTR_RRF_M0RR },
- { "cu42", 0xb3, INSTR_RRF_M0RR },
+ { "cu41", 0xb2, INSTR_RRE_RR },
+ { "cu42", 0xb3, INSTR_RRE_RR },
+ { "trtre", 0xbd, INSTR_RRF_M0RR },
+ { "srstu", 0xbe, INSTR_RRE_RR },
+ { "trte", 0xbf, INSTR_RRF_M0RR },
+ { "ahhhr", 0xc8, INSTR_RRF_R0RR2 },
+ { "shhhr", 0xc9, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 },
+ { "chhr", 0xcd, INSTR_RRE_RR },
+ { "clhhr", 0xcf, INSTR_RRE_RR },
+ { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR },
+ { "pcilg", 0xd2, INSTR_RRE_RR },
+ { "rpcit", 0xd3, INSTR_RRE_RR },
+ { "ahhlr", 0xd8, INSTR_RRF_R0RR2 },
+ { "shhlr", 0xd9, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 },
+ { "chlr", 0xdd, INSTR_RRE_RR },
+ { "clhlr", 0xdf, INSTR_RRE_RR },
+ { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR },
+ { "locgr", 0xe2, INSTR_RRF_M0RR },
+ { "ngrk", 0xe4, INSTR_RRF_R0RR2 },
+ { "ogrk", 0xe6, INSTR_RRF_R0RR2 },
+ { "xgrk", 0xe7, INSTR_RRF_R0RR2 },
+ { "agrk", 0xe8, INSTR_RRF_R0RR2 },
+ { "sgrk", 0xe9, INSTR_RRF_R0RR2 },
+ { "algrk", 0xea, INSTR_RRF_R0RR2 },
+ { "slgrk", 0xeb, INSTR_RRF_R0RR2 },
+ { "locr", 0xf2, INSTR_RRF_M0RR },
+ { "nrk", 0xf4, INSTR_RRF_R0RR2 },
+ { "ork", 0xf6, INSTR_RRF_R0RR2 },
+ { "xrk", 0xf7, INSTR_RRF_R0RR2 },
+ { "ark", 0xf8, INSTR_RRF_R0RR2 },
+ { "srk", 0xf9, INSTR_RRF_R0RR2 },
+ { "alrk", 0xfa, INSTR_RRF_R0RR2 },
+ { "slrk", 0xfb, INSTR_RRF_R0RR2 },
#endif
{ "kmac", 0x1e, INSTR_RRE_RR },
{ "lrvr", 0x1f, INSTR_RRE_RR },
@@ -789,13 +1134,9 @@ static struct insn opcode_b9[] = {
{ "kimd", 0x3e, INSTR_RRE_RR },
{ "klmd", 0x3f, INSTR_RRE_RR },
{ "epsw", 0x8d, INSTR_RRE_RR },
- { "trtt", 0x90, INSTR_RRE_RR },
{ "trtt", 0x90, INSTR_RRF_M0RR },
- { "trto", 0x91, INSTR_RRE_RR },
{ "trto", 0x91, INSTR_RRF_M0RR },
- { "trot", 0x92, INSTR_RRE_RR },
{ "trot", 0x92, INSTR_RRF_M0RR },
- { "troo", 0x93, INSTR_RRE_RR },
{ "troo", 0x93, INSTR_RRF_M0RR },
{ "mlr", 0x96, INSTR_RRE_RR },
{ "dlr", 0x97, INSTR_RRE_RR },
@@ -804,7 +1145,7 @@ static struct insn opcode_b9[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c0[] = {
+static struct s390_insn opcode_c0[] = {
#ifdef CONFIG_64BIT
{ "lgfi", 0x01, INSTR_RIL_RI },
{ "xihf", 0x06, INSTR_RIL_RU },
@@ -824,8 +1165,10 @@ static struct insn opcode_c0[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c2[] = {
+static struct s390_insn opcode_c2[] = {
#ifdef CONFIG_64BIT
+ { "msgfi", 0x00, INSTR_RIL_RI },
+ { "msfi", 0x01, INSTR_RIL_RI },
{ "slgfi", 0x04, INSTR_RIL_RU },
{ "slfi", 0x05, INSTR_RIL_RU },
{ "agfi", 0x08, INSTR_RIL_RI },
@@ -840,14 +1183,65 @@ static struct insn opcode_c2[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_c8[] = {
+static struct s390_insn opcode_c4[] = { /* rows: name, opcode fragment (presumably; lookup code not shown), INSTR_* index into formats[] */
+#ifdef CONFIG_64BIT
+ { "llhrl", 0x02, INSTR_RIL_RP }, /* every row here uses INSTR_RIL_RP, i.e. operands R_8 and PC-relative J32_16 */
+ { "lghrl", 0x04, INSTR_RIL_RP },
+ { "lhrl", 0x05, INSTR_RIL_RP },
+ { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, /* name given as { 0, index into long_insn_name[] } */
+ { "sthrl", 0x07, INSTR_RIL_RP },
+ { "lgrl", 0x08, INSTR_RIL_RP },
+ { "stgrl", 0x0b, INSTR_RIL_RP },
+ { "lgfrl", 0x0c, INSTR_RIL_RP },
+ { "lrl", 0x0d, INSTR_RIL_RP },
+ { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP },
+ { "strl", 0x0f, INSTR_RIL_RP },
+#endif
+ { "", 0, INSTR_INVALID } /* only row left without CONFIG_64BIT; NOTE(review): presumably the end-of-table marker - confirm */
+};
+
+static struct s390_insn opcode_c6[] = { /* rows: name, opcode fragment (presumably; lookup code not shown), INSTR_* index into formats[] */
+#ifdef CONFIG_64BIT
+ { "exrl", 0x00, INSTR_RIL_RP },
+ { "pfdrl", 0x02, INSTR_RIL_UP }, /* only row whose format starts with U4_8 instead of R_8 */
+ { "cghrl", 0x04, INSTR_RIL_RP },
+ { "chrl", 0x05, INSTR_RIL_RP },
+ { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, /* name given as { 0, index into long_insn_name[] } */
+ { "clhrl", 0x07, INSTR_RIL_RP },
+ { "cgrl", 0x08, INSTR_RIL_RP },
+ { "clgrl", 0x0a, INSTR_RIL_RP },
+ { "cgfrl", 0x0c, INSTR_RIL_RP },
+ { "crl", 0x0d, INSTR_RIL_RP },
+ { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP },
+ { "clrl", 0x0f, INSTR_RIL_RP },
+#endif
+ { "", 0, INSTR_INVALID } /* only row left without CONFIG_64BIT; NOTE(review): presumably the end-of-table marker - confirm */
+};
+
+static struct s390_insn opcode_c8[] = { /* rows: name, opcode fragment (presumably; lookup code not shown), INSTR_* index into formats[] */
#ifdef CONFIG_64BIT
{ "mvcos", 0x00, INSTR_SSF_RRDRD },
+ { "ectg", 0x01, INSTR_SSF_RRDRD },
+ { "csst", 0x02, INSTR_SSF_RRDRD },
+ { "lpd", 0x04, INSTR_SSF_RRDRD2 }, /* _RRDRD2 lists R_8 first; _RRDRD lists it after the two D/B pairs */
+ { "lpdg", 0x05, INSTR_SSF_RRDRD2 },
+#endif
+ { "", 0, INSTR_INVALID } /* only row left without CONFIG_64BIT; NOTE(review): presumably the end-of-table marker - confirm */
+};
+
+static struct s390_insn opcode_cc[] = { /* rows: name, opcode fragment (presumably; lookup code not shown), INSTR_* index into formats[] */
+#ifdef CONFIG_64BIT
+ { "brcth", 0x06, INSTR_RIL_RP }, /* R_8 plus PC-relative J32_16 */
+ { "aih", 0x08, INSTR_RIL_RI },
+ { "alsih", 0x0a, INSTR_RIL_RI },
+ { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI }, /* name given as { 0, index into long_insn_name[] } */
+ { "cih", 0x0d, INSTR_RIL_RI },
+ { "clih", 0x0f, INSTR_RIL_RU }, /* logical compare takes an unsigned immediate: U32_16, not the signed I32_16 of RIL_RI */
#endif
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_e3[] = {
+static struct s390_insn opcode_e3[] = {
#ifdef CONFIG_64BIT
{ "ltg", 0x02, INSTR_RXY_RRRD },
{ "lrag", 0x03, INSTR_RXY_RRRD },
@@ -876,11 +1270,15 @@ static struct insn opcode_e3[] = {
{ "cg", 0x20, INSTR_RXY_RRRD },
{ "clg", 0x21, INSTR_RXY_RRRD },
{ "stg", 0x24, INSTR_RXY_RRRD },
+ { "ntstg", 0x25, INSTR_RXY_RRRD },
{ "cvdy", 0x26, INSTR_RXY_RRRD },
{ "cvdg", 0x2e, INSTR_RXY_RRRD },
{ "strvg", 0x2f, INSTR_RXY_RRRD },
{ "cgf", 0x30, INSTR_RXY_RRRD },
{ "clgf", 0x31, INSTR_RXY_RRRD },
+ { "ltgf", 0x32, INSTR_RXY_RRRD },
+ { "cgh", 0x34, INSTR_RXY_RRRD },
+ { "pfd", 0x36, INSTR_RXY_URRD },
{ "strvh", 0x3f, INSTR_RXY_RRRD },
{ "bctg", 0x46, INSTR_RXY_RRRD },
{ "sty", 0x50, INSTR_RXY_RRRD },
@@ -893,21 +1291,25 @@ static struct insn opcode_e3[] = {
{ "cy", 0x59, INSTR_RXY_RRRD },
{ "ay", 0x5a, INSTR_RXY_RRRD },
{ "sy", 0x5b, INSTR_RXY_RRRD },
+ { "mfy", 0x5c, INSTR_RXY_RRRD },
{ "aly", 0x5e, INSTR_RXY_RRRD },
{ "sly", 0x5f, INSTR_RXY_RRRD },
{ "sthy", 0x70, INSTR_RXY_RRRD },
{ "lay", 0x71, INSTR_RXY_RRRD },
{ "stcy", 0x72, INSTR_RXY_RRRD },
{ "icy", 0x73, INSTR_RXY_RRRD },
+ { "laey", 0x75, INSTR_RXY_RRRD },
{ "lb", 0x76, INSTR_RXY_RRRD },
{ "lgb", 0x77, INSTR_RXY_RRRD },
{ "lhy", 0x78, INSTR_RXY_RRRD },
{ "chy", 0x79, INSTR_RXY_RRRD },
{ "ahy", 0x7a, INSTR_RXY_RRRD },
{ "shy", 0x7b, INSTR_RXY_RRRD },
+ { "mhy", 0x7c, INSTR_RXY_RRRD },
{ "ng", 0x80, INSTR_RXY_RRRD },
{ "og", 0x81, INSTR_RXY_RRRD },
{ "xg", 0x82, INSTR_RXY_RRRD },
+ { "lgat", 0x85, INSTR_RXY_RRRD },
{ "mlg", 0x86, INSTR_RXY_RRRD },
{ "dlg", 0x87, INSTR_RXY_RRRD },
{ "alcg", 0x88, INSTR_RXY_RRRD },
@@ -918,6 +1320,22 @@ static struct insn opcode_e3[] = {
{ "llgh", 0x91, INSTR_RXY_RRRD },
{ "llc", 0x94, INSTR_RXY_RRRD },
{ "llh", 0x95, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD },
+ { "lat", 0x9f, INSTR_RXY_RRRD },
+ { "lbh", 0xc0, INSTR_RXY_RRRD },
+ { "llch", 0xc2, INSTR_RXY_RRRD },
+ { "stch", 0xc3, INSTR_RXY_RRRD },
+ { "lhh", 0xc4, INSTR_RXY_RRRD },
+ { "llhh", 0xc6, INSTR_RXY_RRRD },
+ { "sthh", 0xc7, INSTR_RXY_RRRD },
+ { "lfhat", 0xc8, INSTR_RXY_RRRD },
+ { "lfh", 0xca, INSTR_RXY_RRRD },
+ { "stfh", 0xcb, INSTR_RXY_RRRD },
+ { "chf", 0xcd, INSTR_RXY_RRRD },
+ { "clhf", 0xcf, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD },
#endif
{ "lrv", 0x1e, INSTR_RXY_RRRD },
{ "lrvh", 0x1f, INSTR_RXY_RRRD },
@@ -929,9 +1347,20 @@ static struct insn opcode_e3[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_e5[] = {
+static struct s390_insn opcode_e5[] = {
#ifdef CONFIG_64BIT
{ "strag", 0x02, INSTR_SSE_RDRD },
+ { "mvhhi", 0x44, INSTR_SIL_RDI },
+ { "mvghi", 0x48, INSTR_SIL_RDI },
+ { "mvhi", 0x4c, INSTR_SIL_RDI },
+ { "chhsi", 0x54, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU },
+ { "cghsi", 0x58, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU },
+ { "chsi", 0x5c, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU },
+ { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU },
+ { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU },
#endif
{ "lasp", 0x00, INSTR_SSE_RDRD },
{ "tprot", 0x01, INSTR_SSE_RDRD },
@@ -940,7 +1369,7 @@ static struct insn opcode_e5[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_eb[] = {
+static struct s390_insn opcode_eb[] = {
#ifdef CONFIG_64BIT
{ "lmg", 0x04, INSTR_RSY_RRRD },
{ "srag", 0x0a, INSTR_RSY_RRRD },
@@ -952,9 +1381,11 @@ static struct insn opcode_eb[] = {
{ "rllg", 0x1c, INSTR_RSY_RRRD },
{ "clmh", 0x20, INSTR_RSY_RURD },
{ "clmy", 0x21, INSTR_RSY_RURD },
+ { "clt", 0x23, INSTR_RSY_RURD },
{ "stmg", 0x24, INSTR_RSY_RRRD },
{ "stctg", 0x25, INSTR_RSY_CCRD },
{ "stmh", 0x26, INSTR_RSY_RRRD },
+ { "clgt", 0x2b, INSTR_RSY_RURD },
{ "stcmh", 0x2c, INSTR_RSY_RURD },
{ "stcmy", 0x2d, INSTR_RSY_RURD },
{ "lctlg", 0x2f, INSTR_RSY_CCRD },
@@ -963,13 +1394,17 @@ static struct insn opcode_eb[] = {
{ "cdsg", 0x3e, INSTR_RSY_RRRD },
{ "bxhg", 0x44, INSTR_RSY_RRRD },
{ "bxleg", 0x45, INSTR_RSY_RRRD },
+ { "ecag", 0x4c, INSTR_RSY_RRRD },
{ "tmy", 0x51, INSTR_SIY_URD },
{ "mviy", 0x52, INSTR_SIY_URD },
{ "niy", 0x54, INSTR_SIY_URD },
{ "cliy", 0x55, INSTR_SIY_URD },
{ "oiy", 0x56, INSTR_SIY_URD },
{ "xiy", 0x57, INSTR_SIY_URD },
- { "icmh", 0x80, INSTR_RSE_RURD },
+ { "asi", 0x6a, INSTR_SIY_IRD },
+ { "alsi", 0x6e, INSTR_SIY_IRD },
+ { "agsi", 0x7a, INSTR_SIY_IRD },
+ { "algsi", 0x7e, INSTR_SIY_IRD },
{ "icmh", 0x80, INSTR_RSY_RURD },
{ "icmy", 0x81, INSTR_RSY_RURD },
{ "clclu", 0x8f, INSTR_RSY_RRRD },
@@ -978,6 +1413,29 @@ static struct insn opcode_eb[] = {
{ "lmy", 0x98, INSTR_RSY_RRRD },
{ "lamy", 0x9a, INSTR_RSY_AARD },
{ "stamy", 0x9b, INSTR_RSY_AARD },
+ { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD },
+ { "sic", 0xd1, INSTR_RSY_RRRD },
+ { "srak", 0xdc, INSTR_RSY_RRRD },
+ { "slak", 0xdd, INSTR_RSY_RRRD },
+ { "srlk", 0xde, INSTR_RSY_RRRD },
+ { "sllk", 0xdf, INSTR_RSY_RRRD },
+ { "locg", 0xe2, INSTR_RSY_RDRM },
+ { "stocg", 0xe3, INSTR_RSY_RDRM },
+ { "lang", 0xe4, INSTR_RSY_RRRD },
+ { "laog", 0xe6, INSTR_RSY_RRRD },
+ { "laxg", 0xe7, INSTR_RSY_RRRD },
+ { "laag", 0xe8, INSTR_RSY_RRRD },
+ { "laalg", 0xea, INSTR_RSY_RRRD },
+ { "loc", 0xf2, INSTR_RSY_RDRM },
+ { "stoc", 0xf3, INSTR_RSY_RDRM },
+ { "lan", 0xf4, INSTR_RSY_RRRD },
+ { "lao", 0xf6, INSTR_RSY_RRRD },
+ { "lax", 0xf7, INSTR_RSY_RRRD },
+ { "laa", 0xf8, INSTR_RSY_RRRD },
+ { "laal", 0xfa, INSTR_RSY_RRRD },
+ { "lric", 0x60, INSTR_RSY_RDRM },
+ { "stric", 0x61, INSTR_RSY_RDRM },
+ { "mric", 0x62, INSTR_RSY_RDRM },
#endif
{ "rll", 0x1d, INSTR_RSY_RRRD },
{ "mvclu", 0x8e, INSTR_RSY_RRRD },
@@ -985,15 +1443,46 @@ static struct insn opcode_eb[] = {
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_ec[] = {
+static struct s390_insn opcode_ec[] = {
#ifdef CONFIG_64BIT
{ "brxhg", 0x44, INSTR_RIE_RRP },
{ "brxlg", 0x45, INSTR_RIE_RRP },
+ { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU },
+ { "rnsbg", 0x54, INSTR_RIE_RRUUU },
+ { "risbg", 0x55, INSTR_RIE_RRUUU },
+ { "rosbg", 0x56, INSTR_RIE_RRUUU },
+ { "rxsbg", 0x57, INSTR_RIE_RRUUU },
+ { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU },
+ { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU },
+ { "cgrj", 0x64, INSTR_RIE_RRPU },
+ { "clgrj", 0x65, INSTR_RIE_RRPU },
+ { "cgit", 0x70, INSTR_RIE_R0IU },
+ { "clgit", 0x71, INSTR_RIE_R0UU },
+ { "cit", 0x72, INSTR_RIE_R0IU },
+ { "clfit", 0x73, INSTR_RIE_R0UU },
+ { "crj", 0x76, INSTR_RIE_RRPU },
+ { "clrj", 0x77, INSTR_RIE_RRPU },
+ { "cgij", 0x7c, INSTR_RIE_RUPI },
+ { "clgij", 0x7d, INSTR_RIE_RUPU },
+ { "cij", 0x7e, INSTR_RIE_RUPI },
+ { "clij", 0x7f, INSTR_RIE_RUPU },
+ { "ahik", 0xd8, INSTR_RIE_RRI0 },
+ { "aghik", 0xd9, INSTR_RIE_RRI0 },
+ { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 },
+ { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 },
+ { "cgrb", 0xe4, INSTR_RRS_RRRDU },
+ { "clgrb", 0xe5, INSTR_RRS_RRRDU },
+ { "crb", 0xf6, INSTR_RRS_RRRDU },
+ { "clrb", 0xf7, INSTR_RRS_RRRDU },
+ { "cgib", 0xfc, INSTR_RIS_RURDI },
+ { "clgib", 0xfd, INSTR_RIS_RURDU },
+ { "cib", 0xfe, INSTR_RIS_RURDI },
+ { "clib", 0xff, INSTR_RIS_RURDU },
#endif
{ "", 0, INSTR_INVALID }
};
-static struct insn opcode_ed[] = {
+static struct s390_insn opcode_ed[] = {
#ifdef CONFIG_64BIT
{ "mayl", 0x38, INSTR_RXF_FRRDF },
{ "myl", 0x39, INSTR_RXF_FRRDF },
@@ -1001,10 +1490,24 @@ static struct insn opcode_ed[] = {
{ "my", 0x3b, INSTR_RXF_FRRDF },
{ "mayh", 0x3c, INSTR_RXF_FRRDF },
{ "myh", 0x3d, INSTR_RXF_FRRDF },
+ { "sldt", 0x40, INSTR_RXF_FRRDF },
+ { "srdt", 0x41, INSTR_RXF_FRRDF },
+ { "slxt", 0x48, INSTR_RXF_FRRDF },
+ { "srxt", 0x49, INSTR_RXF_FRRDF },
+ { "tdcet", 0x50, INSTR_RXE_FRRD },
+ { "tdget", 0x51, INSTR_RXE_FRRD },
+ { "tdcdt", 0x54, INSTR_RXE_FRRD },
+ { "tdgdt", 0x55, INSTR_RXE_FRRD },
+ { "tdcxt", 0x58, INSTR_RXE_FRRD },
+ { "tdgxt", 0x59, INSTR_RXE_FRRD },
{ "ley", 0x64, INSTR_RXY_FRRD },
{ "ldy", 0x65, INSTR_RXY_FRRD },
{ "stey", 0x66, INSTR_RXY_FRRD },
{ "stdy", 0x67, INSTR_RXY_FRRD },
+ { "czdt", 0xa8, INSTR_RSL_LRDFU },
+ { "czxt", 0xa9, INSTR_RSL_LRDFU },
+ { "cdzt", 0xaa, INSTR_RSL_LRDFU },
+ { "cxzt", 0xab, INSTR_RSL_LRDFU },
#endif
{ "ldeb", 0x04, INSTR_RXE_FRRD },
{ "lxdb", 0x05, INSTR_RXE_FRRD },
@@ -1038,6 +1541,7 @@ static struct insn opcode_ed[] = {
{ "mae", 0x2e, INSTR_RXF_FRRDF },
{ "mse", 0x2f, INSTR_RXF_FRRDF },
{ "sqe", 0x34, INSTR_RXE_FRRD },
+ { "sqd", 0x35, INSTR_RXE_FRRD },
{ "mee", 0x37, INSTR_RXE_FRRD },
{ "mad", 0x3e, INSTR_RXF_FRRDF },
{ "msd", 0x3f, INSTR_RXF_FRRDF },
@@ -1046,7 +1550,7 @@ static struct insn opcode_ed[] = {
/* Extracts an operand value from an instruction. */
static unsigned int extract_operand(unsigned char *code,
- const struct operand *operand)
+ const struct s390_operand *operand)
{
unsigned int val;
int bits;
@@ -1082,16 +1586,11 @@ static unsigned int extract_operand(unsigned char *code,
return val;
}
-static inline int insn_length(unsigned char code)
-{
- return ((((int) code + 64) >> 7) + 1) << 1;
-}
-
-static struct insn *find_insn(unsigned char *code)
+struct s390_insn *find_insn(unsigned char *code)
{
unsigned char opfrag = code[1];
unsigned char opmask;
- struct insn *table;
+ struct s390_insn *table;
switch (code[0]) {
case 0x01:
@@ -1103,6 +1602,9 @@ static struct insn *find_insn(unsigned char *code)
case 0xa7:
table = opcode_a7;
break;
+ case 0xaa:
+ table = opcode_aa;
+ break;
case 0xb2:
table = opcode_b2;
break;
@@ -1118,9 +1620,18 @@ static struct insn *find_insn(unsigned char *code)
case 0xc2:
table = opcode_c2;
break;
+ case 0xc4:
+ table = opcode_c4;
+ break;
+ case 0xc6:
+ table = opcode_c6;
+ break;
case 0xc8:
table = opcode_c8;
break;
+ case 0xcc:
+ table = opcode_cc;
+ break;
case 0xe3:
table = opcode_e3;
opfrag = code[5];
@@ -1154,11 +1665,39 @@ static struct insn *find_insn(unsigned char *code)
return NULL;
}
+/**
+ * insn_to_mnemonic - decode an s390 instruction
+ * @instruction: instruction to decode
+ * @buf: buffer to fill with mnemonic
+ * @len: length of buffer
+ *
+ * Decode the instruction at @instruction and store the corresponding
+ * mnemonic into @buf of length @len.
+ * @buf is left unchanged if the instruction could not be decoded.
+ * Returns:
+ * %0 on success, %-ENOENT if the instruction was not found.
+ */
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len)
+{
+	struct s390_insn *entry = find_insn(instruction);
+
+	/* Unknown opcode: report it and leave @buf exactly as it was. */
+	if (entry == NULL)
+		return -ENOENT;
+
+	if (entry->name[0] != '\0') {
+		/* Short mnemonic kept inline; print at most five characters. */
+		snprintf(buf, len, "%.5s", entry->name);
+	} else {
+		/* Empty first byte: name[1] selects a long_insn_name[] entry. */
+		snprintf(buf, len, "%s",
+			 long_insn_name[(int) entry->name[1]]);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(insn_to_mnemonic);
+
static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
{
- struct insn *insn;
+ struct s390_insn *insn;
const unsigned char *ops;
- const struct operand *operand;
+ const struct s390_operand *operand;
unsigned int value;
char separator;
char *ptr;
@@ -1167,7 +1706,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
ptr = buffer;
insn = find_insn(code);
if (insn) {
- ptr += sprintf(ptr, "%.5s\t", insn->name);
+ if (insn->name[0] == '\0')
+ ptr += sprintf(ptr, "%s\t",
+ long_insn_name[(int) insn->name[1]]);
+ else
+ ptr += sprintf(ptr, "%.5s\t", insn->name);
/* Extract the operands. */
separator = 0;
for (ops = formats[insn->format] + 1, i = 0;
@@ -1213,7 +1756,7 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
void show_code(struct pt_regs *regs)
{
- char *mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl";
+ char *mode = user_mode(regs) ? "User" : "Krnl";
unsigned char code[64];
char buffer[64], *ptr;
mm_segment_t old_fs;
@@ -1222,7 +1765,7 @@ void show_code(struct pt_regs *regs)
/* Get a snapshot of the 64 bytes surrounding the fault address. */
old_fs = get_fs();
- set_fs((regs->psw.mask & PSW_MASK_PSTATE) ? USER_DS : KERNEL_DS);
+ set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
addr = regs->psw.addr - 34 + start;
if (__copy_from_user(code + start - 2,
@@ -1258,10 +1801,15 @@ void show_code(struct pt_regs *regs)
ptr += sprintf(ptr, "%s Code:", mode);
hops = 0;
while (start < end && hops < 8) {
- *ptr++ = (start == 32) ? '>' : ' ';
+ opsize = insn_length(code[start]);
+ if (start + opsize == 32)
+ *ptr++ = '#';
+ else if (start == 32)
+ *ptr++ = '>';
+ else
+ *ptr++ = ' ';
addr = regs->psw.addr + start - 32;
ptr += sprintf(ptr, ONELONG, addr);
- opsize = insn_length(code[start]);
if (start + opsize >= end)
break;
for (i = 0; i < opsize; i++)
@@ -1278,3 +1826,28 @@ void show_code(struct pt_regs *regs)
}
printk("\n");
}
+
+/*
+ * Disassemble the @len bytes starting at @code and print one line per
+ * instruction: the address, the raw instruction bytes in hex and the text
+ * produced by print_insn(). The loop stops at the first instruction whose
+ * length would reach beyond the end of the range.
+ */
+void print_fn_code(unsigned char *code, unsigned long len)
+{
+	char buffer[64], *ptr;
+	int opsize, i;
+
+	while (len) {
+		ptr = buffer;
+		opsize = insn_length(*code);
+		if (opsize > len)
+			break;
+		ptr += sprintf(ptr, "%p: ", code);
+		for (i = 0; i < opsize; i++)
+			ptr += sprintf(ptr, "%02x", code[i]);
+		*ptr++ = '\t';
+		/* Fewer than four bytes printed: add a tab to keep columns aligned. */
+		if (i < 4)
+			*ptr++ = '\t';
+		ptr += print_insn(ptr, code, (unsigned long) code);
+		*ptr++ = '\n';
+		*ptr++ = 0;
+		/*
+		 * buffer holds finished output, not a format: print it through
+		 * "%s" so that any '%' in the decoded text is emitted literally
+		 * instead of being parsed as a conversion specifier.
+		 */
+		printk("%s", buffer);
+		code += opsize;
+		len -= opsize;
+	}
+}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
new file mode 100644
index 00000000000..acb412442e5
--- /dev/null
+++ b/arch/s390/kernel/dumpstack.c
@@ -0,0 +1,217 @@
+/*
+ * Stack dumping functions
+ *
+ * Copyright IBM Corp. 1999, 2013
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/utsname.h>
+#include <linux/export.h>
+#include <linux/kdebug.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+#include <asm/ipl.h>
+
+#ifndef CONFIG_64BIT
+#define LONG "%08lx "
+#define FOURLONG "%08lx %08lx %08lx %08lx\n"
+static int kstack_depth_to_print = 12;
+#else /* CONFIG_64BIT */
+#define LONG "%016lx "
+#define FOURLONG "%016lx %016lx %016lx %016lx\n"
+static int kstack_depth_to_print = 20;
+#endif /* CONFIG_64BIT */
+
+/*
+ * For show_trace we have three different stacks to consider:
+ * - the panic stack which is used if the kernel stack has overflowed
+ * - the asynchronous interrupt stack (cpu related)
+ * - the synchronous kernel stack (process related)
+ * The stack trace can start at any of the three stacks and can potentially
+ * touch all of them. The order is: panic stack, async stack, sync stack.
+ */
+/*
+ * Print the call chain found on one stack. Frames are walked starting at
+ * @sp for as long as they lie within [@low, @high]. The return value is the
+ * first stack pointer that fails the range/ordering checks, which
+ * show_trace() passes on to the walk of the next stack.
+ */
+static unsigned long
+__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long addr;
+
+ while (1) {
+ sp = sp & PSW_ADDR_INSN;
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ /* The first frame of a chain is printed in parentheses. */
+ sf = (struct stack_frame *) sp;
+ /* NOTE(review): gprs[8] is presumably the saved return address - confirm. */
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
+ /* Follow the backchain. */
+ while (1) {
+ /* Each step must move to a strictly higher address. */
+ low = sp;
+ sp = sf->back_chain & PSW_ADDR_INSN;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ /* The area directly behind the last frame is read as a pt_regs. */
+ sp = (unsigned long) (sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *) sp;
+ addr = regs->psw.addr & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ low = sp;
+ /* Continue the walk from the r15 value saved in that pt_regs. */
+ sp = regs->gprs[15];
+ }
+}
+
+/*
+ * Print a "Call Trace:" for @task. The walk starts at @stack if it is
+ * non-NULL, otherwise at the saved kernel stack pointer of @task, or at the
+ * current value of register 15 when @task is NULL as well. Afterwards the
+ * locks held by @task (or by current if @task is NULL) are shown.
+ */
+static void show_trace(struct task_struct *task, unsigned long *stack)
+{
+ /*
+ * NOTE(review): the lowcore stack values are presumably stored
+ * frame_size below the top of each stack, hence the adjustment
+ * when computing the bounds below - confirm.
+ */
+ const unsigned long frame_size =
+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ register unsigned long __r15 asm ("15");
+ unsigned long sp;
+
+ sp = (unsigned long) stack;
+ if (!sp)
+ sp = task ? task->thread.ksp : __r15;
+ printk("Call Trace:\n");
+ /* Each walk returns where it stopped; the next stack resumes there. */
+#ifdef CONFIG_CHECK_STACK
+ sp = __show_trace(sp,
+ S390_lowcore.panic_stack + frame_size - 4096,
+ S390_lowcore.panic_stack + frame_size);
+#endif
+ sp = __show_trace(sp,
+ S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+ S390_lowcore.async_stack + frame_size);
+ /* Last: the task's own stack, or the one recorded in lowcore. */
+ if (task)
+ __show_trace(sp, (unsigned long) task_stack_page(task),
+ (unsigned long) task_stack_page(task) + THREAD_SIZE);
+ else
+ __show_trace(sp, S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE);
+ if (!task)
+ task = current;
+ debug_show_held_locks(task);
+}
+
+/*
+ * Dump up to kstack_depth_to_print raw stack words, then print a call
+ * trace. The dump starts at @sp if it is non-NULL, otherwise at the saved
+ * kernel stack pointer of @task, or at register 15 if @task is NULL too.
+ */
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ register unsigned long *__r15 asm ("15");
+ unsigned long *stack;
+ int i;
+
+ if (!sp)
+ stack = task ? (unsigned long *) task->thread.ksp : __r15;
+ else
+ stack = sp;
+
+ for (i = 0; i < kstack_depth_to_print; i++) {
+ /* Stop once the cursor reaches a THREAD_SIZE-aligned address. */
+ if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ /* Begin a new output line every 32 bytes of stack. */
+ if ((i * sizeof(long) % 32) == 0)
+ printk("%s ", i == 0 ? "" : "\n");
+ printk(LONG, *stack++);
+ }
+ printk("\n");
+ /* Pass the caller's @sp (possibly NULL), not the advanced cursor. */
+ show_trace(task, sp);
+}
+
+/*
+ * On 64-bit kernels print regs->args[0], both raw and symbolized, under the
+ * heading "Last Breaking-Event-Address:". Does nothing on 31-bit kernels.
+ */
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+#ifdef CONFIG_64BIT
+ printk("Last Breaking-Event-Address:\n");
+ printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
+#endif
+}
+
+/*
+ * Return the field of regs->psw.mask selected by @bits, shifted down so that
+ * its lowest bit ends up in bit 0. (~bits + 1) & bits isolates the lowest
+ * set bit of @bits, so the division acts as a right shift by that bit's
+ * position. @bits must be non-zero, otherwise this divides by zero.
+ */
+static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
+{
+ return (regs->psw.mask & bits) / ((~bits + 1) & bits);
+}
+
+/*
+ * Print the register state in @regs: the PSW (mask and address), the PSW
+ * mask decoded field by field via mask_bits(), general purpose registers
+ * 0-15 in rows of four, and finally the code dump from show_code().
+ * Lines are prefixed with "User" or "Krnl" depending on user_mode(regs).
+ */
+void show_registers(struct pt_regs *regs)
+{
+ char *mode;
+
+ mode = user_mode(regs) ? "User" : "Krnl";
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ /* The PSW address is only symbolized for kernel mode. */
+ if (!user_mode(regs))
+ printk(" (%pSR)", (void *)regs->psw.addr);
+ printk("\n");
+ printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
+ "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
+ mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
+ mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
+ mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
+ mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
+ mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
+#ifdef CONFIG_64BIT
+ /* EA and BA are reported together as one field. */
+ printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+#endif
+ printk("\n%s GPRS: " FOURLONG, mode,
+ regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
+ printk(" " FOURLONG,
+ regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
+ printk(" " FOURLONG,
+ regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
+ printk(" " FOURLONG,
+ regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
+ show_code(regs);
+}
+
+/*
+ * Full register report for @regs: the generic header printed by
+ * show_regs_print_info(), the register dump from show_registers(), a call
+ * trace when @regs was taken in kernel mode, and the last breaking event.
+ */
+void show_regs(struct pt_regs *regs)
+{
+ show_regs_print_info(KERN_DEFAULT);
+ show_registers(regs);
+ /* Show stack backtrace if pt_regs is from kernel mode */
+ if (!user_mode(regs))
+ /* The trace starts at the r15 value saved in @regs. */
+ show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_last_breaking_event(regs);
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+/*
+ * Report a fatal exception described by @str for the context in @regs and
+ * terminate the current task with do_exit(SIGSEGV). Panics instead when
+ * called in interrupt context or when panic_on_oops is set. The report
+ * consists of a header line, the die notifier chain, the module list and
+ * show_regs().
+ */
+void die(struct pt_regs *regs, const char *str)
+{
+ /* Number of die() calls so far; printed as "[#n]" in the header. */
+ static int die_counter;
+
+ oops_enter();
+ lgr_info_log();
+ debug_stop_all();
+ console_verbose();
+ /* NOTE(review): die_lock presumably keeps concurrent reports apart - confirm. */
+ spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
+ /* Header: message, low 16 bits of the interruption code, counter. */
+ printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+ print_modules();
+ show_regs(regs);
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irq(&die_lock);
+ /* The panic checks run only after the lock has been released. */
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception: panic_on_oops");
+ oops_exit();
+ do_exit(SIGSEGV);
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index d0e09684b9c..0dff972a169 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -1,37 +1,91 @@
/*
- * arch/s390/kernel/early.c
- *
- * Copyright IBM Corp. 2007
+ * Copyright IBM Corp. 2007, 2009
* Author(s): Hongjie Yang <hongjie@us.ibm.com>,
* Heiko Carstens <heiko.carstens@de.ibm.com>
*/
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/ctype.h>
+#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
#include <asm/ipl.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/sclp.h>
+#include <asm/facility.h>
#include "entry.h"
/*
* Create a Kernel NSS if the SAVESYS= parameter is defined
*/
-#define DEFSYS_CMD_SIZE 96
+#define DEFSYS_CMD_SIZE 128
#define SAVESYS_CMD_SIZE 32
char kernel_nss_name[NSS_NAME_SIZE + 1];
+static void __init setup_boot_command_line(void);
+
+/*
+ * Get the TOD clock running.
+ */
+static void __init reset_tod_clock(void)
+{
+ u64 time;
+
+ /* A zero return from store_tod_clock() means nothing needs fixing. */
+ if (store_tod_clock(&time) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ /* If setting it fails, or it still cannot be stored, stop in a disabled wait. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
+ disabled_wait(0);
+
+ /* Record the value the clock was just set to as the new time base. */
+ sched_clock_base_cc = TOD_UNIX_EPOCH;
+ S390_lowcore.last_update_clock = sched_clock_base_cc;
+}
+
#ifdef CONFIG_SHARED_KERNEL
+int __init savesys_ipl_nss(char *cmd, const int cmdlen);
+
+asm(
+ " .section .init.text,\"ax\",@progbits\n"
+ " .align 4\n"
+ " .type savesys_ipl_nss, @function\n"
+ "savesys_ipl_nss:\n"
+#ifdef CONFIG_64BIT
+ " stmg 6,15,48(15)\n"
+ " lgr 14,3\n"
+ " sam31\n"
+ " diag 2,14,0x8\n"
+ " sam64\n"
+ " lgr 2,14\n"
+ " lmg 6,15,48(15)\n"
+#else
+ " stm 6,15,24(15)\n"
+ " lr 14,3\n"
+ " diag 2,14,0x8\n"
+ " lr 2,14\n"
+ " lm 6,15,24(15)\n"
+#endif
+ " br 14\n"
+ " .size savesys_ipl_nss, .-savesys_ipl_nss\n"
+ " .previous\n");
+
+static __initdata char upper_command_line[COMMAND_LINE_SIZE];
+
static noinline __init void create_kernel_nss(void)
{
unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
@@ -39,8 +93,9 @@ static noinline __init void create_kernel_nss(void)
unsigned int sinitrd_pfn, einitrd_pfn;
#endif
int response;
+ int hlen;
+ size_t len;
char *savesys_ptr;
- char upper_command_line[COMMAND_LINE_SIZE];
char defsys_cmd[DEFSYS_CMD_SIZE];
char savesys_cmd[SAVESYS_CMD_SIZE];
@@ -49,8 +104,8 @@ static noinline __init void create_kernel_nss(void)
return;
/* Convert COMMAND_LINE to upper case */
- for (i = 0; i < strlen(COMMAND_LINE); i++)
- upper_command_line[i] = toupper(COMMAND_LINE[i]);
+ for (i = 0; i < strlen(boot_command_line); i++)
+ upper_command_line[i] = toupper(boot_command_line[i]);
savesys_ptr = strstr(upper_command_line, "SAVESYS=");
@@ -69,38 +124,66 @@ static noinline __init void create_kernel_nss(void)
end_pfn = PFN_UP(__pa(&_end));
min_size = end_pfn << 2;
- sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
- kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1,
- eshared_pfn, end_pfn);
+ hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE,
+ "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
+ kernel_nss_name, stext_pfn - 1, stext_pfn,
+ eshared_pfn - 1, eshared_pfn, end_pfn);
#ifdef CONFIG_BLK_DEV_INITRD
if (INITRD_START && INITRD_SIZE) {
sinitrd_pfn = PFN_DOWN(__pa(INITRD_START));
einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE));
min_size = einitrd_pfn << 2;
- sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd,
- sinitrd_pfn, einitrd_pfn);
+ hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+ " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn);
}
#endif
- sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK", defsys_cmd, min_size);
- sprintf(savesys_cmd, "SAVESYS %s \n IPL %s",
- kernel_nss_name, kernel_nss_name);
+ snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+ " EW MINSIZE=%.7iK PARMREGS=0-13", min_size);
+ defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0';
+ snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s",
+ kernel_nss_name, kernel_nss_name);
+ savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0';
__cpcmd(defsys_cmd, NULL, 0, &response);
if (response != 0) {
+ pr_err("Defining the Linux kernel NSS failed with rc=%d\n",
+ response);
kernel_nss_name[0] = '\0';
return;
}
- __cpcmd(savesys_cmd, NULL, 0, &response);
+ len = strlen(savesys_cmd);
+ ASCEBC(savesys_cmd, len);
+ response = savesys_ipl_nss(savesys_cmd, len);
- if (response != strlen(savesys_cmd)) {
+ /* On success: response is equal to the command size,
+ * max SAVESYS_CMD_SIZE
+ * On error: response contains the numeric portion of cp error message.
+ * for SAVESYS it will be >= 263
+ * for missing privilege class, it will be 1
+ */
+ if (response > SAVESYS_CMD_SIZE || response == 1) {
+ pr_err("Saving the Linux kernel NSS failed with rc=%d\n",
+ response);
kernel_nss_name[0] = '\0';
return;
}
+ /* re-initialize cputime accounting. */
+ sched_clock_base_cc = get_tod_clock();
+ S390_lowcore.last_update_clock = sched_clock_base_cc;
+ S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
+ S390_lowcore.user_timer = 0;
+ S390_lowcore.system_timer = 0;
+ asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer));
+
+ /* re-setup boot command line with new ipl vm parms */
+ ipl_update_parameters();
+ setup_boot_command_line();
+
ipl_flags = IPL_NSS_VALID;
}
@@ -123,144 +206,78 @@ static noinline __init void clear_bss_section(void)
*/
static noinline __init void init_kernel_storage_key(void)
{
+#if PAGE_DEFAULT_KEY
unsigned long end_pfn, init_pfn;
end_pfn = PFN_UP(__pa(&_end));
for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
- page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY);
-}
-
-static noinline __init void detect_machine_type(void)
-{
- struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data;
-
- get_cpu_id(&S390_lowcore.cpu_data.cpu_id);
-
- /* Running under z/VM ? */
- if (cpuinfo->cpu_id.version == 0xff)
- machine_flags |= MACHINE_FLAG_VM;
-
- /* Running on a P/390 ? */
- if (cpuinfo->cpu_id.machine == 0x7490)
- machine_flags |= MACHINE_FLAG_P390;
-
- /* Running under KVM ? */
- if (cpuinfo->cpu_id.version == 0xfe)
- machine_flags |= MACHINE_FLAG_KVM;
+ page_set_storage_key(init_pfn << PAGE_SHIFT,
+ PAGE_DEFAULT_KEY, 0);
+#endif
}
-#ifdef CONFIG_64BIT
-static noinline __init int memory_fast_detect(void)
-{
- unsigned long val0 = 0;
- unsigned long val1 = 0xc;
- int ret = -ENOSYS;
-
- if (ipl_flags & IPL_NSS_VALID)
- return -ENOSYS;
-
- asm volatile(
- " diag %1,%2,0x260\n"
- "0: lhi %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+d" (ret), "+d" (val0), "+d" (val1) : : "cc");
-
- if (ret || val0 != val1)
- return -ENOSYS;
+static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
- memory_chunk[0].size = val0 + 1;
- return 0;
-}
-#else
-static inline int memory_fast_detect(void)
+static noinline __init void detect_machine_type(void)
{
- return -ENOSYS;
-}
-#endif
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
-static inline __init unsigned long __tprot(unsigned long addr)
-{
- int cc = -1;
+ /* Check current-configuration-level */
+ if (stsi(NULL, 0, 0, 0) <= 2) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+ return;
+ }
+ /* Get virtual-machine cpu information. */
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
+ return;
- asm volatile(
- " tprot 0(%1),0\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+d" (cc) : "a" (addr) : "cc");
- return (unsigned long)cc;
+ /* Running under KVM? If not we assume z/VM */
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
+ else
+ S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
}
-/* Checking memory in 128KB increments. */
-#define CHUNK_INCR (1UL << 17)
-#define ADDR2G (1UL << 31)
-
-static noinline __init void find_memory_chunks(unsigned long memsize)
+static __init void setup_topology(void)
{
- unsigned long addr = 0, old_addr = 0;
- unsigned long old_cc = CHUNK_READ_WRITE;
- unsigned long cc;
- int chunk = 0;
-
- while (chunk < MEMORY_CHUNKS) {
- cc = __tprot(addr);
- while (cc == old_cc) {
- addr += CHUNK_INCR;
- if (memsize && addr >= memsize)
- break;
-#ifndef CONFIG_64BIT
- if (addr == ADDR2G)
- break;
-#endif
- cc = __tprot(addr);
- }
-
- if (old_addr != addr &&
- (old_cc == CHUNK_READ_WRITE || old_cc == CHUNK_READ_ONLY)) {
- memory_chunk[chunk].addr = old_addr;
- memory_chunk[chunk].size = addr - old_addr;
- memory_chunk[chunk].type = old_cc;
- chunk++;
- }
-
- old_addr = addr;
- old_cc = cc;
+#ifdef CONFIG_64BIT
+ int max_mnest;
-#ifndef CONFIG_64BIT
- if (addr == ADDR2G)
- break;
-#endif
- /*
- * Finish memory detection at the first hole
- * if storage size is unknown.
- */
- if (cc == -1UL && !memsize)
- break;
- if (memsize && addr >= memsize)
+ if (!test_facility(11))
+ return;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+ for (max_mnest = 6; max_mnest > 1; max_mnest--) {
+ if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
}
+ topology_max_mnest = max_mnest;
+#endif
}
-static __init void early_pgm_check_handler(void)
+static void early_pgm_check_handler(void)
{
- unsigned long addr;
const struct exception_table_entry *fixup;
+ unsigned long cr0, cr0_new;
+ unsigned long addr;
addr = S390_lowcore.program_old_psw.addr;
fixup = search_exception_tables(addr & PSW_ADDR_INSN);
if (!fixup)
disabled_wait(0);
- S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+ /* Disable low address protection before storing into lowcore. */
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 & ~(1UL << 28);
+ __ctl_load(cr0_new, 0, 0);
+ S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+ __ctl_load(cr0, 0, 0);
}
static noinline __init void setup_lowcore_early(void)
{
psw_t psw;
- psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+ psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
S390_lowcore.external_new_psw = psw;
psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
@@ -268,17 +285,10 @@ static noinline __init void setup_lowcore_early(void)
s390_base_pgm_handler_fn = early_pgm_check_handler;
}
-static noinline __init void setup_hpage(void)
+static noinline __init void setup_facility_list(void)
{
-#ifndef CONFIG_DEBUG_PAGEALLOC
- unsigned int facilities;
-
- facilities = stfl();
- if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
- return;
- machine_flags |= MACHINE_FLAG_HPAGE;
- __ctl_set_bit(0, 23);
-#endif
+ stfle(S390_lowcore.stfle_fac_list,
+ ARRAY_SIZE(S390_lowcore.stfle_fac_list));
}
static __init void detect_mvpg(void)
@@ -294,7 +304,7 @@ static __init void detect_mvpg(void)
EX_TABLE(0b,1b)
: "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
if (!rc)
- machine_flags |= MACHINE_FLAG_MVPG;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG;
#endif
}
@@ -310,7 +320,7 @@ static __init void detect_ieee(void)
EX_TABLE(0b,1b)
: "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
if (!rc)
- machine_flags |= MACHINE_FLAG_IEEE;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE;
#endif
}
@@ -329,7 +339,7 @@ static __init void detect_csp(void)
EX_TABLE(0b,1b)
: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
if (!rc)
- machine_flags |= MACHINE_FLAG_CSP;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_CSP;
#endif
}
@@ -346,7 +356,7 @@ static __init void detect_diag9c(void)
EX_TABLE(0b,1b)
: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
if (!rc)
- machine_flags |= MACHINE_FLAG_DIAG9C;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
}
static __init void detect_diag44(void)
@@ -361,61 +371,127 @@ static __init void detect_diag44(void)
EX_TABLE(0b,1b)
: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
if (!rc)
- machine_flags |= MACHINE_FLAG_DIAG44;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
#endif
}
static __init void detect_machine_facilities(void)
{
#ifdef CONFIG_64BIT
- unsigned int facilities;
-
- facilities = stfl();
- if (facilities & (1 << 28))
- machine_flags |= MACHINE_FLAG_IDTE;
- if (facilities & (1 << 23))
- machine_flags |= MACHINE_FLAG_PFMF;
- if (facilities & (1 << 4))
- machine_flags |= MACHINE_FLAG_MVCOS;
+ if (test_facility(8)) { /* facility 8: flag EDAT1 and additionally set a control bit */
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+ __ctl_set_bit(0, 23); /* presumably bit 23 of control register 0 (EDAT enablement) - TODO confirm */
+ }
+ if (test_facility(78))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+ if (test_facility(3))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+ if (test_facility(40))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
+ if (test_facility(50) && test_facility(73)) /* TE is flagged only when both facilities are present */
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ if (test_facility(66))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
+ if (test_facility(51))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
#endif
}
+static __init void rescue_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); /* _end + 4MB */
+ /*
+ * Just like in case of IPL from VM reader we make sure there is a
+ * gap of 4MB between end of kernel and start of initrd.
+ * That way we can also be sure that saving an NSS will succeed,
+ * which however only requires different segments.
+ */
+ if (!INITRD_START || !INITRD_SIZE) /* start or size is zero: nothing to relocate */
+ return;
+ if (INITRD_START >= min_initrd_addr) /* already at or beyond the required gap */
+ return;
+ memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); /* memmove: old and new ranges can overlap */
+ INITRD_START = min_initrd_addr; /* record the new location */
+#endif
+}
+
+/* Add the data produced by ipl_data() to boot_command_line; data starting with '=' replaces the line instead of extending it */
+static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
+{
+ char *parm, *delim;
+ size_t rc, len;
+
+ len = strlen(boot_command_line);
+
+ delim = boot_command_line + len; /* '\0' character position */
+ parm = boot_command_line + len + 1; /* append right after '\0' */
+
+ rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); /* callback gets the space behind the '\0'; rc == 0 leaves the line untouched */
+ if (rc) {
+ if (*parm == '=') /* leading '=': overwrite the line with what follows it */
+ memmove(boot_command_line, parm + 1, rc); /* rc bytes from parm + 1 - presumably includes the terminating '\0', confirm */
+ else
+ *delim = ' '; /* replace '\0' with space */
+ }
+}
+
+static inline int has_ebcdic_char(const char *str) /* returns 1 if any byte of str has bit 0x80 set, else 0 */
+{
+ int i;
+
+ for (i = 0; str[i]; i++) /* scan up to the terminating '\0' */
+ if (str[i] & 0x80) /* high bit set: the caller treats the string as EBCDIC */
+ return 1;
+ return 0;
+}
+
+static void __init setup_boot_command_line(void)
+{
+ COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; /* force '\0' termination before the string is scanned */
+ /* convert arch command line to ascii if necessary */
+ if (has_ebcdic_char(COMMAND_LINE))
+ EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+ /* copy arch command line (after strstrip()) into boot_command_line */
+ strlcpy(boot_command_line, strstrip(COMMAND_LINE),
+ ARCH_COMMAND_LINE_SIZE);
+
+ /* append IPL PARM data to the boot command line (vmparm only when running under VM) */
+ if (MACHINE_IS_VM)
+ append_to_cmdline(append_ipl_vmparm);
+
+ append_to_cmdline(append_ipl_scpdata); /* scpdata is appended unconditionally */
+}
+
/*
* Save ipl parameters, clear bss memory, initialize storage keys
* and create a kernel NSS at startup if the SAVESYS= parm is defined
*/
void __init startup_init(void)
{
- unsigned long long memsize;
-
+ reset_tod_clock();
ipl_save_parameters();
+ rescue_initrd(); /* keeps the initrd at least 4MB above _end (see rescue_initrd) */
clear_bss_section();
init_kernel_storage_key();
lockdep_init();
lockdep_off();
+ setup_lowcore_early(); /* presumably needed first: the detect_*() helpers below write S390_lowcore.machine_flags - confirm */
+ setup_facility_list(); /* presumably fills the list that test_facility() consults - confirm */
detect_machine_type();
+ ipl_update_parameters();
+ setup_boot_command_line(); /* builds boot_command_line from COMMAND_LINE plus IPL parm data */
create_kernel_nss();
- sort_main_extable();
- setup_lowcore_early();
detect_mvpg();
detect_ieee();
detect_csp();
detect_diag9c();
detect_diag44();
detect_machine_facilities();
- setup_hpage();
- sclp_read_info_early();
- sclp_facilities_detect();
- memsize = sclp_memory_detect();
-#ifndef CONFIG_64BIT
- /*
- * Can't deal with more than 2G in 31 bit addressing mode, so
- * limit the value in order to avoid strange side effects.
- */
- if (memsize > ADDR2G)
- memsize = ADDR2G;
+ setup_topology();
+ sclp_early_detect();
+#ifdef CONFIG_DYNAMIC_FTRACE
+ S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; /* store the address of ftrace_caller in the lowcore */
#endif
- if (memory_fast_detect() < 0)
- find_memory_chunks((unsigned long) memsize);
lockdep_on();
}
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
index cc0dc609d73..b971c6be629 100644
--- a/arch/s390/kernel/ebcdic.c
+++ b/arch/s390/kernel/ebcdic.c
@@ -1,10 +1,9 @@
/*
- * arch/s390/kernel/ebcdic.c
* EBCDIC -> ASCII, ASCII -> EBCDIC,
* upper to lower case (EBCDIC) conversion tables.
*
* S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Martin Peschke <peschke@fh-brandenburg.de>
*/
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 708cf9cf9a3..70203265196 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1,205 +1,144 @@
/*
- * arch/s390/kernel/entry.S
* S390 low-level entry points.
*
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright IBM Corp. 1999, 2012
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Hartmut Penner (hp@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
* Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/sys.h>
-#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/processor.h>
#include <asm/cache.h>
-#include <asm/lowcore.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/page.h>
-
-/*
- * Stack layout for the system_call stack entry.
- * The first few entries are identical to the user_regs_struct.
- */
-SP_PTREGS = STACK_FRAME_OVERHEAD
-SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS
-SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW
-SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS
-SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4
-SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8
-SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 12
-SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16
-SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 20
-SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24
-SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 28
-SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32
-SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 36
-SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40
-SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 44
-SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48
-SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52
-SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56
-SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60
-SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2
-SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC
-SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP
-SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE
-
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
+#include <asm/sigp.h>
+#include <asm/irq.h>
+
+__PT_R0 = __PT_GPRS /* pt_regs offset of gpr n is __PT_GPRS + 4*n (4-byte slots) */
+__PT_R1 = __PT_GPRS + 4
+__PT_R2 = __PT_GPRS + 8
+__PT_R3 = __PT_GPRS + 12
+__PT_R4 = __PT_GPRS + 16
+__PT_R5 = __PT_GPRS + 20
+__PT_R6 = __PT_GPRS + 24
+__PT_R7 = __PT_GPRS + 28
+__PT_R8 = __PT_GPRS + 32
+__PT_R9 = __PT_GPRS + 36
+__PT_R10 = __PT_GPRS + 40
+__PT_R11 = __PT_GPRS + 44
+__PT_R12 = __PT_GPRS + 48
+__PT_R13 = __PT_GPRS + 52 /* 4*13; must stay inside the 32 bytes copied to __PT_R8 for r8-r15 */
+__PT_R14 = __PT_GPRS + 56
+__PT_R15 = __PT_GPRS + 60
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK = (_PIF_PER_TRAP)
#define BASED(name) name-system_call(%r13)
-#ifdef CONFIG_TRACE_IRQFLAGS
.macro TRACE_IRQS_ON
- l %r1,BASED(.Ltrace_irq_on)
- basr %r14,%r1
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ l %r1,BASED(.Lhardirqs_on)
+ basr %r14,%r1 # call trace_hardirqs_on_caller
+#endif
.endm
.macro TRACE_IRQS_OFF
- l %r1,BASED(.Ltrace_irq_off)
- basr %r14,%r1
- .endm
-
- .macro TRACE_IRQS_CHECK
- tm SP_PSW(%r15),0x03 # irqs enabled?
- jz 0f
- l %r1,BASED(.Ltrace_irq_on)
- basr %r14,%r1
- j 1f
-0: l %r1,BASED(.Ltrace_irq_off)
- basr %r14,%r1
-1:
- .endm
-#else
-#define TRACE_IRQS_ON
-#define TRACE_IRQS_OFF
-#define TRACE_IRQS_CHECK
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ l %r1,BASED(.Lhardirqs_off)
+ basr %r14,%r1 # call trace_hardirqs_off_caller
#endif
+ .endm
-#ifdef CONFIG_LOCKDEP
.macro LOCKDEP_SYS_EXIT
- tm SP_PSW+1(%r15),0x01 # returning to user ?
- jz 0f
+#ifdef CONFIG_LOCKDEP
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jz .+10
l %r1,BASED(.Llockdep_sys_exit)
- basr %r14,%r1
-0:
- .endm
-#else
-#define LOCKDEP_SYS_EXIT
-#endif
-
-/*
- * Register usage in interrupt handlers:
- * R9 - pointer to current task structure
- * R13 - pointer to literal pool
- * R14 - return register for function calls
- * R15 - kernel stack pointer
- */
-
- .macro STORE_TIMER lc_offset
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- stpt \lc_offset
+ basr %r14,%r1 # call lockdep_sys_exit
#endif
.endm
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .macro UPDATE_VTIME lc_from,lc_to,lc_sum
- lm %r10,%r11,\lc_from
- sl %r10,\lc_to
- sl %r11,\lc_to+4
- bc 3,BASED(0f)
- sl %r10,BASED(.Lc_1)
-0: al %r10,\lc_sum
- al %r11,\lc_sum+4
- bc 12,BASED(1f)
- al %r10,BASED(.Lc_1)
-1: stm %r10,%r11,\lc_sum
- .endm
+ .macro CHECK_STACK stacksize,savearea
+#ifdef CONFIG_CHECK_STACK
+ tml %r15,\stacksize - CONFIG_STACK_GUARD
+ la %r14,\savearea
+ jz stack_overflow
#endif
-
- .macro SAVE_ALL_BASE savearea
- stm %r12,%r15,\savearea
- l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13
.endm
- .macro SAVE_ALL_SVC psworg,savearea
- la %r12,\psworg
- l %r15,__LC_KERNEL_STACK # problem state -> load ksp
+ .macro SWITCH_ASYNC savearea,stack,shift
+ tmh %r8,0x0001 # interrupting from user ?
+ jnz 1f
+ lr %r14,%r9
+ sl %r14,BASED(.Lcritical_start)
+ cl %r14,BASED(.Lcritical_length)
+ jhe 0f
+ la %r11,\savearea # inside critical section, do cleanup
+ bras %r14,cleanup_critical
+ tmh %r8,0x0001 # retest problem state after cleanup
+ jnz 1f
+0: l %r14,\stack # are we already on the target stack?
+ slr %r14,%r15
+ sra %r14,\shift
+ jnz 1f
+ CHECK_STACK 1<<\shift,\savearea
+ ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: l %r15,\stack # load target stack
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
- .macro SAVE_ALL_SYNC psworg,savearea
- la %r12,\psworg
- tm \psworg+1,0x01 # test problem state bit
- bz BASED(2f) # skip stack setup save
- l %r15,__LC_KERNEL_STACK # problem state -> load ksp
-#ifdef CONFIG_CHECK_STACK
- b BASED(3f)
-2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
- bz BASED(stack_overflow)
-3:
-#endif
-2:
+ .macro ADD64 high,low,timer
+ al \high,\timer
+ al \low,4+\timer
+ brc 12,.+8
+ ahi \high,1
.endm
- .macro SAVE_ALL_ASYNC psworg,savearea
- la %r12,\psworg
- tm \psworg+1,0x01 # test problem state bit
- bnz BASED(1f) # from user -> load async stack
- clc \psworg+4(4),BASED(.Lcritical_end)
- bhe BASED(0f)
- clc \psworg+4(4),BASED(.Lcritical_start)
- bl BASED(0f)
- l %r14,BASED(.Lcleanup_critical)
- basr %r14,%r14
- tm 1(%r12),0x01 # retest problem state after cleanup
- bnz BASED(1f)
-0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ?
- slr %r14,%r15
- sra %r14,STACK_SHIFT
- be BASED(2f)
-1: l %r15,__LC_ASYNC_STACK
-#ifdef CONFIG_CHECK_STACK
- b BASED(3f)
-2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
- bz BASED(stack_overflow)
-3:
-#endif
-2:
+ .macro SUB64 high,low,timer
+ sl \high,\timer
+ sl \low,4+\timer
+ brc 3,.+8
+ ahi \high,-1
.endm
- .macro CREATE_STACK_FRAME psworg,savearea
- s %r15,BASED(.Lc_spsize) # make room for registers & psw
- mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack
- la %r12,\psworg
- st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2
- icm %r12,12,__LC_SVC_ILC
- stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack
- st %r12,SP_ILC(%r15)
- mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack
- la %r12,0
- st %r12,__SF_BACKCHAIN(%r15) # clear back chain
+ .macro UPDATE_VTIME high,low,enter_timer
+ lm \high,\low,__LC_EXIT_TIMER
+ SUB64 \high,\low,\enter_timer
+ ADD64 \high,\low,__LC_USER_TIMER
+ stm \high,\low,__LC_USER_TIMER
+ lm \high,\low,__LC_LAST_UPDATE_TIMER
+ SUB64 \high,\low,__LC_EXIT_TIMER
+ ADD64 \high,\low,__LC_SYSTEM_TIMER
+ stm \high,\low,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
.endm
- .macro RESTORE_ALL psworg,sync
- mvc \psworg(8),SP_PSW(%r15) # move user PSW to lowcore
- .if !\sync
- ni \psworg+1,0xfd # clear wait state bit
- .endif
- lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user
- STORE_TIMER __LC_EXIT_TIMER
- lpsw \psworg # back to caller
+ .macro REENABLE_IRQS
+ st %r8,__LC_RETURN_PSW
+ ni __LC_RETURN_PSW,0xbf
+ ssm __LC_RETURN_PSW
.endm
+ .section .kprobes.text, "ax"
+
/*
* Scheduler resume function, called by switch_to
* gpr2 = (task_struct *) prev
@@ -207,34 +146,20 @@ STACK_SIZE = 1 << STACK_SHIFT
* Returns:
* gpr2 = prev
*/
- .globl __switch_to
-__switch_to:
- basr %r1,0
-__switch_to_base:
- tm __THREAD_per(%r3),0xe8 # new process is using per ?
- bz __switch_to_noper-__switch_to_base(%r1) # if not we're fine
- stctl %c9,%c11,__SF_EMPTY(%r15) # We are using per stuff
- clc __THREAD_per(12,%r3),__SF_EMPTY(%r15)
- be __switch_to_noper-__switch_to_base(%r1) # we got away w/o bashing TLB's
- lctl %c9,%c11,__THREAD_per(%r3) # Nope we didn't
-__switch_to_noper:
+ENTRY(__switch_to)
+ stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
+ st %r15,__THREAD_ksp(%r2) # store kernel stack of prev
l %r4,__THREAD_info(%r2) # get thread_info of prev
- tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending?
- bz __switch_to_no_mcck-__switch_to_base(%r1)
- ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
- l %r4,__THREAD_info(%r3) # get thread_info of next
- oi __TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next
-__switch_to_no_mcck:
- stm %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task
- st %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp
- l %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp
- lm %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task
- st %r3,__LC_CURRENT # __LC_CURRENT = current task struct
- lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
- l %r3,__THREAD_info(%r3) # load thread_info from task struct
- st %r3,__LC_THREAD_INFO
- ahi %r3,STACK_SIZE
- st %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack
+ l %r5,__THREAD_info(%r3) # get thread_info of next
+ lr %r15,%r5
+ ahi %r15,STACK_INIT # end of kernel stack of next
+ st %r3,__LC_CURRENT # store task struct of next
+ st %r5,__LC_THREAD_INFO # store thread info of next
+ st %r15,__LC_KERNEL_STACK # store end of kernel stack
+ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
+ mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
+ l %r15,__THREAD_ksp(%r3) # load kernel stack of next
+ lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
br %r14
__critical_start:
@@ -243,493 +168,473 @@ __critical_start:
* are executed with interrupts enabled.
*/
- .globl system_call
-system_call:
- STORE_TIMER __LC_SYNC_ENTER_TIMER
-sysc_saveall:
- SAVE_ALL_BASE __LC_SAVE_AREA
- SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- lh %r7,0x8a # get svc number from lowcore
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ENTRY(system_call)
+ stpt __LC_SYNC_ENTER_TIMER
+sysc_stm:
+ stm %r8,%r15,__LC_SAVE_AREA_SYNC
+ l %r12,__LC_THREAD_INFO
+ l %r13,__LC_SVC_NEW_PSW+4
+ lhi %r14,_PIF_SYSCALL
+sysc_per:
+ l %r15,__LC_KERNEL_STACK
+ la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
sysc_vtime:
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
-sysc_stime:
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-sysc_update:
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-#endif
+ UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
+ stm %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
+ mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
+ st %r14,__PT_FLAGS(%r11)
sysc_do_svc:
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- sla %r7,2 # *4 and test for svc 0
- bnz BASED(sysc_nr_ok) # svc number > 0
+ l %r10,__TI_sysc_table(%r12) # 31 bit system call table
+ lh %r8,__PT_INT_CODE+2(%r11)
+ sla %r8,2 # shift and test for svc0
+ jnz sysc_nr_ok
# svc 0: system call number in %r1
cl %r1,BASED(.Lnr_syscalls)
- bnl BASED(sysc_nr_ok)
- lr %r7,%r1 # copy svc number to %r7
- sla %r7,2 # *4
+ jnl sysc_nr_ok
+ sth %r1,__PT_INT_CODE+2(%r11)
+ lr %r8,%r1
+ sla %r8,2
sysc_nr_ok:
- mvc SP_ARGS(4,%r15),SP_R7(%r15)
-sysc_do_restart:
- l %r8,BASED(.Lsysc_table)
- tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
- l %r8,0(%r7,%r8) # get system call addr.
- bnz BASED(sysc_tracesys)
- basr %r14,%r8 # call sys_xxxx
- st %r2,SP_R2(%r15) # store return value (change R2 on stack)
+ xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+ st %r2,__PT_ORIG_GPR2(%r11)
+ st %r7,STACK_FRAME_OVERHEAD(%r15)
+ l %r9,0(%r8,%r10) # get system call addr.
+ tm __TI_flags+3(%r12),_TIF_TRACE
+ jnz sysc_tracesys
+ basr %r14,%r9 # call sys_xxxx
+ st %r2,__PT_R2(%r11) # store return value
sysc_return:
- tm __TI_flags+3(%r9),_TIF_WORK_SVC
- bnz BASED(sysc_work) # there is work to do (signals etc.)
-sysc_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
- la %r1,BASED(sysc_restore_trace_psw)
- lpsw 0(%r1)
-sysc_restore_trace:
- TRACE_IRQS_CHECK
LOCKDEP_SYS_EXIT
-#endif
-sysc_leave:
- RESTORE_ALL __LC_RETURN_PSW,1
+sysc_tif:
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno sysc_restore
+ tm __PT_FLAGS+3(%r11),_PIF_WORK
+ jnz sysc_work
+ tm __TI_flags+3(%r12),_TIF_WORK
+ jnz sysc_work # check for thread work
+ tm __LC_CPU_FLAGS+3,_CIF_WORK
+ jnz sysc_work
+sysc_restore:
+ mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
+ stpt __LC_EXIT_TIMER
+ lm %r0,%r15,__PT_R0(%r11)
+ lpsw __LC_RETURN_PSW
sysc_done:
-#ifdef CONFIG_TRACE_IRQFLAGS
- .align 8
- .globl sysc_restore_trace_psw
-sysc_restore_trace_psw:
- .long 0, sysc_restore_trace + 0x80000000
-#endif
-
-#
-# recheck if there is more work to do
-#
-sysc_work_loop:
- tm __TI_flags+3(%r9),_TIF_WORK_SVC
- bz BASED(sysc_restore) # there is no work to do
#
# One of the work bits is on. Find out which one.
#
sysc_work:
- tm SP_PSW+1(%r15),0x01 # returning to user ?
- bno BASED(sysc_restore)
- tm __TI_flags+3(%r9),_TIF_MCCK_PENDING
- bo BASED(sysc_mcck_pending)
- tm __TI_flags+3(%r9),_TIF_NEED_RESCHED
- bo BASED(sysc_reschedule)
- tm __TI_flags+3(%r9),_TIF_SIGPENDING
- bnz BASED(sysc_sigpending)
- tm __TI_flags+3(%r9),_TIF_RESTART_SVC
- bo BASED(sysc_restart)
- tm __TI_flags+3(%r9),_TIF_SINGLE_STEP
- bo BASED(sysc_singlestep)
- b BASED(sysc_restore)
-sysc_work_done:
+ tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
+ jo sysc_mcck_pending
+ tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
+ jo sysc_reschedule
+ tm __PT_FLAGS+3(%r11),_PIF_PER_TRAP
+ jo sysc_singlestep
+ tm __TI_flags+3(%r12),_TIF_SIGPENDING
+ jo sysc_sigpending
+ tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
+ jo sysc_notify_resume
+ tm __LC_CPU_FLAGS+3,_CIF_ASCE
+ jo sysc_uaccess
+ j sysc_return # beware of critical section cleanup
#
# _TIF_NEED_RESCHED is set, call schedule
#
sysc_reschedule:
l %r1,BASED(.Lschedule)
- la %r14,BASED(sysc_work_loop)
- br %r1 # call scheduler
+ la %r14,BASED(sysc_return)
+ br %r1 # call schedule
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
sysc_mcck_pending:
- l %r1,BASED(.Ls390_handle_mcck)
- la %r14,BASED(sysc_work_loop)
+ l %r1,BASED(.Lhandle_mcck)
+ la %r14,BASED(sysc_return)
br %r1 # TIF bit will be cleared by handler
#
+# _CIF_ASCE is set, load user space asce
+#
+sysc_uaccess:
+ ni __LC_CPU_FLAGS+3,255-_CIF_ASCE
+ lctl %c1,%c1,__LC_USER_ASCE # load primary asce
+ j sysc_return
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
sysc_sigpending:
- ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
- la %r2,SP_PTREGS(%r15) # load pt_regs
+ lr %r2,%r11 # pass pointer to pt_regs
l %r1,BASED(.Ldo_signal)
basr %r14,%r1 # call do_signal
- tm __TI_flags+3(%r9),_TIF_RESTART_SVC
- bo BASED(sysc_restart)
- tm __TI_flags+3(%r9),_TIF_SINGLE_STEP
- bo BASED(sysc_singlestep)
- b BASED(sysc_work_loop)
-
-#
-# _TIF_RESTART_SVC is set, set up registers and restart svc
-#
-sysc_restart:
- ni __TI_flags+3(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
- l %r7,SP_R2(%r15) # load new svc number
- sla %r7,2
- mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument
- lm %r2,%r6,SP_R2(%r15) # load svc arguments
- b BASED(sysc_do_restart) # restart svc
+ tm __PT_FLAGS+3(%r11),_PIF_SYSCALL
+ jno sysc_return
+ lm %r2,%r7,__PT_R2(%r11) # load svc arguments
+ l %r10,__TI_sysc_table(%r12) # 31 bit system call table
+ xr %r8,%r8 # svc 0 returns -ENOSYS
+ clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
+ jnl sysc_nr_ok # invalid svc number -> do svc 0
+ lh %r8,__PT_INT_CODE+2(%r11) # load new svc number
+ sla %r8,2
+ j sysc_nr_ok # restart svc
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+sysc_notify_resume:
+ lr %r2,%r11 # pass pointer to pt_regs
+ l %r1,BASED(.Ldo_notify_resume)
+ la %r14,BASED(sysc_return)
+ br %r1 # call do_notify_resume
#
-# _TIF_SINGLE_STEP is set, call do_single_step
+# _PIF_PER_TRAP is set, call do_per_trap
#
sysc_singlestep:
- ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
- mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check
- la %r2,SP_PTREGS(%r15) # address of register-save area
- l %r1,BASED(.Lhandle_per) # load adr. of per handler
- la %r14,BASED(sysc_return) # load adr. of system return
- br %r1 # branch to do_single_step
+ ni __PT_FLAGS+3(%r11),255-_PIF_PER_TRAP
+ lr %r2,%r11 # pass pointer to pt_regs
+ l %r1,BASED(.Ldo_per_trap)
+ la %r14,BASED(sysc_return)
+ br %r1 # call do_per_trap
#
-# call trace before and after sys_call
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
#
sysc_tracesys:
- l %r1,BASED(.Ltrace)
- la %r2,SP_PTREGS(%r15) # load pt_regs
+ l %r1,BASED(.Ltrace_enter)
+ lr %r2,%r11 # pass pointer to pt_regs
la %r3,0
- srl %r7,2
- st %r7,SP_R2(%r15)
- basr %r14,%r1
- clc SP_R2(4,%r15),BASED(.Lnr_syscalls)
- bnl BASED(sysc_tracenogo)
- l %r8,BASED(.Lsysc_table)
- l %r7,SP_R2(%r15) # strace might have changed the
- sll %r7,2 # system call
- l %r8,0(%r7,%r8)
+ xr %r0,%r0
+ icm %r0,3,__PT_INT_CODE+2(%r11)
+ st %r0,__PT_R2(%r11)
+ basr %r14,%r1 # call do_syscall_trace_enter
+ cl %r2,BASED(.Lnr_syscalls)
+ jnl sysc_tracenogo
+ lr %r8,%r2
+ sll %r8,2
+ l %r9,0(%r8,%r10)
sysc_tracego:
- lm %r3,%r6,SP_R3(%r15)
- l %r2,SP_ORIG_R2(%r15)
- basr %r14,%r8 # call sys_xxx
- st %r2,SP_R2(%r15) # store return value
+ lm %r3,%r7,__PT_R3(%r11)
+ st %r7,STACK_FRAME_OVERHEAD(%r15)
+ l %r2,__PT_ORIG_GPR2(%r11)
+ basr %r14,%r9 # call sys_xxx
+ st %r2,__PT_R2(%r11) # store return value
sysc_tracenogo:
- tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
- bz BASED(sysc_return)
- l %r1,BASED(.Ltrace)
- la %r2,SP_PTREGS(%r15) # load pt_regs
- la %r3,1
+ tm __TI_flags+3(%r12),_TIF_TRACE
+ jz sysc_return
+ l %r1,BASED(.Ltrace_exit)
+ lr %r2,%r11 # pass pointer to pt_regs
la %r14,BASED(sysc_return)
- br %r1
+ br %r1 # call do_syscall_trace_exit
#
# a new process exits the kernel with ret_from_fork
#
- .globl ret_from_fork
-ret_from_fork:
+ENTRY(ret_from_fork)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ l %r12,__LC_THREAD_INFO
l %r13,__LC_SVC_NEW_PSW+4
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- tm SP_PSW+1(%r15),0x01 # forking a kernel thread ?
- bo BASED(0f)
- st %r15,SP_R15(%r15) # store stack pointer for new kthread
-0: l %r1,BASED(.Lschedtail)
- basr %r14,%r1
+ l %r1,BASED(.Lschedule_tail)
+ basr %r14,%r1 # call schedule_tail
TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- b BASED(sysc_return)
-
-#
-# kernel_execve function needs to deal with pt_regs that is not
-# at the usual place
-#
- .globl kernel_execve
-kernel_execve:
- stm %r12,%r15,48(%r15)
- lr %r14,%r15
- l %r13,__LC_SVC_NEW_PSW+4
- s %r15,BASED(.Lc_spsize)
- st %r14,__SF_BACKCHAIN(%r15)
- la %r12,SP_PTREGS(%r15)
- xc 0(__PT_SIZE,%r12),0(%r12)
- l %r1,BASED(.Ldo_execve)
- lr %r5,%r12
- basr %r14,%r1
- ltr %r2,%r2
- be BASED(0f)
- a %r15,BASED(.Lc_spsize)
- lm %r12,%r15,48(%r15)
- br %r14
- # execve succeeded.
-0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts
- l %r15,__LC_KERNEL_STACK # load ksp
- s %r15,BASED(.Lc_spsize) # make room for registers & psw
- l %r9,__LC_THREAD_INFO
- mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- l %r1,BASED(.Lexecve_tail)
- basr %r14,%r1
- b BASED(sysc_return)
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
+ jne sysc_tracenogo
+ # it's a kernel thread
+ lm %r9,%r10,__PT_R9(%r11) # load gprs
+ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ basr %r14,%r9
+ j sysc_tracenogo
/*
* Program check handler routine
*/
- .globl pgm_check_handler
-pgm_check_handler:
-/*
- * First we need to check for a special case:
- * Single stepping an instruction that disables the PER event mask will
- * cause a PER event AFTER the mask has been set. Example: SVC or LPSW.
- * For a single stepped SVC the program check handler gets control after
- * the SVC new PSW has been loaded. But we want to execute the SVC first and
- * then handle the PER event. Therefore we update the SVC old PSW to point
- * to the pgm_check_handler and branch to the SVC handler after we checked
- * if we have to load the kernel stack register.
- * For every other possible cause for PER event without the PER mask set
- * we just ignore the PER event (FIXME: is there anything we have to do
- * for LPSW?).
- */
- STORE_TIMER __LC_SYNC_ENTER_TIMER
- SAVE_ALL_BASE __LC_SAVE_AREA
- tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception
- bnz BASED(pgm_per) # got per exception -> special case
- SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- bz BASED(pgm_no_vtime)
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-pgm_no_vtime:
-#endif
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- TRACE_IRQS_OFF
- l %r3,__LC_PGM_ILC # load program interruption code
- la %r8,0x7f
- nr %r8,%r3
-pgm_do_call:
- l %r7,BASED(.Ljump_table)
- sll %r8,2
- l %r7,0(%r8,%r7) # load address of handler routine
- la %r2,SP_PTREGS(%r15) # address of register-save area
- la %r14,BASED(sysc_return)
- br %r7 # branch to interrupt-handler
-
-#
-# handle per exception
-#
-pgm_per:
- tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on
- bnz BASED(pgm_per_std) # ok, normal per event from user space
-# ok its one of the special cases, now we need to find out which one
- clc __LC_PGM_OLD_PSW(8),__LC_SVC_NEW_PSW
- be BASED(pgm_svcper)
-# no interesting special case, ignore PER event
- lm %r12,%r15,__LC_SAVE_AREA
- lpsw 0x28
+ENTRY(pgm_check_handler)
+ stpt __LC_SYNC_ENTER_TIMER
+ stm %r8,%r15,__LC_SAVE_AREA_SYNC
+ l %r12,__LC_THREAD_INFO
+ l %r13,__LC_SVC_NEW_PSW+4
+ lm %r8,%r9,__LC_PGM_OLD_PSW
+ tmh %r8,0x0001 # test problem state bit
+ jnz 1f # -> fault in user space
+ tmh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 0f # -> enabled, can't be a double fault
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jnz pgm_svcper # -> single stepped svc
+0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+ ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+ l %r15,__LC_KERNEL_STACK
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stm %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
+ stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
+ mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jz 0f
+ l %r1,__TI_task(%r12)
+ tmh %r8,0x0001 # kernel per event ?
+ jz pgm_kprobe
+ oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP
+ mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
+ mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE
+ mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
+0: REENABLE_IRQS
+ xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+ l %r1,BASED(.Ljump_table)
+ la %r10,0x7f
+ n %r10,__PT_INT_CODE(%r11)
+ je sysc_return
+ sll %r10,2
+ l %r1,0(%r10,%r1) # load address of handler routine
+ lr %r2,%r11 # pass pointer to pt_regs
+ basr %r14,%r1 # branch to interrupt-handler
+ j sysc_return
#
-# Normal per exception
+# PER event in supervisor state, must be kprobes
#
-pgm_per_std:
- SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- bz BASED(pgm_no_vtime2)
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-pgm_no_vtime2:
-#endif
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- TRACE_IRQS_OFF
- l %r1,__TI_task(%r9)
- mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
- mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
- oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
- tm SP_PSW+1(%r15),0x01 # kernel per event ?
- bz BASED(kernel_per)
- l %r3,__LC_PGM_ILC # load program interruption code
- la %r8,0x7f
- nr %r8,%r3 # clear per-event-bit and ilc
- be BASED(sysc_return) # only per or per+check ?
- b BASED(pgm_do_call)
+pgm_kprobe:
+ REENABLE_IRQS
+ xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+ l %r1,BASED(.Ldo_per_trap)
+ lr %r2,%r11 # pass pointer to pt_regs
+ basr %r14,%r1 # call do_per_trap
+ j sysc_return
#
-# it was a single stepped SVC that is causing all the trouble
+# single stepped system call
#
pgm_svcper:
- SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-#endif
- lh %r7,0x8a # get svc number from lowcore
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- TRACE_IRQS_OFF
- l %r1,__TI_task(%r9)
- mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
- mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
- oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- b BASED(sysc_do_svc)
-
-#
-# per was called from kernel, must be kprobes
-#
-kernel_per:
- mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check
- la %r2,SP_PTREGS(%r15) # address of register-save area
- l %r1,BASED(.Lhandle_per) # load adr. of per handler
- la %r14,BASED(sysc_restore)# load adr. of system return
- br %r1 # branch to do_single_step
+ mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
+ mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per)
+ lhi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
+ lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs
/*
* IO interrupt handler routine
*/
- .globl io_int_handler
-io_int_handler:
- STORE_TIMER __LC_ASYNC_ENTER_TIMER
+ENTRY(io_int_handler)
stck __LC_INT_CLOCK
- SAVE_ALL_BASE __LC_SAVE_AREA+16
- SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
- CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- bz BASED(io_no_vtime)
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
-io_no_vtime:
-#endif
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
+ stpt __LC_ASYNC_ENTER_TIMER
+ stm %r8,%r15,__LC_SAVE_AREA_ASYNC
+ l %r12,__LC_THREAD_INFO
+ l %r13,__LC_SVC_NEW_PSW+4
+ lm %r8,%r9,__LC_IO_OLD_PSW
+ tmh %r8,0x0001 # interrupting from user ?
+ jz io_skip
+ UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
+io_skip:
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+ stm %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
+ stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
- l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ
- la %r2,SP_PTREGS(%r15) # address of register-save area
- basr %r14,%r1 # branch to standard irq handler
+ xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
+ l %r1,BASED(.Ldo_IRQ)
+ lr %r2,%r11 # pass pointer to pt_regs
+ lhi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lhi %r3,THIN_INTERRUPT
+io_call:
+ basr %r14,%r1 # call do_IRQ
+ tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
+ jz io_return
+ tpi 0
+ jz io_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j io_loop
io_return:
- tm __TI_flags+3(%r9),_TIF_WORK_INT
- bnz BASED(io_work) # there is work to do (signals etc.)
-io_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
- la %r1,BASED(io_restore_trace_psw)
- lpsw 0(%r1)
-io_restore_trace:
- TRACE_IRQS_CHECK
LOCKDEP_SYS_EXIT
-#endif
-io_leave:
- RESTORE_ALL __LC_RETURN_PSW,0
+ TRACE_IRQS_ON
+io_tif:
+ tm __TI_flags+3(%r12),_TIF_WORK
+ jnz io_work # there is work to do (signals etc.)
+ tm __LC_CPU_FLAGS+3,_CIF_WORK
+ jnz io_work
+io_restore:
+ mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
+ stpt __LC_EXIT_TIMER
+ lm %r0,%r15,__PT_R0(%r11)
+ lpsw __LC_RETURN_PSW
io_done:
-#ifdef CONFIG_TRACE_IRQFLAGS
- .align 8
- .globl io_restore_trace_psw
-io_restore_trace_psw:
- .long 0, io_restore_trace + 0x80000000
-#endif
-
#
-# switch to kernel stack, then check the TIF bits
+# There is work to do, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK work
+# 2) if we return to kernel code and preemptive scheduling is enabled check
+# the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
#
io_work:
- tm SP_PSW+1(%r15),0x01 # returning to user ?
-#ifndef CONFIG_PREEMPT
- bno BASED(io_restore) # no-> skip resched & signal
-#else
- bnz BASED(io_work_user) # no -> check for preemptive scheduling
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jo io_work_user # yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
# check for preemptive scheduling
- icm %r0,15,__TI_precount(%r9)
- bnz BASED(io_restore) # preemption disabled
- l %r1,SP_R15(%r15)
- s %r1,BASED(.Lc_spsize)
- mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
- xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain
+ icm %r0,15,__TI_precount(%r12)
+ jnz io_restore # preemption disabled
+ tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
+ jno io_restore
+ # switch to kernel stack
+ l %r1,__PT_R15(%r11)
+ ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
lr %r15,%r1
-io_resume_loop:
- tm __TI_flags+3(%r9),_TIF_NEED_RESCHED
- bno BASED(io_restore)
- l %r1,BASED(.Lpreempt_schedule_irq)
- la %r14,BASED(io_resume_loop)
- br %r1 # call schedule
+ # TRACE_IRQS_ON already done at io_return, call
+ # TRACE_IRQS_OFF to keep things symmetrical
+ TRACE_IRQS_OFF
+ l %r1,BASED(.Lpreempt_irq)
+ basr %r14,%r1 # call preempt_schedule_irq
+ j io_return
+#else
+ j io_restore
#endif
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
io_work_user:
l %r1,__LC_KERNEL_STACK
- s %r1,BASED(.Lc_spsize)
- mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
- xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
lr %r15,%r1
+
#
# One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED
-# and _TIF_MCCK_PENDING
#
-io_work_loop:
- tm __TI_flags+3(%r9),_TIF_MCCK_PENDING
- bo BASED(io_mcck_pending)
- tm __TI_flags+3(%r9),_TIF_NEED_RESCHED
- bo BASED(io_reschedule)
- tm __TI_flags+3(%r9),_TIF_SIGPENDING
- bnz BASED(io_sigpending)
- b BASED(io_restore)
-io_work_done:
+io_work_tif:
+ tm __LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING
+ jo io_mcck_pending
+ tm __TI_flags+3(%r12),_TIF_NEED_RESCHED
+ jo io_reschedule
+ tm __TI_flags+3(%r12),_TIF_SIGPENDING
+ jo io_sigpending
+ tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
+ jo io_notify_resume
+ tm __LC_CPU_FLAGS+3,_CIF_ASCE
+ jo io_uaccess
+ j io_return # beware of critical section cleanup
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
io_mcck_pending:
- l %r1,BASED(.Ls390_handle_mcck)
+ # TRACE_IRQS_ON already done at io_return
+ l %r1,BASED(.Lhandle_mcck)
basr %r14,%r1 # TIF bit will be cleared by handler
- b BASED(io_work_loop)
+ TRACE_IRQS_OFF
+ j io_return
+
+#
+# _CIF_ASCE is set, load user space asce
+#
+io_uaccess:
+ ni __LC_CPU_FLAGS+3,255-_CIF_ASCE
+ lctl %c1,%c1,__LC_USER_ASCE # load primary asce
+ j io_return
#
# _TIF_NEED_RESCHED is set, call schedule
#
io_reschedule:
- TRACE_IRQS_ON
+ # TRACE_IRQS_ON already done at io_return
l %r1,BASED(.Lschedule)
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
basr %r14,%r1 # call scheduler
- stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
TRACE_IRQS_OFF
- tm __TI_flags+3(%r9),_TIF_WORK_INT
- bz BASED(io_restore) # there is no work to do
- b BASED(io_work_loop)
+ j io_return
#
# _TIF_SIGPENDING is set, call do_signal
#
io_sigpending:
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- la %r2,SP_PTREGS(%r15) # load pt_regs
+ # TRACE_IRQS_ON already done at io_return
l %r1,BASED(.Ldo_signal)
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lr %r2,%r11 # pass pointer to pt_regs
basr %r14,%r1 # call do_signal
- stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j io_return
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+io_notify_resume:
+ # TRACE_IRQS_ON already done at io_return
+ l %r1,BASED(.Ldo_notify_resume)
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lr %r2,%r11 # pass pointer to pt_regs
+ basr %r14,%r1 # call do_notify_resume
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
TRACE_IRQS_OFF
- b BASED(io_work_loop)
+ j io_return
/*
* External interrupt handler routine
*/
- .globl ext_int_handler
-ext_int_handler:
- STORE_TIMER __LC_ASYNC_ENTER_TIMER
+ENTRY(ext_int_handler)
stck __LC_INT_CLOCK
- SAVE_ALL_BASE __LC_SAVE_AREA+16
- SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
- CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- bz BASED(ext_no_vtime)
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
-ext_no_vtime:
-#endif
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
+ stpt __LC_ASYNC_ENTER_TIMER
+ stm %r8,%r15,__LC_SAVE_AREA_ASYNC
+ l %r12,__LC_THREAD_INFO
+ l %r13,__LC_SVC_NEW_PSW+4
+ lm %r8,%r9,__LC_EXT_OLD_PSW
+ tmh %r8,0x0001 # interrupting from user ?
+ jz ext_skip
+ UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER
+ext_skip:
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+ stm %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
+ stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
- la %r2,SP_PTREGS(%r15) # address of register-save area
- lh %r3,__LC_EXT_INT_CODE # get interruption code
- l %r1,BASED(.Ldo_extint)
- basr %r14,%r1
- b BASED(io_return)
+ l %r1,BASED(.Ldo_IRQ)
+ lr %r2,%r11 # pass pointer to pt_regs
+ lhi %r3,EXT_INTERRUPT
+ basr %r14,%r1 # call do_IRQ
+ j io_return
+
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+ st %r3,__SF_EMPTY(%r15)
+ basr %r1,0
+ la %r1,psw_idle_lpsw+4-.(%r1)
+ st %r1,__SF_EMPTY+4(%r15)
+ oi __SF_EMPTY+4(%r15),0x80
+ stck __CLOCK_IDLE_ENTER(%r2)
+ stpt __TIMER_IDLE_ENTER(%r2)
+psw_idle_lpsw:
+ lpsw __SF_EMPTY(%r15)
+ br %r14
+psw_idle_end:
__critical_end:
@@ -737,127 +642,108 @@ __critical_end:
* Machine check handler routines
*/
- .globl mcck_int_handler
-mcck_int_handler:
+ENTRY(mcck_int_handler)
+ stck __LC_MCCK_CLOCK
spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer
lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs
- SAVE_ALL_BASE __LC_SAVE_AREA+32
- la %r12,__LC_MCK_OLD_PSW
+ l %r12,__LC_THREAD_INFO
+ l %r13,__LC_SVC_NEW_PSW+4
+ lm %r8,%r9,__LC_MCK_OLD_PSW
tm __LC_MCCK_CODE,0x80 # system damage?
- bo BASED(mcck_int_main) # yes -> rest of mcck code invalid
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_SAVE_AREA+52(8),__LC_ASYNC_ENTER_TIMER
- mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA
+ jo mcck_panic # yes -> rest of mcck code invalid
+ la %r14,__LC_CPU_TIMER_SAVE_AREA
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid?
- bo BASED(1f)
+ jo 3f
la %r14,__LC_SYNC_ENTER_TIMER
clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
- bl BASED(0f)
+ jl 0f
la %r14,__LC_ASYNC_ENTER_TIMER
0: clc 0(8,%r14),__LC_EXIT_TIMER
- bl BASED(0f)
+ jl 1f
la %r14,__LC_EXIT_TIMER
-0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
- bl BASED(0f)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ jl 2f
la %r14,__LC_LAST_UPDATE_TIMER
-0: spt 0(%r14)
- mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14)
-1:
-#endif
- tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
- bno BASED(mcck_int_main) # no -> skip cleanup critical
- tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit
- bnz BASED(mcck_int_main) # from user -> load async stack
- clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end)
- bhe BASED(mcck_int_main)
- clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start)
- bl BASED(mcck_int_main)
- l %r14,BASED(.Lcleanup_critical)
- basr %r14,%r14
-mcck_int_main:
- l %r14,__LC_PANIC_STACK # are we already on the panic stack?
- slr %r14,%r15
- sra %r14,PAGE_SHIFT
- be BASED(0f)
- l %r15,__LC_PANIC_STACK # load panic stack
-0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid?
- bno BASED(mcck_no_vtime) # no -> skip cleanup critical
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- bz BASED(mcck_no_vtime)
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
-mcck_no_vtime:
-#endif
- l %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- la %r2,SP_PTREGS(%r15) # load pt_regs
- l %r1,BASED(.Ls390_mcck)
- basr %r14,%r1 # call machine check handler
- tm SP_PSW+1(%r15),0x01 # returning to user ?
- bno BASED(mcck_return)
+2: spt 0(%r14)
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
+3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
+ jno mcck_panic # no -> skip cleanup critical
+ tm %r8,0x0001 # interrupting from user ?
+ jz mcck_skip
+ UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER
+mcck_skip:
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT
+ stm %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
+ stm %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+ l %r1,BASED(.Ldo_machine_check)
+ lr %r2,%r11 # pass pointer to pt_regs
+ basr %r14,%r1 # call s390_do_machine_check
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno mcck_return
l %r1,__LC_KERNEL_STACK # switch to kernel stack
- s %r1,BASED(.Lc_spsize)
- mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
- xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
lr %r15,%r1
- stosm __SF_EMPTY(%r15),0x04 # turn dat on
- tm __TI_flags+3(%r9),_TIF_MCCK_PENDING
- bno BASED(mcck_return)
+ ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
+ tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
+ jno mcck_return
TRACE_IRQS_OFF
- l %r1,BASED(.Ls390_handle_mcck)
- basr %r14,%r1 # call machine check handler
+ l %r1,BASED(.Lhandle_mcck)
+ basr %r14,%r1 # call s390_handle_mcck
TRACE_IRQS_ON
mcck_return:
- mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW
- ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+52
+ mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
- bno BASED(0f)
- lm %r0,%r15,SP_R0(%r15) # load gprs 0-15
+ jno 0f
+ lm %r0,%r15,__PT_R0(%r11)
stpt __LC_EXIT_TIMER
- lpsw __LC_RETURN_MCCK_PSW # back to caller
-0:
-#endif
- lm %r0,%r15,SP_R0(%r15) # load gprs 0-15
- lpsw __LC_RETURN_MCCK_PSW # back to caller
+ lpsw __LC_RETURN_MCCK_PSW
+0: lm %r0,%r15,__PT_R0(%r11)
+ lpsw __LC_RETURN_MCCK_PSW
- RESTORE_ALL __LC_RETURN_MCCK_PSW,0
-
-/*
- * Restart interruption handler, kick starter for additional CPUs
- */
-#ifdef CONFIG_SMP
- __CPUINIT
- .globl restart_int_handler
-restart_int_handler:
- l %r15,__LC_SAVE_AREA+60 # load ksp
- lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs
- lam %a0,%a15,__LC_AREGS_SAVE_AREA
- lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone
- stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on
- basr %r14,0
- l %r14,restart_addr-.(%r14)
- br %r14 # branch to start_secondary
-restart_addr:
- .long start_secondary
- .previous
-#else
-/*
- * If we do not run with SMP enabled, let the new CPU crash ...
- */
- .globl restart_int_handler
-restart_int_handler:
- basr %r1,0
-restart_base:
- lpsw restart_crash-restart_base(%r1)
- .align 8
-restart_crash:
- .long 0x000a0000,0x00000000
-restart_go:
-#endif
+mcck_panic:
+ l %r14,__LC_PANIC_STACK
+ slr %r14,%r15
+ sra %r14,PAGE_SHIFT
+ jz 0f
+ l %r15,__LC_PANIC_STACK
+ j mcck_skip
+0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j mcck_skip
+
+#
+# PSW restart interrupt handler
+#
+ENTRY(restart_int_handler)
+ st %r15,__LC_SAVE_AREA_RESTART
+ l %r15,__LC_RESTART_STACK
+ ahi %r15,-__PT_SIZE # create pt_regs on stack
+ xc 0(__PT_SIZE,%r15),0(%r15)
+ stm %r0,%r14,__PT_R0(%r15)
+ mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART
+ mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw
+ ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+ l %r1,__LC_RESTART_FN # load fn, parm & source cpu
+ l %r2,__LC_RESTART_DATA
+ l %r3,__LC_RESTART_SOURCE
+ ltr %r3,%r3 # test source cpu address
+ jm 1f # negative -> skip source stop
+0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
+ brc 10,0b # wait for status stored
+1: basr %r14,%r1 # call function
+ stap __SF_EMPTY(%r15) # store cpu address
+ lh %r3,__SF_EMPTY(%r15)
+2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
+ brc 2,2b
+3: j 3b
+
+ .section .kprobes.text, "ax"
#ifdef CONFIG_CHECK_STACK
/*
@@ -867,237 +753,214 @@ restart_go:
*/
stack_overflow:
l %r15,__LC_PANIC_STACK # change to panic stack
- sl %r15,BASED(.Lc_spsize)
- mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack
- stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack
- la %r1,__LC_SAVE_AREA
- ch %r12,BASED(.L0x020) # old psw addr == __LC_SVC_OLD_PSW ?
- be BASED(0f)
- ch %r12,BASED(.L0x028) # old psw addr == __LC_PGM_OLD_PSW ?
- be BASED(0f)
- la %r1,__LC_SAVE_AREA+16
-0: mvc SP_R12(16,%r15),0(%r1) # move %r12-%r15 to stack
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear back chain
- l %r1,BASED(1f) # branch to kernel_stack_overflow
- la %r2,SP_PTREGS(%r15) # load pt_regs
- br %r1
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stm %r0,%r7,__PT_R0(%r11)
+ stm %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(32,%r11),0(%r14)
+ l %r1,BASED(1f)
+ xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+ lr %r2,%r11 # pass pointer to pt_regs
+ br %r1 # branch to kernel_stack_overflow
1: .long kernel_stack_overflow
#endif
-cleanup_table_system_call:
- .long system_call + 0x80000000, sysc_do_svc + 0x80000000
-cleanup_table_sysc_return:
- .long sysc_return + 0x80000000, sysc_leave + 0x80000000
-cleanup_table_sysc_leave:
- .long sysc_leave + 0x80000000, sysc_done + 0x80000000
-cleanup_table_sysc_work_loop:
- .long sysc_work_loop + 0x80000000, sysc_work_done + 0x80000000
-cleanup_table_io_return:
- .long io_return + 0x80000000, io_leave + 0x80000000
-cleanup_table_io_leave:
- .long io_leave + 0x80000000, io_done + 0x80000000
-cleanup_table_io_work_loop:
- .long io_work_loop + 0x80000000, io_work_done + 0x80000000
+cleanup_table:
+ .long system_call + 0x80000000
+ .long sysc_do_svc + 0x80000000
+ .long sysc_tif + 0x80000000
+ .long sysc_restore + 0x80000000
+ .long sysc_done + 0x80000000
+ .long io_tif + 0x80000000
+ .long io_restore + 0x80000000
+ .long io_done + 0x80000000
+ .long psw_idle + 0x80000000
+ .long psw_idle_end + 0x80000000
cleanup_critical:
- clc 4(4,%r12),BASED(cleanup_table_system_call)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_system_call+4)
- bl BASED(cleanup_system_call)
-0:
- clc 4(4,%r12),BASED(cleanup_table_sysc_return)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_sysc_return+4)
- bl BASED(cleanup_sysc_return)
-0:
- clc 4(4,%r12),BASED(cleanup_table_sysc_leave)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_sysc_leave+4)
- bl BASED(cleanup_sysc_leave)
-0:
- clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop+4)
- bl BASED(cleanup_sysc_return)
-0:
- clc 4(4,%r12),BASED(cleanup_table_io_return)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_io_return+4)
- bl BASED(cleanup_io_return)
-0:
- clc 4(4,%r12),BASED(cleanup_table_io_leave)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_io_leave+4)
- bl BASED(cleanup_io_leave)
-0:
- clc 4(4,%r12),BASED(cleanup_table_io_work_loop)
- bl BASED(0f)
- clc 4(4,%r12),BASED(cleanup_table_io_work_loop+4)
- bl BASED(cleanup_io_return)
-0:
- br %r14
+ cl %r9,BASED(cleanup_table) # system_call
+ jl 0f
+ cl %r9,BASED(cleanup_table+4) # sysc_do_svc
+ jl cleanup_system_call
+ cl %r9,BASED(cleanup_table+8) # sysc_tif
+ jl 0f
+ cl %r9,BASED(cleanup_table+12) # sysc_restore
+ jl cleanup_sysc_tif
+ cl %r9,BASED(cleanup_table+16) # sysc_done
+ jl cleanup_sysc_restore
+ cl %r9,BASED(cleanup_table+20) # io_tif
+ jl 0f
+ cl %r9,BASED(cleanup_table+24) # io_restore
+ jl cleanup_io_tif
+ cl %r9,BASED(cleanup_table+28) # io_done
+ jl cleanup_io_restore
+ cl %r9,BASED(cleanup_table+32) # psw_idle
+ jl 0f
+ cl %r9,BASED(cleanup_table+36) # psw_idle_end
+ jl cleanup_idle
+0: br %r14
cleanup_system_call:
- mvc __LC_RETURN_PSW(8),0(%r12)
- c %r12,BASED(.Lmck_old_psw)
- be BASED(0f)
- la %r12,__LC_SAVE_AREA+16
- b BASED(1f)
-0: la %r12,__LC_SAVE_AREA+32
-1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4)
- bh BASED(0f)
+ # check if stpt has been executed
+ cl %r9,BASED(cleanup_system_call_insn)
+ jh 0f
mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
-0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8)
- bhe BASED(cleanup_vtime)
-#endif
- clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn)
- bh BASED(0f)
- mvc __LC_SAVE_AREA(16),0(%r12)
-0: st %r13,4(%r12)
- st %r12,__LC_SAVE_AREA+48 # argh
- SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- l %r12,__LC_SAVE_AREA+48 # argh
- st %r15,12(%r12)
- lh %r7,0x8a
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-cleanup_vtime:
- clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12)
- bhe BASED(cleanup_stime)
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
-cleanup_stime:
- clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16)
- bh BASED(cleanup_update)
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-cleanup_update:
+ chi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: # check if stm has been executed
+ cl %r9,BASED(cleanup_system_call_insn+4)
+ jh 0f
+ mvc __LC_SAVE_AREA_SYNC(32),0(%r11)
+0: # set up saved registers r12, and r13
+ st %r12,16(%r11) # r12 thread-info pointer
+ st %r13,20(%r11) # r13 literal-pool pointer
+ # check if the user time calculation has been done
+ cl %r9,BASED(cleanup_system_call_insn+8)
+ jh 0f
+ l %r10,__LC_EXIT_TIMER
+ l %r15,__LC_EXIT_TIMER+4
+ SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER
+ ADD64 %r10,%r15,__LC_USER_TIMER
+ st %r10,__LC_USER_TIMER
+ st %r15,__LC_USER_TIMER+4
+0: # check if the system time calculation has been done
+ cl %r9,BASED(cleanup_system_call_insn+12)
+ jh 0f
+ l %r10,__LC_LAST_UPDATE_TIMER
+ l %r15,__LC_LAST_UPDATE_TIMER+4
+ SUB64 %r10,%r15,__LC_EXIT_TIMER
+ ADD64 %r10,%r15,__LC_SYSTEM_TIMER
+ st %r10,__LC_SYSTEM_TIMER
+ st %r15,__LC_SYSTEM_TIMER+4
+0: # update accounting time stamp
mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-#endif
- mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4)
- la %r12,__LC_RETURN_PSW
+ # set up saved register 11
+ l %r15,__LC_KERNEL_STACK
+ la %r9,STACK_FRAME_OVERHEAD(%r15)
+ st %r9,12(%r11) # r11 pt_regs pointer
+ # fill pt_regs
+ mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC
+ stm %r0,%r7,__PT_R0(%r9)
+ mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
+ xc __PT_FLAGS(4,%r9),__PT_FLAGS(%r9)
+ mvi __PT_FLAGS+3(%r9),_PIF_SYSCALL
+ # setup saved register 15
+ st %r15,28(%r11) # r15 stack pointer
+ # set new psw address and exit
+ l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000
br %r14
cleanup_system_call_insn:
- .long sysc_saveall + 0x80000000
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
.long system_call + 0x80000000
- .long sysc_vtime + 0x80000000
- .long sysc_stime + 0x80000000
- .long sysc_update + 0x80000000
-#endif
+ .long sysc_stm + 0x80000000
+ .long sysc_vtime + 0x80000000 + 36
+ .long sysc_vtime + 0x80000000 + 76
-cleanup_sysc_return:
- mvc __LC_RETURN_PSW(4),0(%r12)
- mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_return)
- la %r12,__LC_RETURN_PSW
+cleanup_sysc_tif:
+ l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000
br %r14
-cleanup_sysc_leave:
- clc 4(4,%r12),BASED(cleanup_sysc_leave_insn)
- be BASED(2f)
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
- clc 4(4,%r12),BASED(cleanup_sysc_leave_insn+4)
- be BASED(2f)
-#endif
- mvc __LC_RETURN_PSW(8),SP_PSW(%r15)
- c %r12,BASED(.Lmck_old_psw)
- bne BASED(0f)
- mvc __LC_SAVE_AREA+32(16),SP_R12(%r15)
- b BASED(1f)
-0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15)
-1: lm %r0,%r11,SP_R0(%r15)
- l %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+cleanup_sysc_restore:
+ cl %r9,BASED(cleanup_sysc_restore_insn)
+ jhe 0f
+ l %r9,12(%r11) # get saved pointer to pt_regs
+ mvc __LC_RETURN_PSW(8),__PT_PSW(%r9)
+ mvc 0(32,%r11),__PT_R8(%r9)
+ lm %r0,%r7,__PT_R0(%r9)
+0: lm %r8,%r9,__LC_RETURN_PSW
br %r14
-cleanup_sysc_leave_insn:
+cleanup_sysc_restore_insn:
.long sysc_done - 4 + 0x80000000
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .long sysc_done - 8 + 0x80000000
-#endif
-cleanup_io_return:
- mvc __LC_RETURN_PSW(4),0(%r12)
- mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_work_loop)
- la %r12,__LC_RETURN_PSW
+cleanup_io_tif:
+ l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000
br %r14
-cleanup_io_leave:
- clc 4(4,%r12),BASED(cleanup_io_leave_insn)
- be BASED(2f)
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
- clc 4(4,%r12),BASED(cleanup_io_leave_insn+4)
- be BASED(2f)
-#endif
- mvc __LC_RETURN_PSW(8),SP_PSW(%r15)
- c %r12,BASED(.Lmck_old_psw)
- bne BASED(0f)
- mvc __LC_SAVE_AREA+32(16),SP_R12(%r15)
- b BASED(1f)
-0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15)
-1: lm %r0,%r11,SP_R0(%r15)
- l %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+cleanup_io_restore:
+ cl %r9,BASED(cleanup_io_restore_insn)
+ jhe 0f
+ l %r9,12(%r11) # get saved r11 pointer to pt_regs
+ mvc __LC_RETURN_PSW(8),__PT_PSW(%r9)
+ mvc 0(32,%r11),__PT_R8(%r9)
+ lm %r0,%r7,__PT_R0(%r9)
+0: lm %r8,%r9,__LC_RETURN_PSW
br %r14
-cleanup_io_leave_insn:
+cleanup_io_restore_insn:
.long io_done - 4 + 0x80000000
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .long io_done - 8 + 0x80000000
-#endif
+
+cleanup_idle:
+ # copy interrupt clock & cpu timer
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
+ chi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
+0: # check if stck has been executed
+ cl %r9,BASED(cleanup_idle_insn)
+ jhe 1f
+ mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
+ mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3)
+1: # account system time going idle
+ lm %r9,%r10,__LC_STEAL_TIMER
+ ADD64 %r9,%r10,__CLOCK_IDLE_ENTER(%r2)
+ SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK
+ stm %r9,%r10,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+ lm %r9,%r10,__LC_SYSTEM_TIMER
+ ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER
+ SUB64 %r9,%r10,__TIMER_IDLE_ENTER(%r2)
+ stm %r9,%r10,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+ # prepare return psw
+ n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits
+ l %r9,24(%r11) # return from psw_idle
+ br %r14
+cleanup_idle_insn:
+ .long psw_idle_lpsw + 0x80000000
+cleanup_idle_wait:
+ .long 0xfcfdffff
/*
* Integer constants
*/
- .align 4
-.Lc_spsize: .long SP_SIZE
-.Lc_overhead: .long STACK_FRAME_OVERHEAD
-.Lnr_syscalls: .long NR_syscalls
-.L0x018: .short 0x018
-.L0x020: .short 0x020
-.L0x028: .short 0x028
-.L0x030: .short 0x030
-.L0x038: .short 0x038
-.Lc_1: .long 1
+ .align 4
+.Lnr_syscalls:
+ .long NR_syscalls
+.Lvtimer_max:
+ .quad 0x7fffffffffffffff
/*
* Symbol constants
*/
-.Ls390_mcck: .long s390_do_machine_check
-.Ls390_handle_mcck:
- .long s390_handle_mcck
-.Lmck_old_psw: .long __LC_MCK_OLD_PSW
-.Ldo_IRQ: .long do_IRQ
-.Ldo_extint: .long do_extint
-.Ldo_signal: .long do_signal
-.Lhandle_per: .long do_single_step
-.Ldo_execve: .long do_execve
-.Lexecve_tail: .long execve_tail
-.Ljump_table: .long pgm_check_table
-.Lschedule: .long schedule
+.Ldo_machine_check: .long s390_do_machine_check
+.Lhandle_mcck: .long s390_handle_mcck
+.Ldo_IRQ: .long do_IRQ
+.Ldo_signal: .long do_signal
+.Ldo_notify_resume: .long do_notify_resume
+.Ldo_per_trap: .long do_per_trap
+.Ljump_table: .long pgm_check_table
+.Lschedule: .long schedule
#ifdef CONFIG_PREEMPT
-.Lpreempt_schedule_irq:
- .long preempt_schedule_irq
+.Lpreempt_irq: .long preempt_schedule_irq
#endif
-.Ltrace: .long syscall_trace
-.Lschedtail: .long schedule_tail
-.Lsysc_table: .long sys_call_table
+.Ltrace_enter: .long do_syscall_trace_enter
+.Ltrace_exit: .long do_syscall_trace_exit
+.Lschedule_tail: .long schedule_tail
+.Lsysc_per: .long sysc_per + 0x80000000
#ifdef CONFIG_TRACE_IRQFLAGS
-.Ltrace_irq_on: .long trace_hardirqs_on
-.Ltrace_irq_off:
- .long trace_hardirqs_off
-.Llockdep_sys_exit:
- .long lockdep_sys_exit
+.Lhardirqs_on: .long trace_hardirqs_on_caller
+.Lhardirqs_off: .long trace_hardirqs_off_caller
+#endif
+#ifdef CONFIG_LOCKDEP
+.Llockdep_sys_exit: .long lockdep_sys_exit
#endif
-.Lcritical_start:
- .long __critical_start + 0x80000000
-.Lcritical_end:
- .long __critical_end + 0x80000000
-.Lcleanup_critical:
- .long cleanup_critical
+.Lcritical_start: .long __critical_start + 0x80000000
+.Lcritical_length: .long __critical_end - __critical_start
.section .rodata, "a"
#define SYSCALL(esa,esame,emu) .long esa
+ .globl sys_call_table
sys_call_table:
#include "syscalls.S"
#undef SYSCALL
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 6b1896345ed..6ac78192455 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -4,57 +4,70 @@
#include <linux/types.h>
#include <linux/signal.h>
#include <asm/ptrace.h>
+#include <asm/cputime.h>
-typedef void pgm_check_handler_t(struct pt_regs *, long);
-extern pgm_check_handler_t *pgm_check_table[128];
-pgm_check_handler_t do_protection_exception;
-pgm_check_handler_t do_dat_exception;
+extern void *restart_stack;
+extern unsigned long suspend_zero_pages;
-extern int sysctl_userprocess_debug;
+void system_call(void);
+void pgm_check_handler(void);
+void ext_int_handler(void);
+void io_int_handler(void);
+void mcck_int_handler(void);
+void restart_int_handler(void);
+void restart_call_handler(void);
+void psw_idle(struct s390_idle_data *, unsigned long);
-void do_single_step(struct pt_regs *regs);
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+
+void do_protection_exception(struct pt_regs *regs);
+void do_dat_exception(struct pt_regs *regs);
+
+void addressing_exception(struct pt_regs *regs);
+void data_exception(struct pt_regs *regs);
+void default_trap_handler(struct pt_regs *regs);
+void divide_exception(struct pt_regs *regs);
+void execute_exception(struct pt_regs *regs);
+void hfp_divide_exception(struct pt_regs *regs);
+void hfp_overflow_exception(struct pt_regs *regs);
+void hfp_significance_exception(struct pt_regs *regs);
+void hfp_sqrt_exception(struct pt_regs *regs);
+void hfp_underflow_exception(struct pt_regs *regs);
+void illegal_op(struct pt_regs *regs);
+void operand_exception(struct pt_regs *regs);
+void overflow_exception(struct pt_regs *regs);
+void privileged_op(struct pt_regs *regs);
+void space_switch_exception(struct pt_regs *regs);
+void special_op_exception(struct pt_regs *regs);
+void specification_exception(struct pt_regs *regs);
+void transaction_exception(struct pt_regs *regs);
+void translation_exception(struct pt_regs *regs);
+
+void do_per_trap(struct pt_regs *regs);
void syscall_trace(struct pt_regs *regs, int entryexit);
void kernel_stack_overflow(struct pt_regs * regs);
void do_signal(struct pt_regs *regs);
-int handle_signal32(unsigned long sig, struct k_sigaction *ka,
+void handle_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
+void do_notify_resume(struct pt_regs *regs);
-void do_extint(struct pt_regs *regs, unsigned short code);
-int __cpuinit start_secondary(void *cpuvoid);
+void __init init_IRQ(void);
+void do_IRQ(struct pt_regs *regs, int irq);
+void do_restart(void);
void __init startup_init(void);
-void die(const char * str, struct pt_regs * regs, long err);
+void die(struct pt_regs *regs, const char *str);
+int setup_profiling_timer(unsigned int multiplier);
+void __init time_init(void);
+int pfn_is_nosave(unsigned long);
+void s390_early_resume(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
-struct new_utsname;
-struct mmap_arg_struct;
+struct s390_mmap_arg_struct;
struct fadvise64_64_args;
struct old_sigaction;
-struct sel_arg_struct;
-
-long sys_pipe(unsigned long __user *fildes);
-long sys_mmap2(struct mmap_arg_struct __user *arg);
-long old_mmap(struct mmap_arg_struct __user *arg);
-long sys_ipc(uint call, int first, unsigned long second,
- unsigned long third, void __user *ptr);
-long s390x_newuname(struct new_utsname __user *name);
-long s390x_personality(unsigned long personality);
-long s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
- size_t len, int advice);
-long s390_fadvise64_64(struct fadvise64_64_args __user *args);
-long s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, u32 len_low);
-long sys_fork(void);
-long sys_clone(void);
-long sys_vfork(void);
-void execve_tail(void);
-long sys_execve(void);
-int sys_sigsuspend(int history0, int history1, old_sigset_t mask);
-long sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact);
-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss);
-long sys_sigreturn(void);
-long sys_rt_sigreturn(void);
-long sys32_sigreturn(void);
-long sys32_rt_sigreturn(void);
-long old_select(struct sel_arg_struct __user *arg);
-long sys_ptrace(long request, long pid, long addr, long data);
+
+long sys_s390_personality(unsigned int personality);
+long sys_s390_runtime_instr(int command, int signum);
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index fee10177dbf..f2e674c702e 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -1,193 +1,173 @@
/*
- * arch/s390/kernel/entry64.S
* S390 low-level entry points.
*
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright IBM Corp. 1999, 2012
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Hartmut Penner (hp@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
* Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/sys.h>
-#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/processor.h>
#include <asm/cache.h>
-#include <asm/lowcore.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/page.h>
+#include <asm/sigp.h>
+#include <asm/irq.h>
-/*
- * Stack layout for the system_call stack entry.
- * The first few entries are identical to the user_regs_struct.
- */
-SP_PTREGS = STACK_FRAME_OVERHEAD
-SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS
-SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW
-SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS
-SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8
-SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16
-SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24
-SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32
-SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40
-SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48
-SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56
-SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 64
-SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 72
-SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 80
-SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 88
-SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 96
-SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104
-SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112
-SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120
-SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2
-SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC
-SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP
-SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE
+__PT_R0 = __PT_GPRS
+__PT_R1 = __PT_GPRS + 8
+__PT_R2 = __PT_GPRS + 16
+__PT_R3 = __PT_GPRS + 24
+__PT_R4 = __PT_GPRS + 32
+__PT_R5 = __PT_GPRS + 40
+__PT_R6 = __PT_GPRS + 48
+__PT_R7 = __PT_GPRS + 56
+__PT_R8 = __PT_GPRS + 64
+__PT_R9 = __PT_GPRS + 72
+__PT_R10 = __PT_GPRS + 80
+__PT_R11 = __PT_GPRS + 88
+__PT_R12 = __PT_GPRS + 96
+__PT_R13 = __PT_GPRS + 104
+__PT_R14 = __PT_GPRS + 112
+__PT_R15 = __PT_GPRS + 120
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK = (_PIF_PER_TRAP)
#define BASED(name) name-system_call(%r13)
-#ifdef CONFIG_TRACE_IRQFLAGS
.macro TRACE_IRQS_ON
- brasl %r14,trace_hardirqs_on
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ brasl %r14,trace_hardirqs_on_caller
+#endif
.endm
.macro TRACE_IRQS_OFF
- brasl %r14,trace_hardirqs_off
- .endm
-
- .macro TRACE_IRQS_CHECK
- tm SP_PSW(%r15),0x03 # irqs enabled?
- jz 0f
- brasl %r14,trace_hardirqs_on
- j 1f
-0: brasl %r14,trace_hardirqs_off
-1:
- .endm
-#else
-#define TRACE_IRQS_ON
-#define TRACE_IRQS_OFF
-#define TRACE_IRQS_CHECK
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ brasl %r14,trace_hardirqs_off_caller
#endif
+ .endm
-#ifdef CONFIG_LOCKDEP
.macro LOCKDEP_SYS_EXIT
- tm SP_PSW+1(%r15),0x01 # returning to user ?
- jz 0f
+#ifdef CONFIG_LOCKDEP
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jz .+10
brasl %r14,lockdep_sys_exit
-0:
- .endm
-#else
-#define LOCKDEP_SYS_EXIT
-#endif
-
- .macro STORE_TIMER lc_offset
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- stpt \lc_offset
#endif
.endm
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .macro UPDATE_VTIME lc_from,lc_to,lc_sum
- lg %r10,\lc_from
- slg %r10,\lc_to
- alg %r10,\lc_sum
- stg %r10,\lc_sum
- .endm
+ .macro LPP newpp
+#if IS_ENABLED(CONFIG_KVM)
+ tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
+ jz .+8
+ .insn s,0xb2800000,\newpp
#endif
-
-/*
- * Register usage in interrupt handlers:
- * R9 - pointer to current task structure
- * R13 - pointer to literal pool
- * R14 - return register for function calls
- * R15 - kernel stack pointer
- */
-
- .macro SAVE_ALL_BASE savearea
- stmg %r12,%r15,\savearea
- larl %r13,system_call
.endm
- .macro SAVE_ALL_SVC psworg,savearea
- la %r12,\psworg
- lg %r15,__LC_KERNEL_STACK # problem state -> load ksp
+ .macro HANDLE_SIE_INTERCEPT scratch,reason
+#if IS_ENABLED(CONFIG_KVM)
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz .+62
+ lgr \scratch,%r9
+ slg \scratch,BASED(.Lsie_critical)
+ clg \scratch,BASED(.Lsie_critical_length)
+ .if \reason==1
+ # Some program interrupts are suppressing (e.g. protection).
+ # We must also check the instruction after SIE in that case.
+ # do_protection_exception will rewind to rewind_pad
+ jh .+42
+ .else
+ jhe .+42
+ .endif
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ mvi __SF_EMPTY+31(%r15),\reason # set exit reason
+#endif
.endm
- .macro SAVE_ALL_SYNC psworg,savearea
- la %r12,\psworg
- tm \psworg+1,0x01 # test problem state bit
- jz 2f # skip stack setup save
- lg %r15,__LC_KERNEL_STACK # problem state -> load ksp
+ .macro CHECK_STACK stacksize,savearea
#ifdef CONFIG_CHECK_STACK
- j 3f
-2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
+ tml %r15,\stacksize - CONFIG_STACK_GUARD
+ lghi %r14,\savearea
jz stack_overflow
-3:
#endif
-2:
.endm
- .macro SAVE_ALL_ASYNC psworg,savearea
- la %r12,\psworg
- tm \psworg+1,0x01 # test problem state bit
- jnz 1f # from user -> load kernel stack
- clc \psworg+8(8),BASED(.Lcritical_end)
+ .macro SWITCH_ASYNC savearea,stack,shift
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz 1f
+ lgr %r14,%r9
+ slg %r14,BASED(.Lcritical_start)
+ clg %r14,BASED(.Lcritical_length)
jhe 0f
- clc \psworg+8(8),BASED(.Lcritical_start)
- jl 0f
+ lghi %r11,\savearea # inside critical section, do cleanup
brasl %r14,cleanup_critical
- tm 1(%r12),0x01 # retest problem state after cleanup
+ tmhh %r8,0x0001 # retest problem state after cleanup
jnz 1f
-0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ?
+0: lg %r14,\stack # are we already on the target stack?
slgr %r14,%r15
- srag %r14,%r14,STACK_SHIFT
- jz 2f
-1: lg %r15,__LC_ASYNC_STACK # load async stack
-#ifdef CONFIG_CHECK_STACK
- j 3f
-2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
- jz stack_overflow
-3:
-#endif
-2:
+ srag %r14,%r14,\shift
+ jnz 1f
+ CHECK_STACK 1<<\shift,\savearea
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: lg %r15,\stack # load target stack
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
- .macro CREATE_STACK_FRAME psworg,savearea
- aghi %r15,-SP_SIZE # make room for registers & psw
- mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack
- la %r12,\psworg
- stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2
- icm %r12,12,__LC_SVC_ILC
- stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack
- st %r12,SP_ILC(%r15)
- mvc SP_R12(32,%r15),\savearea # move %r12-%r15 to stack
- la %r12,0
- stg %r12,__SF_BACKCHAIN(%r15)
+ .macro UPDATE_VTIME scratch,enter_timer
+ lg \scratch,__LC_EXIT_TIMER
+ slg \scratch,\enter_timer
+ alg \scratch,__LC_USER_TIMER
+ stg \scratch,__LC_USER_TIMER
+ lg \scratch,__LC_LAST_UPDATE_TIMER
+ slg \scratch,__LC_EXIT_TIMER
+ alg \scratch,__LC_SYSTEM_TIMER
+ stg \scratch,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
.endm
- .macro RESTORE_ALL psworg,sync
- mvc \psworg(16),SP_PSW(%r15) # move user PSW to lowcore
- .if !\sync
- ni \psworg+1,0xfd # clear wait state bit
- .endif
- lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user
- STORE_TIMER __LC_EXIT_TIMER
- lpswe \psworg # back to caller
+ .macro LAST_BREAK scratch
+ srag \scratch,%r10,23
+ jz .+10
+ stg %r10,__TI_last_break(%r12)
+ .endm
+
+ .macro REENABLE_IRQS
+ stg %r8,__LC_RETURN_PSW
+ ni __LC_RETURN_PSW,0xbf
+ ssm __LC_RETURN_PSW
.endm
+ .macro STCK savearea
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ .insn s,0xb27c0000,\savearea # store clock fast
+#else
+ .insn s,0xb2050000,\savearea # store clock
+#endif
+ .endm
+
+ .section .kprobes.text, "ax"
+
/*
* Scheduler resume function, called by switch_to
* gpr2 = (task_struct *) prev
@@ -195,32 +175,20 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
* Returns:
* gpr2 = prev
*/
- .globl __switch_to
-__switch_to:
- tm __THREAD_per+4(%r3),0xe8 # is the new process using per ?
- jz __switch_to_noper # if not we're fine
- stctg %c9,%c11,__SF_EMPTY(%r15)# We are using per stuff
- clc __THREAD_per(24,%r3),__SF_EMPTY(%r15)
- je __switch_to_noper # we got away without bashing TLB's
- lctlg %c9,%c11,__THREAD_per(%r3) # Nope we didn't
-__switch_to_noper:
- lg %r4,__THREAD_info(%r2) # get thread_info of prev
- tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending?
- jz __switch_to_no_mcck
- ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
- lg %r4,__THREAD_info(%r3) # get thread_info of next
- oi __TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next
-__switch_to_no_mcck:
- stmg %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task
- stg %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp
- lg %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp
- lmg %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task
- stg %r3,__LC_CURRENT # __LC_CURRENT = current task struct
- lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
- lg %r3,__THREAD_info(%r3) # load thread_info from task struct
- stg %r3,__LC_THREAD_INFO
- aghi %r3,STACK_SIZE
- stg %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack
+ENTRY(__switch_to)
+ stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
+ stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev
+ lg %r4,__THREAD_info(%r2) # get thread_info of prev
+ lg %r5,__THREAD_info(%r3) # get thread_info of next
+ lgr %r15,%r5
+ aghi %r15,STACK_INIT # end of kernel stack of next
+ stg %r3,__LC_CURRENT # store task struct of next
+ stg %r5,__LC_THREAD_INFO # store thread info of next
+ stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
+ mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
+ lg %r15,__THREAD_ksp(%r3) # load kernel stack of next
+ lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
br %r14
__critical_start:
@@ -229,633 +197,589 @@ __critical_start:
* are executed with interrupts enabled.
*/
- .globl system_call
-system_call:
- STORE_TIMER __LC_SYNC_ENTER_TIMER
-sysc_saveall:
- SAVE_ALL_BASE __LC_SAVE_AREA
- SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ENTRY(system_call)
+ stpt __LC_SYNC_ENTER_TIMER
+sysc_stmg:
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ lghi %r14,_PIF_SYSCALL
+sysc_per:
+ lg %r15,__LC_KERNEL_STACK
+ la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
sysc_vtime:
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
-sysc_stime:
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-sysc_update:
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-#endif
+ UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
+ LAST_BREAK %r13
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
+ stg %r14,__PT_FLAGS(%r11)
sysc_do_svc:
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- slag %r7,%r7,2 # *4 and test for svc 0
+ lg %r10,__TI_sysc_table(%r12) # address of system call table
+ llgh %r8,__PT_INT_CODE+2(%r11)
+ slag %r8,%r8,2 # shift and test for svc 0
jnz sysc_nr_ok
# svc 0: system call number in %r1
- cl %r1,BASED(.Lnr_syscalls)
+ llgfr %r1,%r1 # clear high word in r1
+ cghi %r1,NR_syscalls
jnl sysc_nr_ok
- lgfr %r7,%r1 # clear high word in r1
- slag %r7,%r7,2 # svc 0: system call number in %r1
+ sth %r1,__PT_INT_CODE+2(%r11)
+ slag %r8,%r1,2
sysc_nr_ok:
- mvc SP_ARGS(8,%r15),SP_R7(%r15)
-sysc_do_restart:
- larl %r10,sys_call_table
-#ifdef CONFIG_COMPAT
- tm __TI_flags+5(%r9),(_TIF_31BIT>>16) # running in 31 bit mode ?
- jno sysc_noemu
- larl %r10,sys_call_table_emu # use 31 bit emulation system calls
-sysc_noemu:
-#endif
- tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
- lgf %r8,0(%r7,%r10) # load address of system call routine
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stg %r2,__PT_ORIG_GPR2(%r11)
+ stg %r7,STACK_FRAME_OVERHEAD(%r15)
+ lgf %r9,0(%r8,%r10) # get system call add.
+ tm __TI_flags+7(%r12),_TIF_TRACE
jnz sysc_tracesys
- basr %r14,%r8 # call sys_xxxx
- stg %r2,SP_R2(%r15) # store return value (change R2 on stack)
+ basr %r14,%r9 # call sys_xxxx
+ stg %r2,__PT_R2(%r11) # store return value
sysc_return:
- tm __TI_flags+7(%r9),_TIF_WORK_SVC
- jnz sysc_work # there is work to do (signals etc.)
-sysc_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
- larl %r1,sysc_restore_trace_psw
- lpswe 0(%r1)
-sysc_restore_trace:
- TRACE_IRQS_CHECK
LOCKDEP_SYS_EXIT
-#endif
-sysc_leave:
- RESTORE_ALL __LC_RETURN_PSW,1
+sysc_tif:
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno sysc_restore
+ tm __PT_FLAGS+7(%r11),_PIF_WORK
+ jnz sysc_work
+ tm __TI_flags+7(%r12),_TIF_WORK
+ jnz sysc_work # check for work
+ tm __LC_CPU_FLAGS+7,_CIF_WORK
+ jnz sysc_work
+sysc_restore:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
sysc_done:
-#ifdef CONFIG_TRACE_IRQFLAGS
- .align 8
- .globl sysc_restore_trace_psw
-sysc_restore_trace_psw:
- .quad 0, sysc_restore_trace
-#endif
-
-#
-# recheck if there is more work to do
-#
-sysc_work_loop:
- tm __TI_flags+7(%r9),_TIF_WORK_SVC
- jz sysc_restore # there is no work to do
#
# One of the work bits is on. Find out which one.
#
sysc_work:
- tm SP_PSW+1(%r15),0x01 # returning to user ?
- jno sysc_restore
- tm __TI_flags+7(%r9),_TIF_MCCK_PENDING
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
jo sysc_mcck_pending
- tm __TI_flags+7(%r9),_TIF_NEED_RESCHED
+ tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
jo sysc_reschedule
- tm __TI_flags+7(%r9),_TIF_SIGPENDING
- jnz sysc_sigpending
- tm __TI_flags+7(%r9),_TIF_RESTART_SVC
- jo sysc_restart
- tm __TI_flags+7(%r9),_TIF_SINGLE_STEP
+ tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP
jo sysc_singlestep
- j sysc_restore
-sysc_work_done:
+ tm __TI_flags+7(%r12),_TIF_SIGPENDING
+ jo sysc_sigpending
+ tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+ jo sysc_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ jo sysc_uaccess
+ j sysc_return # beware of critical section cleanup
#
# _TIF_NEED_RESCHED is set, call schedule
#
sysc_reschedule:
- larl %r14,sysc_work_loop
- jg schedule # return point is sysc_return
+ larl %r14,sysc_return
+ jg schedule
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
sysc_mcck_pending:
- larl %r14,sysc_work_loop
+ larl %r14,sysc_return
jg s390_handle_mcck # TIF bit will be cleared by handler
#
+# _CIF_ASCE is set, load user space asce
+#
+sysc_uaccess:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j sysc_return
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
sysc_sigpending:
- ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
- la %r2,SP_PTREGS(%r15) # load pt_regs
- brasl %r14,do_signal # call do_signal
- tm __TI_flags+7(%r9),_TIF_RESTART_SVC
- jo sysc_restart
- tm __TI_flags+7(%r9),_TIF_SINGLE_STEP
- jo sysc_singlestep
- j sysc_work_loop
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_signal
+ tm __PT_FLAGS+7(%r11),_PIF_SYSCALL
+ jno sysc_return
+ lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
+ lg %r10,__TI_sysc_table(%r12) # address of system call table
+ lghi %r8,0 # svc 0 returns -ENOSYS
+ llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
+ cghi %r1,NR_syscalls
+ jnl sysc_nr_ok # invalid svc number -> do svc 0
+ slag %r8,%r1,2
+ j sysc_nr_ok # restart svc
#
-# _TIF_RESTART_SVC is set, set up registers and restart svc
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
#
-sysc_restart:
- ni __TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
- lg %r7,SP_R2(%r15) # load new svc number
- slag %r7,%r7,2 # *4
- mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument
- lmg %r2,%r6,SP_R2(%r15) # load svc arguments
- j sysc_do_restart # restart svc
+sysc_notify_resume:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,sysc_return
+ jg do_notify_resume
#
-# _TIF_SINGLE_STEP is set, call do_single_step
+# _PIF_PER_TRAP is set, call do_per_trap
#
sysc_singlestep:
- ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
- lhi %r0,__LC_PGM_OLD_PSW
- sth %r0,SP_TRAP(%r15) # set trap indication to pgm check
- la %r2,SP_PTREGS(%r15) # address of register-save area
- larl %r14,sysc_return # load adr. of system return
- jg do_single_step # branch to do_sigtrap
+ ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,sysc_return
+ jg do_per_trap
#
-# call syscall_trace before and after system call
-# special linkage: %r12 contains the return address for trace_svc
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
#
sysc_tracesys:
- la %r2,SP_PTREGS(%r15) # load pt_regs
+ lgr %r2,%r11 # pass pointer to pt_regs
la %r3,0
- srl %r7,2
- stg %r7,SP_R2(%r15)
- brasl %r14,syscall_trace
+ llgh %r0,__PT_INT_CODE+2(%r11)
+ stg %r0,__PT_R2(%r11)
+ brasl %r14,do_syscall_trace_enter
lghi %r0,NR_syscalls
- clg %r0,SP_R2(%r15)
+ clgr %r0,%r2
jnh sysc_tracenogo
- lg %r7,SP_R2(%r15) # strace might have changed the
- sll %r7,2 # system call
- lgf %r8,0(%r7,%r10)
+ sllg %r8,%r2,2
+ lgf %r9,0(%r8,%r10)
sysc_tracego:
- lmg %r3,%r6,SP_R3(%r15)
- lg %r2,SP_ORIG_R2(%r15)
- basr %r14,%r8 # call sys_xxx
- stg %r2,SP_R2(%r15) # store return value
+ lmg %r3,%r7,__PT_R3(%r11)
+ stg %r7,STACK_FRAME_OVERHEAD(%r15)
+ lg %r2,__PT_ORIG_GPR2(%r11)
+ basr %r14,%r9 # call sys_xxx
+ stg %r2,__PT_R2(%r11) # store return value
sysc_tracenogo:
- tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
+ tm __TI_flags+7(%r12),_TIF_TRACE
jz sysc_return
- la %r2,SP_PTREGS(%r15) # load pt_regs
- la %r3,1
- larl %r14,sysc_return # return point is sysc_return
- jg syscall_trace
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,sysc_return
+ jg do_syscall_trace_exit
#
# a new process exits the kernel with ret_from_fork
#
- .globl ret_from_fork
-ret_from_fork:
- lg %r13,__LC_SVC_NEW_PSW+8
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- tm SP_PSW+1(%r15),0x01 # forking a kernel thread ?
- jo 0f
- stg %r15,SP_R15(%r15) # store stack pointer for new kthread
-0: brasl %r14,schedule_tail
+ENTRY(ret_from_fork)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ lg %r12,__LC_THREAD_INFO
+ brasl %r14,schedule_tail
TRACE_IRQS_ON
- stosm 24(%r15),0x03 # reenable interrupts
- j sysc_return
-
-#
-# kernel_execve function needs to deal with pt_regs that is not
-# at the usual place
-#
- .globl kernel_execve
-kernel_execve:
- stmg %r12,%r15,96(%r15)
- lgr %r14,%r15
- aghi %r15,-SP_SIZE
- stg %r14,__SF_BACKCHAIN(%r15)
- la %r12,SP_PTREGS(%r15)
- xc 0(__PT_SIZE,%r12),0(%r12)
- lgr %r5,%r12
- brasl %r14,do_execve
- ltgfr %r2,%r2
- je 0f
- aghi %r15,SP_SIZE
- lmg %r12,%r15,96(%r15)
- br %r14
- # execve succeeded.
-0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts
- lg %r15,__LC_KERNEL_STACK # load ksp
- aghi %r15,-SP_SIZE # make room for registers & psw
- lg %r13,__LC_SVC_NEW_PSW+8
- lg %r9,__LC_THREAD_INFO
- mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- brasl %r14,execve_tail
- j sysc_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
+ jne sysc_tracenogo
+ # it's a kernel thread
+ lmg %r9,%r10,__PT_R9(%r11) # load gprs
+ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ basr %r14,%r9
+ j sysc_tracenogo
/*
* Program check handler routine
*/
- .globl pgm_check_handler
-pgm_check_handler:
-/*
- * First we need to check for a special case:
- * Single stepping an instruction that disables the PER event mask will
- * cause a PER event AFTER the mask has been set. Example: SVC or LPSW.
- * For a single stepped SVC the program check handler gets control after
- * the SVC new PSW has been loaded. But we want to execute the SVC first and
- * then handle the PER event. Therefore we update the SVC old PSW to point
- * to the pgm_check_handler and branch to the SVC handler after we checked
- * if we have to load the kernel stack register.
- * For every other possible cause for PER event without the PER mask set
- * we just ignore the PER event (FIXME: is there anything we have to do
- * for LPSW?).
- */
- STORE_TIMER __LC_SYNC_ENTER_TIMER
- SAVE_ALL_BASE __LC_SAVE_AREA
- tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception
- jnz pgm_per # got per exception -> special case
- SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- jz pgm_no_vtime
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-pgm_no_vtime:
-#endif
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- mvc SP_ARGS(8,%r15),__LC_LAST_BREAK
- TRACE_IRQS_OFF
- lgf %r3,__LC_PGM_ILC # load program interruption code
- lghi %r8,0x7f
- ngr %r8,%r3
-pgm_do_call:
- sll %r8,3
+ENTRY(pgm_check_handler)
+ stpt __LC_SYNC_ENTER_TIMER
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_PGM_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,1
+ tmhh %r8,0x0001 # test problem state bit
+ jnz 1f # -> fault in user space
+ tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 0f # -> enabled, can't be a double fault
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jnz pgm_svcper # -> single stepped svc
+0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
+ LAST_BREAK %r14
+ lg %r15,__LC_KERNEL_STACK
+ lg %r14,__TI_task(%r12)
+ lghi %r13,__LC_PGM_TDB
+ tm __LC_PGM_ILC+2,0x02 # check for transaction abort
+ jz 2f
+ mvc __THREAD_trap_tdb(256,%r14),0(%r13)
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
+ mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ stg %r10,__PT_ARGS(%r11)
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jz 0f
+ tmhh %r8,0x0001 # kernel per event ?
+ jz pgm_kprobe
+ oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
+ mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+ mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
+ mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
+0: REENABLE_IRQS
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
- lg %r1,0(%r8,%r1) # load address of handler routine
- la %r2,SP_PTREGS(%r15) # address of register-save area
- larl %r14,sysc_return
- br %r1 # branch to interrupt-handler
-
-#
-# handle per exception
-#
-pgm_per:
- tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on
- jnz pgm_per_std # ok, normal per event from user space
-# ok its one of the special cases, now we need to find out which one
- clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW
- je pgm_svcper
-# no interesting special case, ignore PER event
- lmg %r12,%r15,__LC_SAVE_AREA
- lpswe __LC_PGM_OLD_PSW
-
-#
-# Normal per exception
-#
-pgm_per_std:
- SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- jz pgm_no_vtime2
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-pgm_no_vtime2:
-#endif
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- TRACE_IRQS_OFF
- lg %r1,__TI_task(%r9)
- tm SP_PSW+1(%r15),0x01 # kernel per event ?
- jz kernel_per
- mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
- mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
- oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
- lgf %r3,__LC_PGM_ILC # load program interruption code
- lghi %r8,0x7f
- ngr %r8,%r3 # clear per-event-bit and ilc
+ llgh %r10,__PT_INT_CODE+2(%r11)
+ nill %r10,0x007f
+ sll %r10,2
je sysc_return
- j pgm_do_call
+ lgf %r1,0(%r10,%r1) # load address of handler routine
+ lgr %r2,%r11 # pass pointer to pt_regs
+ basr %r14,%r1 # branch to interrupt-handler
+ j sysc_return
#
-# it was a single stepped SVC that is causing all the trouble
+# PER event in supervisor state, must be kprobes
#
-pgm_svcper:
- SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-#endif
- llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- lg %r1,__TI_task(%r9)
- mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
- mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
- oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- j sysc_do_svc
+pgm_kprobe:
+ REENABLE_IRQS
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_per_trap
+ j sysc_return
#
-# per was called from kernel, must be kprobes
+# single stepped system call
#
-kernel_per:
- lhi %r0,__LC_PGM_OLD_PSW
- sth %r0,SP_TRAP(%r15) # set trap indication to pgm check
- la %r2,SP_PTREGS(%r15) # address of register-save area
- larl %r14,sysc_restore # load adr. of system ret, no work
- jg do_single_step # branch to do_single_step
+pgm_svcper:
+ mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ larl %r14,sysc_per
+ stg %r14,__LC_RETURN_PSW+8
+ lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
+ lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs
/*
* IO interrupt handler routine
*/
- .globl io_int_handler
-io_int_handler:
- STORE_TIMER __LC_ASYNC_ENTER_TIMER
- stck __LC_INT_CLOCK
- SAVE_ALL_BASE __LC_SAVE_AREA+32
- SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
- CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- jz io_no_vtime
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
-io_no_vtime:
-#endif
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
+ENTRY(io_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_IO_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,2
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+ tmhh %r8,0x0001 # interrupting from user?
+ jz io_skip
+ UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+ LAST_BREAK %r14
+io_skip:
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
- la %r2,SP_PTREGS(%r15) # address of register-save area
- brasl %r14,do_IRQ # call standard irq handler
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+io_loop:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz io_call
+ lghi %r3,THIN_INTERRUPT
+io_call:
+ brasl %r14,do_IRQ
+ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
+ jz io_return
+ tpi 0
+ jz io_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j io_loop
io_return:
- tm __TI_flags+7(%r9),_TIF_WORK_INT
+ LOCKDEP_SYS_EXIT
+ TRACE_IRQS_ON
+io_tif:
+ tm __TI_flags+7(%r12),_TIF_WORK
jnz io_work # there is work to do (signals etc.)
+ tm __LC_CPU_FLAGS+7,_CIF_WORK
+ jnz io_work
io_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
- larl %r1,io_restore_trace_psw
- lpswe 0(%r1)
-io_restore_trace:
- TRACE_IRQS_CHECK
- LOCKDEP_SYS_EXIT
-#endif
-io_leave:
- RESTORE_ALL __LC_RETURN_PSW,0
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
io_done:
-#ifdef CONFIG_TRACE_IRQFLAGS
- .align 8
- .globl io_restore_trace_psw
-io_restore_trace_psw:
- .quad 0, io_restore_trace
-#endif
-
#
-# There is work todo, we need to check if we return to userspace, then
-# check, if we are in SIE, if yes leave it
+# There is work todo, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK work
+# 2) if we return to kernel code and kvm is enabled check if we need to
+# modify the psw to leave SIE
+# 3) if we return to kernel code and preemptive scheduling is enabled check
+# the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
#
io_work:
- tm SP_PSW+1(%r15),0x01 # returning to user ?
-#ifndef CONFIG_PREEMPT
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
- jnz io_work_user # yes -> no need to check for SIE
- la %r1, BASED(sie_opcode) # we return to kernel here
- lg %r2, SP_PSW+8(%r15)
- clc 0(2,%r1), 0(%r2) # is current instruction = SIE?
- jne io_restore # no-> return to kernel
- lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE
- aghi %r1, 4
- stg %r1, SP_PSW+8(%r15)
- j io_restore # return to kernel
-#else
- jno io_restore # no-> skip resched & signal
-#endif
-#else
- jnz io_work_user # yes -> do resched & signal
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
- la %r1, BASED(sie_opcode)
- lg %r2, SP_PSW+8(%r15)
- clc 0(2,%r1), 0(%r2) # is current instruction = SIE?
- jne 0f # no -> leave PSW alone
- lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE
- aghi %r1, 4
- stg %r1, SP_PSW+8(%r15)
-0:
-#endif
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jo io_work_user # yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
# check for preemptive scheduling
- icm %r0,15,__TI_precount(%r9)
+ icm %r0,15,__TI_precount(%r12)
jnz io_restore # preemption is disabled
+ tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ jno io_restore
# switch to kernel stack
- lg %r1,SP_R15(%r15)
- aghi %r1,-SP_SIZE
- mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
+ lg %r1,__PT_R15(%r11)
+ aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
-io_resume_loop:
- tm __TI_flags+7(%r9),_TIF_NEED_RESCHED
- jno io_restore
- larl %r14,io_resume_loop
- jg preempt_schedule_irq
+ # TRACE_IRQS_ON already done at io_return, call
+ # TRACE_IRQS_OFF to keep things symmetrical
+ TRACE_IRQS_OFF
+ brasl %r14,preempt_schedule_irq
+ j io_return
+#else
+ j io_restore
#endif
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
io_work_user:
lg %r1,__LC_KERNEL_STACK
- aghi %r1,-SP_SIZE
- mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
+
#
# One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGPENDING, _TIF_NEED_RESCHED
-# and _TIF_MCCK_PENDING
#
-io_work_loop:
- tm __TI_flags+7(%r9),_TIF_MCCK_PENDING
+io_work_tif:
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
jo io_mcck_pending
- tm __TI_flags+7(%r9),_TIF_NEED_RESCHED
+ tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
jo io_reschedule
- tm __TI_flags+7(%r9),_TIF_SIGPENDING
- jnz io_sigpending
- j io_restore
-io_work_done:
-
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
-sie_opcode:
- .long 0xb2140000
-#endif
+ tm __TI_flags+7(%r12),_TIF_SIGPENDING
+ jo io_sigpending
+ tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+ jo io_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ jo io_uaccess
+ j io_return # beware of critical section cleanup
#
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
#
io_mcck_pending:
+ # TRACE_IRQS_ON already done at io_return
brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler
- j io_work_loop
+ TRACE_IRQS_OFF
+ j io_return
+
+#
+# _CIF_ASCE is set, load user space asce
+#
+io_uaccess:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j io_return
#
# _TIF_NEED_RESCHED is set, call schedule
#
io_reschedule:
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
+ # TRACE_IRQS_ON already done at io_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
brasl %r14,schedule # call scheduler
- stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
TRACE_IRQS_OFF
- tm __TI_flags+7(%r9),_TIF_WORK_INT
- jz io_restore # there is no work to do
- j io_work_loop
+ j io_return
#
# _TIF_SIGPENDING or is set, call do_signal
#
io_sigpending:
- TRACE_IRQS_ON
- stosm __SF_EMPTY(%r15),0x03 # reenable interrupts
- la %r2,SP_PTREGS(%r15) # load pt_regs
- brasl %r14,do_signal # call do_signal
- stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts
+ # TRACE_IRQS_ON already done at io_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_signal
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
TRACE_IRQS_OFF
- j io_work_loop
+ j io_return
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+io_notify_resume:
+ # TRACE_IRQS_ON already done at io_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_notify_resume
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j io_return
/*
* External interrupt handler routine
*/
- .globl ext_int_handler
-ext_int_handler:
- STORE_TIMER __LC_ASYNC_ENTER_TIMER
- stck __LC_INT_CLOCK
- SAVE_ALL_BASE __LC_SAVE_AREA+32
- SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
- CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- jz ext_no_vtime
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
-ext_no_vtime:
-#endif
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
+ENTRY(ext_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_EXT_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,3
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+ tmhh %r8,0x0001 # interrupting from user ?
+ jz ext_skip
+ UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+ LAST_BREAK %r14
+ext_skip:
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ lghi %r1,__LC_EXT_PARAMS2
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TRACE_IRQS_OFF
- la %r2,SP_PTREGS(%r15) # address of register-save area
- llgh %r3,__LC_EXT_INT_CODE # get interruption code
- brasl %r14,do_extint
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,EXT_INTERRUPT
+ brasl %r14,do_IRQ
j io_return
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+ stg %r3,__SF_EMPTY(%r15)
+ larl %r1,psw_idle_lpsw+4
+ stg %r1,__SF_EMPTY+8(%r15)
+ STCK __CLOCK_IDLE_ENTER(%r2)
+ stpt __TIMER_IDLE_ENTER(%r2)
+psw_idle_lpsw:
+ lpswe __SF_EMPTY(%r15)
+ br %r14
+psw_idle_end:
+
__critical_end:
/*
* Machine check handler routines
*/
- .globl mcck_int_handler
-mcck_int_handler:
+ENTRY(mcck_int_handler)
+ STCK __LC_MCCK_CLOCK
la %r1,4095 # revalidate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
- SAVE_ALL_BASE __LC_SAVE_AREA+64
- la %r12,__LC_MCK_OLD_PSW
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_MCK_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,4
tm __LC_MCCK_CODE,0x80 # system damage?
- jo mcck_int_main # yes -> rest of mcck code invalid
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- la %r14,4095
- mvc __LC_SAVE_AREA+104(8),__LC_ASYNC_ENTER_TIMER
- mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14)
+ jo mcck_panic # yes -> rest of mcck code invalid
+ lghi %r14,__LC_CPU_TIMER_SAVE_AREA
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid?
- jo 1f
+ jo 3f
la %r14,__LC_SYNC_ENTER_TIMER
clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
jl 0f
la %r14,__LC_ASYNC_ENTER_TIMER
0: clc 0(8,%r14),__LC_EXIT_TIMER
- jl 0f
+ jl 1f
la %r14,__LC_EXIT_TIMER
-0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
- jl 0f
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ jl 2f
la %r14,__LC_LAST_UPDATE_TIMER
-0: spt 0(%r14)
- mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14)
-1:
-#endif
- tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
- jno mcck_int_main # no -> skip cleanup critical
- tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit
- jnz mcck_int_main # from user -> load kernel stack
- clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end)
- jhe mcck_int_main
- clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start)
- jl mcck_int_main
- brasl %r14,cleanup_critical
-mcck_int_main:
- lg %r14,__LC_PANIC_STACK # are we already on the panic stack?
- slgr %r14,%r15
- srag %r14,%r14,PAGE_SHIFT
- jz 0f
- lg %r15,__LC_PANIC_STACK # load panic stack
-0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid?
- jno mcck_no_vtime # no -> no timer update
- tm SP_PSW+1(%r15),0x01 # interrupting from user ?
- jz mcck_no_vtime
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
-mcck_no_vtime:
-#endif
- lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
- la %r2,SP_PTREGS(%r15) # load pt_regs
+2: spt 0(%r14)
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
+3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
+ jno mcck_panic # no -> skip cleanup critical
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
+ tmhh %r8,0x0001 # interrupting from user ?
+ jz mcck_skip
+ UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
+ LAST_BREAK %r14
+mcck_skip:
+ lghi %r14,__LC_GPREGS_SAVE_AREA+64
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- tm SP_PSW+1(%r15),0x01 # returning to user ?
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno mcck_return
lg %r1,__LC_KERNEL_STACK # switch to kernel stack
- aghi %r1,-SP_SIZE
- mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
- stosm __SF_EMPTY(%r15),0x04 # turn dat on
- tm __TI_flags+7(%r9),_TIF_MCCK_PENDING
+ ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
jno mcck_return
TRACE_IRQS_OFF
brasl %r14,s390_handle_mcck
TRACE_IRQS_ON
mcck_return:
- mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW
- ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
- lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+104
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
stpt __LC_EXIT_TIMER
-0:
-#endif
- lpswe __LC_RETURN_MCCK_PSW # back to caller
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+0: lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_MCCK_PSW
-/*
- * Restart interruption handler, kick starter for additional CPUs
- */
-#ifdef CONFIG_SMP
- __CPUINIT
- .globl restart_int_handler
-restart_int_handler:
- lg %r15,__LC_SAVE_AREA+120 # load ksp
- lghi %r10,__LC_CREGS_SAVE_AREA
- lctlg %c0,%c15,0(%r10) # get new ctl regs
- lghi %r10,__LC_AREGS_SAVE_AREA
- lam %a0,%a15,0(%r10)
- lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone
- stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on
- jg start_secondary
- .previous
-#else
-/*
- * If we do not run with SMP enabled, let the new CPU crash ...
- */
- .globl restart_int_handler
-restart_int_handler:
- basr %r1,0
-restart_base:
- lpswe restart_crash-restart_base(%r1)
- .align 8
-restart_crash:
- .long 0x000a0000,0x00000000,0x00000000,0x00000000
-restart_go:
-#endif
+mcck_panic:
+ lg %r14,__LC_PANIC_STACK
+ slgr %r14,%r15
+ srag %r14,%r14,PAGE_SHIFT
+ jz 0f
+ lg %r15,__LC_PANIC_STACK
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j mcck_skip
+
+#
+# PSW restart interrupt handler
+#
+ENTRY(restart_int_handler)
+ stg %r15,__LC_SAVE_AREA_RESTART
+ lg %r15,__LC_RESTART_STACK
+ aghi %r15,-__PT_SIZE # create pt_regs on stack
+ xc 0(__PT_SIZE,%r15),0(%r15)
+ stmg %r0,%r14,__PT_R0(%r15)
+ mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+ mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
+ aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+ lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA
+ lg %r3,__LC_RESTART_SOURCE
+ ltgr %r3,%r3 # test source cpu address
+ jm 1f # negative -> skip source stop
+0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
+ brc 10,0b # wait for status stored
+1: basr %r14,%r1 # call function
+ stap __SF_EMPTY(%r15) # store cpu address
+ llgh %r3,__SF_EMPTY(%r15)
+2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
+ brc 2,2b
+3: j 3b
+
+ .section .kprobes.text, "ax"
#ifdef CONFIG_CHECK_STACK
/*
@@ -865,202 +789,247 @@ restart_go:
*/
stack_overflow:
lg %r15,__LC_PANIC_STACK # change to panic stack
- aghi %r15,-SP_SIZE
- mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack
- stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack
- la %r1,__LC_SAVE_AREA
- chi %r12,__LC_SVC_OLD_PSW
- je 0f
- chi %r12,__LC_PGM_OLD_PSW
- je 0f
- la %r1,__LC_SAVE_AREA+32
-0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack
- mvc SP_ARGS(8,%r15),__LC_LAST_BREAK
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
- la %r2,SP_PTREGS(%r15) # load pt_regs
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
+ stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
#endif
-cleanup_table_system_call:
- .quad system_call, sysc_do_svc
-cleanup_table_sysc_return:
- .quad sysc_return, sysc_leave
-cleanup_table_sysc_leave:
- .quad sysc_leave, sysc_done
-cleanup_table_sysc_work_loop:
- .quad sysc_work_loop, sysc_work_done
-cleanup_table_io_return:
- .quad io_return, io_leave
-cleanup_table_io_leave:
- .quad io_leave, io_done
-cleanup_table_io_work_loop:
- .quad io_work_loop, io_work_done
+ .align 8
+cleanup_table:
+ .quad system_call
+ .quad sysc_do_svc
+ .quad sysc_tif
+ .quad sysc_restore
+ .quad sysc_done
+ .quad io_tif
+ .quad io_restore
+ .quad io_done
+ .quad psw_idle
+ .quad psw_idle_end
cleanup_critical:
- clc 8(8,%r12),BASED(cleanup_table_system_call)
+ clg %r9,BASED(cleanup_table) # system_call
jl 0f
- clc 8(8,%r12),BASED(cleanup_table_system_call+8)
+ clg %r9,BASED(cleanup_table+8) # sysc_do_svc
jl cleanup_system_call
-0:
- clc 8(8,%r12),BASED(cleanup_table_sysc_return)
- jl 0f
- clc 8(8,%r12),BASED(cleanup_table_sysc_return+8)
- jl cleanup_sysc_return
-0:
- clc 8(8,%r12),BASED(cleanup_table_sysc_leave)
- jl 0f
- clc 8(8,%r12),BASED(cleanup_table_sysc_leave+8)
- jl cleanup_sysc_leave
-0:
- clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop)
- jl 0f
- clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop+8)
- jl cleanup_sysc_return
-0:
- clc 8(8,%r12),BASED(cleanup_table_io_return)
+ clg %r9,BASED(cleanup_table+16) # sysc_tif
jl 0f
- clc 8(8,%r12),BASED(cleanup_table_io_return+8)
- jl cleanup_io_return
-0:
- clc 8(8,%r12),BASED(cleanup_table_io_leave)
+ clg %r9,BASED(cleanup_table+24) # sysc_restore
+ jl cleanup_sysc_tif
+ clg %r9,BASED(cleanup_table+32) # sysc_done
+ jl cleanup_sysc_restore
+ clg %r9,BASED(cleanup_table+40) # io_tif
jl 0f
- clc 8(8,%r12),BASED(cleanup_table_io_leave+8)
- jl cleanup_io_leave
-0:
- clc 8(8,%r12),BASED(cleanup_table_io_work_loop)
+ clg %r9,BASED(cleanup_table+48) # io_restore
+ jl cleanup_io_tif
+ clg %r9,BASED(cleanup_table+56) # io_done
+ jl cleanup_io_restore
+ clg %r9,BASED(cleanup_table+64) # psw_idle
jl 0f
- clc 8(8,%r12),BASED(cleanup_table_io_work_loop+8)
- jl cleanup_io_return
-0:
- br %r14
+ clg %r9,BASED(cleanup_table+72) # psw_idle_end
+ jl cleanup_idle
+0: br %r14
+
cleanup_system_call:
- mvc __LC_RETURN_PSW(16),0(%r12)
- cghi %r12,__LC_MCK_OLD_PSW
- je 0f
- la %r12,__LC_SAVE_AREA+32
- j 1f
-0: la %r12,__LC_SAVE_AREA+64
-1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8)
+ # check if stpt has been executed
+ clg %r9,BASED(cleanup_system_call_insn)
jh 0f
mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
-0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16)
- jhe cleanup_vtime
-#endif
- clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn)
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: # check if stmg has been executed
+ clg %r9,BASED(cleanup_system_call_insn+8)
+ jh 0f
+ mvc __LC_SAVE_AREA_SYNC(64),0(%r11)
+0: # check if base register setup + TIF bit load has been done
+ clg %r9,BASED(cleanup_system_call_insn+16)
+ jhe 0f
+ # set up saved registers r10 and r12
+ stg %r10,16(%r11) # r10 last break
+ stg %r12,32(%r11) # r12 thread-info pointer
+0: # check if the user time update has been done
+ clg %r9,BASED(cleanup_system_call_insn+24)
jh 0f
- mvc __LC_SAVE_AREA(32),0(%r12)
-0: stg %r13,8(%r12)
- stg %r12,__LC_SAVE_AREA+96 # argh
- SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
- lg %r12,__LC_SAVE_AREA+96 # argh
- stg %r15,24(%r12)
- llgh %r7,__LC_SVC_INT_CODE
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-cleanup_vtime:
- clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24)
- jhe cleanup_stime
- UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
-cleanup_stime:
- clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32)
- jh cleanup_update
- UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-cleanup_update:
+ lg %r15,__LC_EXIT_TIMER
+ slg %r15,__LC_SYNC_ENTER_TIMER
+ alg %r15,__LC_USER_TIMER
+ stg %r15,__LC_USER_TIMER
+0: # check if the system time update has been done
+ clg %r9,BASED(cleanup_system_call_insn+32)
+ jh 0f
+ lg %r15,__LC_LAST_UPDATE_TIMER
+ slg %r15,__LC_EXIT_TIMER
+ alg %r15,__LC_SYSTEM_TIMER
+ stg %r15,__LC_SYSTEM_TIMER
+0: # update accounting time stamp
mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-#endif
- mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8)
- la %r12,__LC_RETURN_PSW
+ # do LAST_BREAK
+ lg %r9,16(%r11)
+ srag %r9,%r9,23
+ jz 0f
+ mvc __TI_last_break(8,%r12),16(%r11)
+0: # set up saved register r11
+ lg %r15,__LC_KERNEL_STACK
+ la %r9,STACK_FRAME_OVERHEAD(%r15)
+ stg %r9,24(%r11) # r11 pt_regs pointer
+ # fill pt_regs
+ mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
+ stmg %r0,%r7,__PT_R0(%r9)
+ mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
+ xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+ mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL
+ # setup saved register r15
+ stg %r15,56(%r11) # r15 stack pointer
+ # set new psw address and exit
+ larl %r9,sysc_do_svc
br %r14
cleanup_system_call_insn:
- .quad sysc_saveall
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
.quad system_call
- .quad sysc_vtime
- .quad sysc_stime
- .quad sysc_update
-#endif
+ .quad sysc_stmg
+ .quad sysc_per
+ .quad sysc_vtime+18
+ .quad sysc_vtime+42
-cleanup_sysc_return:
- mvc __LC_RETURN_PSW(8),0(%r12)
- mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_return)
- la %r12,__LC_RETURN_PSW
+cleanup_sysc_tif:
+ larl %r9,sysc_tif
br %r14
-cleanup_sysc_leave:
- clc 8(8,%r12),BASED(cleanup_sysc_leave_insn)
- je 2f
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
- clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
- je 2f
-#endif
- mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
- cghi %r12,__LC_MCK_OLD_PSW
- jne 0f
- mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
- j 1f
-0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
-1: lmg %r0,%r11,SP_R0(%r15)
- lg %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+cleanup_sysc_restore:
+ clg %r9,BASED(cleanup_sysc_restore_insn)
+ je 0f
+ lg %r9,24(%r11) # get saved pointer to pt_regs
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
+ mvc 0(64,%r11),__PT_R8(%r9)
+ lmg %r0,%r7,__PT_R0(%r9)
+0: lmg %r8,%r9,__LC_RETURN_PSW
br %r14
-cleanup_sysc_leave_insn:
+cleanup_sysc_restore_insn:
.quad sysc_done - 4
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .quad sysc_done - 8
-#endif
-cleanup_io_return:
- mvc __LC_RETURN_PSW(8),0(%r12)
- mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_work_loop)
- la %r12,__LC_RETURN_PSW
+cleanup_io_tif:
+ larl %r9,io_tif
br %r14
-cleanup_io_leave:
- clc 8(8,%r12),BASED(cleanup_io_leave_insn)
- je 2f
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
- clc 8(8,%r12),BASED(cleanup_io_leave_insn+8)
- je 2f
-#endif
- mvc __LC_RETURN_PSW(16),SP_PSW(%r15)
- cghi %r12,__LC_MCK_OLD_PSW
- jne 0f
- mvc __LC_SAVE_AREA+64(32),SP_R12(%r15)
- j 1f
-0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15)
-1: lmg %r0,%r11,SP_R0(%r15)
- lg %r15,SP_R15(%r15)
-2: la %r12,__LC_RETURN_PSW
+cleanup_io_restore:
+ clg %r9,BASED(cleanup_io_restore_insn)
+ je 0f
+ lg %r9,24(%r11) # get saved r11 pointer to pt_regs
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
+ mvc 0(64,%r11),__PT_R8(%r9)
+ lmg %r0,%r7,__PT_R0(%r9)
+0: lmg %r8,%r9,__LC_RETURN_PSW
br %r14
-cleanup_io_leave_insn:
+cleanup_io_restore_insn:
.quad io_done - 4
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- .quad io_done - 8
-#endif
+
+cleanup_idle:
+ # copy interrupt clock & cpu timer
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
+0: # check if stck & stpt have been executed
+ clg %r9,BASED(cleanup_idle_insn)
+ jhe 1f
+ mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
+ mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
+1: # account system time going idle
+ lg %r9,__LC_STEAL_TIMER
+ alg %r9,__CLOCK_IDLE_ENTER(%r2)
+ slg %r9,__LC_LAST_UPDATE_CLOCK
+ stg %r9,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+ lg %r9,__LC_SYSTEM_TIMER
+ alg %r9,__LC_LAST_UPDATE_TIMER
+ slg %r9,__TIMER_IDLE_ENTER(%r2)
+ stg %r9,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+ # prepare return psw
+ nihh %r8,0xfcfd # clear irq & wait state bits
+ lg %r9,48(%r11) # return from psw_idle
+ br %r14
+cleanup_idle_insn:
+ .quad psw_idle_lpsw
/*
* Integer constants
*/
- .align 4
-.Lconst:
-.Lnr_syscalls: .long NR_syscalls
-.L0x0130: .short 0x130
-.L0x0140: .short 0x140
-.L0x0150: .short 0x150
-.L0x0160: .short 0x160
-.L0x0170: .short 0x170
+ .align 8
.Lcritical_start:
- .quad __critical_start
-.Lcritical_end:
- .quad __critical_end
+ .quad __critical_start
+.Lcritical_length:
+ .quad __critical_end - __critical_start
+
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+ stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ stg %r2,__SF_EMPTY(%r15) # save control block pointer
+ stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
+ xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+ lg %r14,__LC_GMAP # get gmap pointer
+ ltgr %r14,%r14
+ jz sie_gmap
+ lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
+sie_gmap:
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),1 # last exit...
+ jnz sie_done
+ LPP __SF_EMPTY(%r15) # set guest id
+ sie 0(%r14)
+sie_done:
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and sie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+rewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
+ lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
+ lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
+ br %r14
+sie_fault:
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
+ j sie_exit
+
+ .align 8
+.Lsie_critical:
+ .quad sie_gmap
+.Lsie_critical_length:
+ .quad sie_done - sie_gmap
+
+ EX_TABLE(rewind_pad,sie_fault)
+ EX_TABLE(sie_exit,sie_fault)
+#endif
.section .rodata, "a"
#define SYSCALL(esa,esame,emu) .long esame
+ .globl sys_call_table
sys_call_table:
#include "syscalls.S"
#undef SYSCALL
@@ -1068,6 +1037,7 @@ sys_call_table:
#ifdef CONFIG_COMPAT
#define SYSCALL(esa,esame,emu) .long emu
+ .globl sys_call_table_emu
sys_call_table_emu:
#include "syscalls.S"
#undef SYSCALL
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
new file mode 100644
index 00000000000..54d6493c4a5
--- /dev/null
+++ b/arch/s390/kernel/ftrace.c
@@ -0,0 +1,192 @@
+/*
+ * Dynamic function tracer architecture backend.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <trace/syscall.h>
+#include <asm/asm-offsets.h>
+#include "entry.h"
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+void ftrace_disable_code(void);
+void ftrace_enable_insn(void);
+
+#ifdef CONFIG_64BIT
+/*
+ * The 64-bit mcount code looks like this:
+ * stg %r14,8(%r15) # offset 0
+ * > larl %r1,<&counter> # offset 6
+ * > brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * Total length is 24 bytes. The middle two instructions of the mcount
+ * block get overwritten by ftrace_make_nop / ftrace_make_call.
+ * The 64-bit enabled ftrace code block looks like this:
+ * stg %r14,8(%r15) # offset 0
+ * > lg %r1,__LC_FTRACE_FUNC # offset 6
+ * > lgr %r0,%r0 # offset 12
+ * > basr %r14,%r1 # offset 16
+ * lg %r14,8(%r15) # offset 18
+ * The return points of the mcount/ftrace function have the same offset 18.
+ * The 64-bit disabled ftrace code block looks like this:
+ * stg %r14,8(%r15) # offset 0
+ * > jg .+18 # offset 6
+ * > lgr %r0,%r0 # offset 12
+ * > basr %r14,%r1 # offset 16
+ * lg %r14,8(%r15) # offset 18
+ * The jg instruction branches to offset 24 to skip as many instructions
+ * as possible.
+ */
+asm(
+ " .align 4\n"
+ "ftrace_disable_code:\n"
+ " jg 0f\n"
+ " lgr %r0,%r0\n"
+ " basr %r14,%r1\n"
+ "0:\n"
+ " .align 4\n"
+ "ftrace_enable_insn:\n"
+ " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n");
+
+#define FTRACE_INSN_SIZE 6
+
+#else /* CONFIG_64BIT */
+/*
+ * The 31-bit mcount code looks like this:
+ * st %r14,4(%r15) # offset 0
+ * > bras %r1,0f # offset 4
+ * > .long _mcount # offset 8
+ * > .long <&counter> # offset 12
+ * > 0: l %r14,0(%r1) # offset 16
+ * > l %r1,4(%r1) # offset 20
+ * basr %r14,%r14 # offset 24
+ * l %r14,4(%r15) # offset 26
+ * Total length is 30 bytes. The twenty bytes starting from offset 4
+ * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call.
+ * The 31-bit enabled ftrace code block looks like this:
+ * st %r14,4(%r15) # offset 0
+ * > l %r14,__LC_FTRACE_FUNC # offset 4
+ * > j 0f # offset 8
+ * > .fill 12,1,0x07 # offset 12
+ * 0: basr %r14,%r14 # offset 24
+ * l %r14,4(%r15) # offset 26
+ * The return points of the mcount/ftrace function have the same offset 26.
+ * The 31-bit disabled ftrace code block looks like this:
+ * st %r14,4(%r15) # offset 0
+ * > j .+26 # offset 4
+ * > j 0f # offset 8
+ * > .fill 12,1,0x07 # offset 12
+ * 0: basr %r14,%r14 # offset 24
+ * l %r14,4(%r15) # offset 26
+ * The j instruction branches to offset 30 to skip as many instructions
+ * as possible.
+ */
+asm(
+ " .align 4\n"
+ "ftrace_disable_code:\n"
+ " j 1f\n"
+ " j 0f\n"
+ " .fill 12,1,0x07\n"
+ "0: basr %r14,%r14\n"
+ "1:\n"
+ " .align 4\n"
+ "ftrace_enable_insn:\n"
+ " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n");
+
+#define FTRACE_INSN_SIZE 4
+
+#endif /* CONFIG_64BIT */
+
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
+ MCOUNT_INSN_SIZE))
+ return -EPERM;
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn,
+ FTRACE_INSN_SIZE))
+ return -EPERM;
+ return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Hook the return address and push it in the stack of return addresses
+ * in current thread info.
+ */
+unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
+ unsigned long ip)
+{
+ struct ftrace_graph_ent trace;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ goto out;
+ ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+ trace.func = ip;
+ trace.depth = current->curr_ret_stack + 1;
+ /* Only trace if the calling function expects to. */
+ if (!ftrace_graph_entry(&trace))
+ goto out;
+ if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+ goto out;
+ parent = (unsigned long) return_to_handler;
+out:
+ return parent;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Patch the kernel code at ftrace_graph_caller location. The instruction
+ * there is branch relative and save to prepare_ftrace_return. To disable
+ * the call to prepare_ftrace_return we patch the bras offset to point
+ * directly after the instructions. To enable the call we calculate
+ * the original offset to prepare_ftrace_return and put it back.
+ */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned short offset;
+
+ offset = ((void *) prepare_ftrace_return -
+ (void *) ftrace_graph_caller) / 2;
+ return probe_kernel_write((void *) ftrace_graph_caller + 2,
+ &offset, sizeof(offset));
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ static unsigned short offset = 0x0002;
+
+ return probe_kernel_write((void *) ftrace_graph_caller + 2,
+ &offset, sizeof(offset));
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 83477c7dc74..e88d35d7495 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -1,7 +1,5 @@
/*
- * arch/s390/kernel/head.S
- *
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright IBM Corp. 1999, 2010
*
* Author(s): Hartmut Penner <hp@de.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
@@ -23,8 +21,8 @@
*
*/
-#include <asm/setup.h>
-#include <asm/lowcore.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@@ -35,126 +33,8 @@
#define ARCH_OFFSET 0
#endif
-.section ".text.head","ax"
-#ifndef CONFIG_IPL
- .org 0
- .long 0x00080000,0x80000000+startup # Just a restart PSW
-#else
-#ifdef CONFIG_IPL_TAPE
-#define IPL_BS 1024
- .org 0
- .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
- .long 0x27000000,0x60000001 # by ipl to addresses 0-23.
- .long 0x02000000,0x20000000+IPL_BS # (a PSW and two CCWs).
- .long 0x00000000,0x00000000 # external old psw
- .long 0x00000000,0x00000000 # svc old psw
- .long 0x00000000,0x00000000 # program check old psw
- .long 0x00000000,0x00000000 # machine check old psw
- .long 0x00000000,0x00000000 # io old psw
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x000a0000,0x00000058 # external new psw
- .long 0x000a0000,0x00000060 # svc new psw
- .long 0x000a0000,0x00000068 # program check new psw
- .long 0x000a0000,0x00000070 # machine check new psw
- .long 0x00080000,0x80000000+.Lioint # io new psw
-
- .org 0x100
-#
-# subroutine for loading from tape
-# Paramters:
-# R1 = device number
-# R2 = load address
-.Lloader:
- st %r14,.Lldret
- la %r3,.Lorbread # r3 = address of orb
- la %r5,.Lirb # r5 = address of irb
- st %r2,.Lccwread+4 # initialize CCW data addresses
- lctl %c6,%c6,.Lcr6
- slr %r2,%r2
-.Lldlp:
- la %r6,3 # 3 retries
-.Lssch:
- ssch 0(%r3) # load chunk of IPL_BS bytes
- bnz .Llderr
-.Lw4end:
- bas %r14,.Lwait4io
- tm 8(%r5),0x82 # do we have a problem ?
- bnz .Lrecov
- slr %r7,%r7
- icm %r7,3,10(%r5) # get residual count
- lcr %r7,%r7
- la %r7,IPL_BS(%r7) # IPL_BS-residual=#bytes read
- ar %r2,%r7 # add to total size
- tm 8(%r5),0x01 # found a tape mark ?
- bnz .Ldone
- l %r0,.Lccwread+4 # update CCW data addresses
- ar %r0,%r7
- st %r0,.Lccwread+4
- b .Lldlp
-.Ldone:
- l %r14,.Lldret
- br %r14 # r2 contains the total size
-.Lrecov:
- bas %r14,.Lsense # do the sensing
- bct %r6,.Lssch # dec. retry count & branch
- b .Llderr
-#
-# Sense subroutine
-#
-.Lsense:
- st %r14,.Lsnsret
- la %r7,.Lorbsense
- ssch 0(%r7) # start sense command
- bnz .Llderr
- bas %r14,.Lwait4io
- l %r14,.Lsnsret
- tm 8(%r5),0x82 # do we have a problem ?
- bnz .Llderr
- br %r14
-#
-# Wait for interrupt subroutine
-#
-.Lwait4io:
- lpsw .Lwaitpsw
-.Lioint:
- c %r1,0xb8 # compare subchannel number
- bne .Lwait4io
- tsch 0(%r5)
- slr %r0,%r0
- tm 8(%r5),0x82 # do we have a problem ?
- bnz .Lwtexit
- tm 8(%r5),0x04 # got device end ?
- bz .Lwait4io
-.Lwtexit:
- br %r14
-.Llderr:
- lpsw .Lcrash
-
- .align 8
-.Lorbread:
- .long 0x00000000,0x0080ff00,.Lccwread
- .align 8
-.Lorbsense:
- .long 0x00000000,0x0080ff00,.Lccwsense
- .align 8
-.Lccwread:
- .long 0x02200000+IPL_BS,0x00000000
-.Lccwsense:
- .long 0x04200001,0x00000000
-.Lwaitpsw:
- .long 0x020a0000,0x80000000+.Lioint
-
-.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.Lcr6: .long 0xff000000
- .align 8
-.Lcrash:.long 0x000a0000,0x00000000
-.Lldret:.long 0
-.Lsnsret: .long 0
-#endif /* CONFIG_IPL_TAPE */
+__HEAD
-#ifdef CONFIG_IPL_VM
#define IPL_BS 0x730
.org 0
.long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
@@ -172,7 +52,7 @@
.long 0x02000370,0x60000050 # the channel program the PSW
.long 0x020003c0,0x60000050 # at location 0 is loaded.
.long 0x02000410,0x60000050 # Initial processing starts
- .long 0x02000460,0x60000050 # at 0xf0 = iplstart.
+ .long 0x02000460,0x60000050 # at 0x200 = iplstart.
.long 0x020004b0,0x60000050
.long 0x02000500,0x60000050
.long 0x02000550,0x60000050
@@ -182,11 +62,54 @@
.long 0x02000690,0x60000050
.long 0x020006e0,0x20000050
- .org 0xf0
+ .org 0x200
+# .Lsetmode: set the architecture mode and record it in __LC_AR_MODE_ID.
+# 64 bit: flag 1, sigp order 0x12 with mode 2, clear the high gpr halves, sam31 (clobbers %r0, %r1, %r13).
+# Otherwise only flag 0 is written. Returns through %r14.
+.Lsetmode:
+#ifdef CONFIG_64BIT
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+#else
+ mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
+#endif
+ br %r14
+
+# .Lirqwait: subroutine to wait for end I/O, returns through %r14.
+# Copies .Lnewpsw to the I/O new PSW slot (16 bytes at 0x1f0 on 64 bit, 8 bytes at 0x78 otherwise)
+# and loads .Lwaitpsw; .Lnewpsw points at .Lioint, which is the return to the caller.
+.Lirqwait:
+#ifdef CONFIG_64BIT
+ mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw
+ lpsw .Lwaitpsw
+.Lioint:
+ br %r14
+ .align 8
+.Lnewpsw:
+ .quad 0x0000000080000000,.Lioint
+#else
+ mvc 0x78(8),.Lnewpsw # set up IO interrupt psw
+ lpsw .Lwaitpsw
+.Lioint:
+ br %r14
+ .align 8
+.Lnewpsw:
+ .long 0x00080000,0x80000000+.Lioint
+#endif
+.Lwaitpsw:
+ .long 0x020a0000,0x80000000+.Lioint
+
#
# subroutine for loading cards from the reader
#
.Lloader:
+ la %r4,0(%r14)
la %r3,.Lorb # r2 = address of orb into r2
la %r5,.Lirb # r4 = address of irb
la %r6,.Lccws
@@ -203,9 +126,7 @@
ssch 0(%r3) # load chunk of 1600 bytes
bnz .Llderr
.Lwait4irq:
- mvc 0x78(8),.Lnewpsw # set up IO interrupt psw
- lpsw .Lwaitpsw
-.Lioint:
+ bas %r14,.Lirqwait
c %r1,0xb8 # compare subchannel number
bne .Lwait4irq
tsch 0(%r5)
@@ -224,7 +145,7 @@
sr %r0,%r3 # #ccws*80-residual=#bytes read
ar %r2,%r0
- br %r14 # r2 contains the total size
+ br %r4 # r2 contains the total size
.Lcont:
ahi %r2,0x640 # add 0x640 to total size
@@ -248,19 +169,15 @@
.Lloadp:.long 0,0
.align 8
.Lcrash:.long 0x000a0000,0x00000000
-.Lnewpsw:
- .long 0x00080000,0x80000000+.Lioint
-.Lwaitpsw:
- .long 0x020a0000,0x80000000+.Lioint
.align 8
.Lccws: .rept 19
.long 0x02600050,0x00000000
.endr
.long 0x02200050,0x00000000
-#endif /* CONFIG_IPL_VM */
iplstart:
+ bas %r14,.Lsetmode # Immediately switch to 64 bit mode
lh %r1,0xb8 # test if subchannel number
bct %r1,.Lnoload # is valid
l %r1,0xb8 # load ipl subchannel number
@@ -288,19 +205,7 @@ iplstart:
bz .Lagain1 # skip dateset trailer
la %r5,0(%r4,%r2)
lr %r3,%r2
-.Lidebc:
- tm 0(%r5),0x80 # high order bit set ?
- bo .Ldocv # yes -> convert from EBCDIC
- ahi %r5,-1
- bct %r3,.Lidebc
- b .Lnocv
-.Ldocv:
- l %r3,.Lcvtab
- tr 0(256,%r4),0(%r3) # convert parameters to ascii
- tr 256(256,%r4),0(%r3)
- tr 512(256,%r4),0(%r3)
- tr 768(122,%r4),0(%r3)
-.Lnocv: la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line
+ la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line
mvc 0(256,%r3),0(%r4)
mvc 256(256,%r3),256(%r4)
mvc 512(256,%r3),512(%r4)
@@ -339,12 +244,11 @@ iplstart:
clc 0(3,%r2),.L_eof
bz .Lagain2
-#ifdef CONFIG_IPL_VM
#
# reset files in VM reader
#
- stidp __LC_CPUID # store cpuid
- tm __LC_CPUID,0xff # running VM ?
+ stidp .Lcpuid # store cpuid
+ tm .Lcpuid,0xff # running VM ?
bno .Lnoreset
la %r2,.Lreset
lhi %r3,26
@@ -356,24 +260,14 @@ iplstart:
tm 31(%r5),0xff # bits is set in the schib
bz .Lnoreset
.Lwaitforirq:
- mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw
-.Lwaitrdrirq:
- lpsw .Lrdrwaitpsw
-.Lrdrint:
+ bas %r14,.Lirqwait # wait for IO interrupt
c %r1,0xb8 # compare subchannel number
- bne .Lwaitrdrirq
+ bne .Lwaitforirq
la %r5,.Lirb
tsch 0(%r5)
.Lnoreset:
b .Lnoload
- .align 8
-.Lrdrnewpsw:
- .long 0x00080000,0x80000000+.Lrdrint
-.Lrdrwaitpsw:
- .long 0x020a0000,0x80000000+.Lrdrint
-#endif
-
#
# everything loaded, go for it
#
@@ -381,17 +275,16 @@ iplstart:
l %r1,.Lstartup
br %r1
-.Linitrd:.long _end + 0x400000 # default address of initrd
+.Linitrd:.long _end # default address of initrd
.Lparm: .long PARMAREA
.Lstartup: .long startup
-.Lcvtab:.long _ebcasc # ebcdic to ascii table
.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
.byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
.byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
.L_eof: .long 0xc5d6c600 /* C'EOF' */
.L_hdr: .long 0xc8c4d900 /* C'HDR' */
-
-#endif /* CONFIG_IPL */
+ .align 8
+.Lcpuid:.fill 8,1,0
#
# SALIPL loader support. Based on a patch by Rob van der Heij.
@@ -399,9 +292,9 @@ iplstart:
# doesn't need a builtin ipl record.
#
.org 0x800
- .globl start
-start:
+ENTRY(start)
stm %r0,%r15,0x07b0 # store registers
+ bas %r14,.Lsetmode # Immediately switch to 64 bit mode
basr %r12,%r0
.base:
l %r11,.parm
@@ -417,13 +310,10 @@ start:
.sk8x8:
mvc 0(240,%r8),0(%r9) # copy iplparms into buffer
.gotr:
- l %r10,.tbl # EBCDIC to ASCII table
- tr 0(240,%r8),0(%r10)
slr %r0,%r0
st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
j startup # continue with startup
-.tbl: .long _ebcasc # translate table
.cmd: .long COMMAND_LINE # address of command line buffer
.parm: .long PARMAREA
.lowcase:
@@ -461,8 +351,151 @@ start:
.byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
.byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+#
+# startup-code at 0x10000, running in absolute addressing mode
+# this is called either by the ipl loader or directly by PSW restart
+# or linload or SALIPL
+#
+ .org 0x10000
+ENTRY(startup)
+ j .Lep_startup_normal
+ .org 0x10008
+#
+# This is a list of s390 kernel entry points. At address 0x1000f the number of
+# valid entry points is stored.
+#
+# IMPORTANT: Do not change this table, it is s390 kernel ABI!
+#
+ .ascii "S390EP"
+ .byte 0x00,0x01
+#
+# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+#
+ .org 0x10010
+ENTRY(startup_kdump)
+ j .Lep_startup_kdump
+.Lep_startup_normal:
+#ifdef CONFIG_64BIT
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+#else
+ mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
+#endif
+ basr %r13,0 # get base
+.LPG0:
+ xc 0x200(256),0x200 # partially clear lowcore
+ xc 0x300(256),0x300
+ xc 0xe00(256),0xe00
+ stck __LC_LAST_UPDATE_CLOCK
+ spt 6f-.LPG0(%r13)
+ mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+ xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
+#ifndef CONFIG_MARCH_G5
+ # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
+ .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list
+ tm __LC_STFL_FAC_LIST,0x01 # stfle available ?
+ jz 0f
+ la %r0,1
+ .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended
+ # verify if all required facilities are supported by the machine
+0: la %r1,__LC_STFL_FAC_LIST
+ la %r2,3f+8-.LPG0(%r13)
+ l %r3,0(%r2)
+1: l %r0,0(%r1)
+ n %r0,4(%r2)
+ cl %r0,4(%r2)
+ jne 2f
+ la %r1,4(%r1)
+ la %r2,4(%r2)
+ ahi %r3,-1
+ jnz 1b
+ j 4f
+2: l %r15,.Lstack-.LPG0(%r13)
+ ahi %r15,-96
+ la %r2,.Lals_string-.LPG0(%r13)
+ l %r3,.Lsclp_print-.LPG0(%r13)
+ basr %r14,%r3
+ lpsw 3f-.LPG0(%r13) # machine type not good enough, crash
+.Lals_string:
+ .asciz "The Linux kernel requires more recent processor hardware"
+.Lsclp_print:
+ .long _sclp_print_early
+.Lstack:
+ .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+ .align 16
+3: .long 0x000a0000,0x8badcccc
+
+# List of facilities that are required. If not all facilities are present
+# the kernel will crash. Format is number of facility words with bits set,
+# followed by the facility words.
+
+#if defined(CONFIG_64BIT)
+#if defined(CONFIG_MARCH_ZEC12)
+ .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+#elif defined(CONFIG_MARCH_Z196)
+ .long 2, 0xc100eff2, 0xf46c0000
+#elif defined(CONFIG_MARCH_Z10)
+ .long 2, 0xc100eff2, 0xf0680000
+#elif defined(CONFIG_MARCH_Z9_109)
+ .long 1, 0xc100efc2
+#elif defined(CONFIG_MARCH_Z990)
+ .long 1, 0xc0002000
+#elif defined(CONFIG_MARCH_Z900)
+ .long 1, 0xc0000000
+#endif
+#else
+#if defined(CONFIG_MARCH_ZEC12)
+ .long 1, 0x8100c880
+#elif defined(CONFIG_MARCH_Z196)
+ .long 1, 0x8100c880
+#elif defined(CONFIG_MARCH_Z10)
+ .long 1, 0x8100c880
+#elif defined(CONFIG_MARCH_Z9_109)
+ .long 1, 0x8100c880
+#elif defined(CONFIG_MARCH_Z990)
+ .long 1, 0x80002000
+#elif defined(CONFIG_MARCH_Z900)
+ .long 1, 0x80000000
+#endif
+#endif
+4:
+#endif
+
#ifdef CONFIG_64BIT
-#include "head64.S"
+ /* Continue with 64bit startup code in head64.S */
+ sam64 # switch to 64 bit mode
+ jg startup_continue
#else
-#include "head31.S"
+ /* Continue with 31bit startup code in head31.S */
+ l %r13,5f-.LPG0(%r13)
+ b 0(%r13)
+ .align 8
+5: .long startup_continue
#endif
+
+ .align 8
+6: .long 0x7fffffff,0xffffffff
+
+#include "head_kdump.S"
+
+#
+# params at 10400 (setup.h)
+#
+ .org PARMAREA
+ .long 0,0 # IPL_DEVICE
+ .long 0,0 # INITRD_START
+ .long 0,0 # INITRD_SIZE
+ .long 0,0 # OLDMEM_BASE
+ .long 0,0 # OLDMEM_SIZE
+
+ .org COMMAND_LINE
+ .byte "root=/dev/ram0 ro"
+ .byte 0
+
+ .org 0x11000
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index a816e2de32b..6dbe80983a2 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -1,7 +1,5 @@
/*
- * arch/s390/kernel/head31.S
- *
- * Copyright (C) IBM Corp. 2005,2006
+ * Copyright IBM Corp. 2005, 2010
*
* Author(s): Hartmut Penner <hp@de.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
@@ -10,47 +8,31 @@
*
*/
-#
-# startup-code at 0x10000, running in absolute addressing mode
-# this is called either by the ipl loader or directly by PSW restart
-# or linload or SALIPL
-#
- .org 0x10000
-startup:basr %r13,0 # get base
-.LPG0: l %r13,0f-.LPG0(%r13)
- b 0(%r13)
-0: .long startup_continue
-
-#
-# params at 10400 (setup.h)
-#
- .org PARMAREA
- .long 0,0 # IPL_DEVICE
- .long 0,0 # INITRD_START
- .long 0,0 # INITRD_SIZE
-
- .org COMMAND_LINE
- .byte "root=/dev/ram0 ro"
- .byte 0
-
- .org 0x11000
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
-startup_continue:
+__HEAD
+ENTRY(startup_continue)
basr %r13,0 # get base
-.LPG1: mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
+.LPG1:
+
+ l %r1,.Lbase_cc-.LPG1(%r13)
+ mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
- mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12)
#
# Setup stack
#
l %r15,.Linittu-.LPG1(%r13)
+ st %r15,__LC_THREAD_INFO # cache thread info in lowcore
mvc __LC_CURRENT(4),__TI_task(%r15)
ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
st %r15,__LC_KERNEL_STACK # set end of kernel stack
ahi %r15,-96
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
#
# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
# and create a kernel NSS if the SAVESYS= parm is defined
@@ -61,7 +43,7 @@ startup_continue:
# virtual and never return ...
.align 8
.Lentry:.long 0x00080000,0x80000000 + _stext
-.Lctl: .long 0x04b50002 # cr0: various things
+.Lctl: .long 0x04b50000 # cr0: various things
.long 0 # cr1: primary space segment table
.long .Lduct # cr2: dispatchable unit control table
.long 0 # cr3: instruction authorization
@@ -77,7 +59,6 @@ startup_continue:
.long 0 # cr13: home space segment table
.long 0xc0000000 # cr14: machine check handling off
.long 0 # cr15: linkage stack operations
-.Lmchunk:.long memory_chunk
.Lbss_bgn: .long __bss_start
.Lbss_end: .long _end
.Lparmaddr: .long PARMAREA
@@ -91,23 +72,21 @@ startup_continue:
.Lduald:.rept 8
.long 0x80000000,0,0,0 # invalid access-list entries
.endr
+.Lbase_cc:
+ .long sched_clock_base_cc
- .org 0x12000
- .globl _ehead
-_ehead:
-#ifdef CONFIG_SHARED_KERNEL
- .org 0x100000
-#endif
+ENTRY(_ehead)
+ .org 0x100000 - 0x11000 # head.o ends at 0x11000
#
# startup-code, running in absolute addressing mode
#
- .globl _stext
-_stext: basr %r13,0 # get base
+ENTRY(_stext)
+ basr %r13,0 # get base
.LPG3:
# check control registers
stctl %c0,%c15,0(%r15)
- oi 2(%r15),0x40 # enable sigp emergency signal
+ oi 2(%r15),0x60 # enable sigp emergency & external call
oi 0(%r15),0x10 # switch on low address protection
lctl %c0,%c15,0(%r15)
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 1d06961e87b..d7c00507568 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -1,7 +1,5 @@
/*
- * arch/s390/kernel/head64.S
- *
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright IBM Corp. 1999, 2010
*
* Author(s): Hartmut Penner <hp@de.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
@@ -10,116 +8,32 @@
*
*/
-#
-# startup-code at 0x10000, running in absolute addressing mode
-# this is called either by the ipl loader or directly by PSW restart
-# or linload or SALIPL
-#
- .org 0x10000
-startup:basr %r13,0 # get base
-.LPG0: l %r13,0f-.LPG0(%r13)
- b 0(%r13)
-0: .long startup_continue
-
-#
-# params at 10400 (setup.h)
-#
- .org PARMAREA
- .quad 0 # IPL_DEVICE
- .quad 0 # INITRD_START
- .quad 0 # INITRD_SIZE
-
- .org COMMAND_LINE
- .byte "root=/dev/ram0 ro"
- .byte 0
-
- .org 0x11000
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
-startup_continue:
- basr %r13,0 # get base
-.LPG1: sll %r13,1 # remove high order bit
- srl %r13,1
-
-#ifdef CONFIG_ZFCPDUMP
-
- # check if we have been ipled using zfcp dump:
-
- tm 0xb9,0x01 # test if subchannel is enabled
- jno .nodump # subchannel disabled
- l %r1,0xb8
- la %r5,.Lipl_schib-.LPG1(%r13)
- stsch 0(%r5) # get schib of subchannel
- jne .nodump # schib not available
- tm 5(%r5),0x01 # devno valid?
- jno .nodump
- tm 4(%r5),0x80 # qdio capable device?
- jno .nodump
- l %r2,20(%r0) # address of ipl parameter block
- lhi %r3,0
- ic %r3,0x148(%r2) # get opt field
- chi %r3,0x20 # load with dump?
- jne .nodump
-
- # store all prefix registers in case of load with dump:
-
- la %r7,0 # base register for 0 page
- la %r8,0 # first cpu
- l %r11,.Lpref_arr_ptr-.LPG1(%r13) # address of prefix array
- ahi %r11,4 # skip boot cpu
- lr %r12,%r11
- ahi %r12,(CONFIG_NR_CPUS*4) # end of prefix array
- stap .Lcurrent_cpu+2-.LPG1(%r13) # store current cpu addr
-1:
- cl %r8,.Lcurrent_cpu-.LPG1(%r13) # is ipl cpu ?
- je 4f # if yes get next cpu
-2:
- lr %r9,%r7
- sigp %r9,%r8,0x9 # stop & store status of cpu
- brc 8,3f # accepted
- brc 4,4f # status stored: next cpu
- brc 2,2b # busy: try again
- brc 1,4f # not op: next cpu
-3:
- mvc 0(4,%r11),264(%r7) # copy prefix register to prefix array
- ahi %r11,4 # next element in prefix array
- clr %r11,%r12
- je 5f # no more space in prefix array
-4:
- ahi %r8,1 # next cpu (r8 += 1)
- cl %r8,.Llast_cpu-.LPG1(%r13) # is last possible cpu ?
- jl 1b # jump if not last cpu
-5:
- lhi %r1,2 # mode 2 = esame (dump)
- j 6f
- .align 4
-.Lipl_schib:
- .rept 13
- .long 0
- .endr
-.nodump:
- lhi %r1,1 # mode 1 = esame (normal ipl)
-6:
-#else
- lhi %r1,1 # mode 1 = esame (normal ipl)
-#endif /* CONFIG_ZFCPDUMP */
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- sigp %r1,%r0,0x12 # switch to esame mode
- sam64 # switch to 64 bit mode
+__HEAD
+ENTRY(startup_continue)
+ larl %r1,sched_clock_base_cc
+ mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
+ larl %r13,.LPG1 # get base
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
# move IPL device to lowcore
- mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+ lghi %r0,__LC_PASTE
+ stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
larl %r15,init_thread_union
+ stg %r15,__LC_THREAD_INFO # cache thread info in lowcore
lg %r14,__TI_task(%r15) # cache current in lowcore
stg %r14,__LC_CURRENT
aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
stg %r15,__LC_KERNEL_STACK # set end of kernel stack
aghi %r15,-160
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
#
# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
# and create a kernel NSS if the SAVESYS= parm is defined
@@ -128,8 +42,9 @@ startup_continue:
lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space,
# virtual and never return ...
.align 16
+.LPG1:
.Lentry:.quad 0x0000000180000000,_stext
-.Lctl: .quad 0x04350002 # cr0: various things
+.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
.quad 0 # cr1: primary space segment table
.quad .Lduct # cr2: dispatchable unit control table
.quad 0 # cr3: instruction authorization
@@ -144,45 +59,36 @@ startup_continue:
.quad 0 # cr12: tracing off
.quad 0 # cr13: home space segment table
.quad 0xc0000000 # cr14: machine check handling off
- .quad 0 # cr15: linkage stack operations
+ .quad .Llinkage_stack # cr15: linkage stack operations
.Lpcmsk:.quad 0x0000000180000000
.L4malign:.quad 0xffffffffffc00000
.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
.Lnop: .long 0x07000700
-#ifdef CONFIG_ZFCPDUMP
-.Lcurrent_cpu:
- .long 0x0
-.Llast_cpu:
- .long 0x0000ffff
-.Lpref_arr_ptr:
- .long zfcpdump_prefix_array
-#endif /* CONFIG_ZFCPDUMP */
.Lparmaddr:
.quad PARMAREA
.align 64
-.Lduct: .long 0,0,0,0,.Lduald,0,0,0
+.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
.long 0,0,0,0,0,0,0,0
+.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
.align 128
.Lduald:.rept 8
.long 0x80000000,0,0,0 # invalid access-list entries
.endr
+.Llinkage_stack:
+ .long 0,0,0x89000000,0,0,0,0x8a000000,0
- .org 0x12000
- .globl _ehead
-_ehead:
-#ifdef CONFIG_SHARED_KERNEL
- .org 0x100000
-#endif
+ENTRY(_ehead)
+ .org 0x100000 - 0x11000 # head.o ends at 0x11000
#
# startup-code, running in absolute addressing mode
#
- .globl _stext
-_stext: basr %r13,0 # get base
+ENTRY(_stext)
+ basr %r13,0 # get base
.LPG3:
# check control registers
stctg %c0,%c15,0(%r15)
- oi 6(%r15),0x40 # enable sigp emergency signal
+ oi 6(%r15),0x60 # enable sigp emergency & external call
oi 4(%r15),0x10 # switch on low address proctection
lctlg %c0,%c15,0(%r15)
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
new file mode 100644
index 00000000000..085a95eb315
--- /dev/null
+++ b/arch/s390/kernel/head_kdump.S
@@ -0,0 +1,108 @@
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <asm/sigp.h>
+
+#define DATAMOVER_ADDR 0x4000
+#define COPY_PAGE_ADDR 0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+.align 2
+.Lep_startup_kdump:
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode
+ sam64 # Switch to 64 bit addressing
+ basr %r13,0
+.Lbase:
+ larl %r2,.Lbase_addr # Check, if we have been
+ lg %r2,0(%r2) # already relocated:
+ clgr %r2,%r13 #
+ jne .Lrelocate # No : Start data mover
+ lghi %r2,0 # Yes: Start kdump kernel
+ brasl %r14,startup_kdump_relocated
+
+.Lrelocate:
+ larl %r4,startup
+ lg %r2,0x418(%r4) # Get kdump base
+ lg %r3,0x420(%r4) # Get kdump size
+
+ larl %r10,.Lcopy_start # Source of data mover
+ lghi %r8,DATAMOVER_ADDR # Target of data mover
+ mvc 0(256,%r8),0(%r10) # Copy data mover code
+
+ agr %r8,%r2 # Copy data mover to
+ mvc 0(256,%r8),0(%r10) # reserved mem
+
+ lghi %r14,DATAMOVER_ADDR # Jump to copied data mover
+ basr %r14,%r14
+.Lbase_addr:
+ .quad .Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size; storage starting at 0 and at r2 is exchanged in 256 byte steps
+# through the buffer at COPY_PAGE_ADDR until r2 + r3 is reached, then startup_kdump_relocated is called
+.Lcopy_start:
+ basr %r13,0 # Base
+0:
+ lgr %r11,%r2 # Save kdump base address
+ lgr %r12,%r2
+ agr %r12,%r3 # Compute kdump end address
+
+ lghi %r5,0
+ lghi %r10,COPY_PAGE_ADDR # Load copy page address
+1:
+ mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp
+ mvc 0(256,%r5),0(%r11) # Copy new kernel to old
+ mvc 0(256,%r11),0(%r10) # Copy tmp to new
+ aghi %r11,256
+ aghi %r5,256
+ clgr %r11,%r12
+ jl 1b
+
+ lg %r14,.Lstartup_kdump-0b(%r13)
+ basr %r14,%r14 # Start relocated kernel
+.Lstartup_kdump:
+ .long 0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+.align 2
+startup_kdump_relocated:
+ basr %r13,0
+0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel...
+.align 8
+.Lrestart_psw:
+ .quad 0x0000000080000000,0x0000000000000000 + startup
+#else
+.align 2
+.Lep_startup_kdump:
+#ifdef CONFIG_64BIT
+ larl %r13,startup_kdump_crash
+ lpswe 0(%r13)
+.align 8
+startup_kdump_crash:
+ .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#else
+ basr %r13,0
+0: lpsw startup_kdump_crash-0b(%r13)
+.align 8
+startup_kdump_crash:
+ .long 0x000a0000,0x00000000 + startup_kdump_crash
+#endif /* CONFIG_64BIT */
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
deleted file mode 100644
index 7ad00396925..00000000000
--- a/arch/s390/kernel/init_task.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * arch/s390/kernel/init_task.c
- *
- * S390 version
- *
- * Derived from "arch/i386/kernel/init_task.c"
- */
-
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init_task.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 532542447d6..633ca750453 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1,8 +1,7 @@
/*
- * arch/s390/kernel/ipl.c
* ipl/reipl/dump support for Linux on s390.
*
- * Copyright IBM Corp. 2005,2007
+ * Copyright IBM Corp. 2005, 2012
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
* Volker Sameske <sameske@de.ibm.com>
@@ -14,6 +13,10 @@
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/crash_dump.h>
+#include <linux/debug_locks.h>
#include <asm/ipl.h>
#include <asm/smp.h>
#include <asm/setup.h>
@@ -22,6 +25,10 @@
#include <asm/ebcdic.h>
#include <asm/reset.h>
#include <asm/sclp.h>
+#include <asm/checksum.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include "entry.h"
#define IPL_PARM_BLOCK_VERSION 0
@@ -41,11 +48,13 @@
* - halt
* - power off
* - reipl
+ * - restart
*/
#define ON_PANIC_STR "on_panic"
#define ON_HALT_STR "on_halt"
#define ON_POFF_STR "on_poff"
#define ON_REIPL_STR "on_reboot"
+#define ON_RESTART_STR "on_restart"
struct shutdown_action;
struct shutdown_trigger {
@@ -54,18 +63,20 @@ struct shutdown_trigger {
};
/*
- * Five shutdown action types are supported:
+ * The following shutdown action types are supported:
*/
#define SHUTDOWN_ACTION_IPL_STR "ipl"
#define SHUTDOWN_ACTION_REIPL_STR "reipl"
#define SHUTDOWN_ACTION_DUMP_STR "dump"
#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd"
#define SHUTDOWN_ACTION_STOP_STR "stop"
+#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl"
struct shutdown_action {
char *name;
void (*fn) (struct shutdown_trigger *trigger);
int (*init) (void);
+ int init_rc;
};
static char *ipl_type_str(enum ipl_type type)
@@ -121,6 +132,7 @@ enum ipl_method {
REIPL_METHOD_FCP_RO_VM,
REIPL_METHOD_FCP_DUMP,
REIPL_METHOD_NSS,
+ REIPL_METHOD_NSS_DIAG,
REIPL_METHOD_DEFAULT,
};
@@ -134,14 +146,16 @@ enum dump_method {
static int diag308_set_works = 0;
+static struct ipl_parameter_block ipl_block;
+
static int reipl_capabilities = IPL_TYPE_UNKNOWN;
static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
static struct ipl_parameter_block *reipl_block_fcp;
static struct ipl_parameter_block *reipl_block_ccw;
-
-static char reipl_nss_name[NSS_NAME_SIZE + 1];
+static struct ipl_parameter_block *reipl_block_nss;
+static struct ipl_parameter_block *reipl_block_actual;
static int dump_capabilities = DUMP_TYPE_NONE;
static enum dump_type dump_type = DUMP_TYPE_NONE;
@@ -213,7 +227,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
const char *buf, size_t len) \
{ \
strncpy(_value, buf, sizeof(_value) - 1); \
- strstrip(_value); \
+ strim(_value); \
return len; \
} \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
@@ -263,6 +277,118 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+/*
+ * VM IPL PARM routines
+ *
+ * reipl_get_ascii_vmparm() - copy the VM parameter string of an IPL
+ * parameter block into @dest as a NUL-terminated ASCII string.
+ *
+ * @dest: output buffer
+ * @size: size of @dest; at most @size - 1 bytes are copied
+ * @ipb:  parameter block whose ipl_info.ccw.vm_parm is read
+ *
+ * Nothing is copied unless DIAG308_VM_FLAGS_VP_VALID is set in vm_flags
+ * and vm_parm_len is greater than zero; @dest is then an empty string.
+ *
+ * Returns the number of bytes stored in @dest, not counting the NUL.
+ *
+ * NOTE(review): @size - 1 is computed as size_t, so this assumes
+ * @size >= 1 - confirm that every caller guarantees it.
+ * NOTE(review): the range checks compare plain char against values above
+ * 0x7f, which presumably relies on char being unsigned here - confirm.
+ */
+static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ int i;
+ size_t len;
+ char has_lowercase = 0;
+
+ len = 0;
+ if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+ (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+
+ len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
+ memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+ /* The copy is not yet converted (EBCASC runs below). If at
+ * least one character is lowercase, we assume mixed case and
+ * keep it; otherwise we convert everything to lowercase.
+ */
+ for (i = 0; i < len; i++)
+ if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+ (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+ (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+ has_lowercase = 1;
+ break;
+ }
+ if (!has_lowercase)
+ EBC_TOLOWER(dest, len);
+ EBCASC(dest, len);
+ }
+ dest[len] = 0;
+
+ return len;
+}
+/*
+ * append_ipl_vmparm() - store the VM parameter string of the current
+ * ipl_block in @dest.
+ *
+ * The string is taken from ipl_block only when diag308_set_works is set
+ * and the block type is DIAG308_IPL_TYPE_CCW; in every other case @dest
+ * becomes an empty string.
+ *
+ * Returns the length stored in @dest (0 for the empty string).
+ */
+size_t append_ipl_vmparm(char *dest, size_t size)
+{
+ size_t rc;
+
+ rc = 0;
+ if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+ rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
+ else
+ dest[0] = 0;
+ return rc;
+}
+/*
+ * ipl_vm_parm_show() - show handler that prints the VM parameter string.
+ *
+ * Fetches the string with append_ipl_vmparm() into a zero-initialised
+ * buffer of DIAG308_VMPARM_SIZE + 1 bytes and writes it to @page followed
+ * by a newline. @kobj and @attr are not used.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+ append_ipl_vmparm(parm, sizeof(parm));
+ return sprintf(page, "%s\n", parm);
+}
+/*
+ * scpdata_length() - length of @buf without trailing padding.
+ *
+ * Walks backwards from @count and drops every trailing byte that is
+ * either '\0' or a blank. Returns the remaining length, which is 0 when
+ * the buffer holds nothing else (or @count is 0).
+ */
+static size_t scpdata_length(const char* buf, size_t count)
+{
+ while (count) {
+ if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
+ break;
+ count--;
+ }
+ return count;
+}
+/*
+ * reipl_append_ascii_scpdata() - copy the SCP data of an FCP parameter
+ * block into @dest as a NUL-terminated string.
+ *
+ * @dest: output buffer
+ * @size: size of @dest; at most @size - 1 bytes are copied
+ * @ipb:  parameter block whose ipl_info.fcp.scp_data is read
+ *
+ * Trailing NULs and blanks are not counted (see scpdata_length()). If any
+ * remaining byte fails isascii() the result is an empty string. Data that
+ * contains at least one lowercase character is copied unchanged; data
+ * without any is copied through tolower().
+ *
+ * Returns the number of bytes stored in @dest, not counting the NUL.
+ *
+ * NOTE(review): @size - 1 is unsigned, so this assumes @size >= 1 -
+ * confirm that every caller guarantees it.
+ */
+static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ size_t count;
+ size_t i;
+ int has_lowercase;
+
+ count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
+ ipb->ipl_info.fcp.scp_data_len));
+ if (!count)
+ goto out;
+
+ has_lowercase = 0;
+ for (i = 0; i < count; i++) {
+ if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+ count = 0;
+ goto out;
+ }
+ if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+ has_lowercase = 1;
+ }
+
+ if (has_lowercase)
+ memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ else
+ for (i = 0; i < count; i++)
+ dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
+out:
+ dest[count] = '\0';
+ return count;
+}
+/*
+ * append_ipl_scpdata() - store the SCP data of the current ipl_block in
+ * @dest.
+ *
+ * The data is taken from ipl_block only when its type is
+ * DIAG308_IPL_TYPE_FCP; otherwise @dest becomes an empty string.
+ *
+ * Returns the length stored in @dest (0 for the empty string).
+ */
+size_t append_ipl_scpdata(char *dest, size_t len)
+{
+ size_t rc;
+
+ rc = 0;
+ if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
+ rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
+ else
+ dest[0] = 0;
+ return rc;
+}
+
+/* Attribute "parm": readable (S_IRUGO) via ipl_vm_parm_show(), no store handler. */
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+ __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+
static ssize_t sys_ipl_device_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -282,17 +408,12 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
static struct kobj_attribute sys_ipl_device_attr =
__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
-static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
- unsigned int size = IPL_PARMBLOCK_SIZE;
-
- if (off > size)
- return 0;
- if (off + count > size)
- count = size - off;
- memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count);
- return count;
+ return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
+ IPL_PARMBLOCK_SIZE);
}
static struct bin_attribute ipl_parameter_attr = {
@@ -304,18 +425,14 @@ static struct bin_attribute ipl_parameter_attr = {
.read = &ipl_parameter_read,
};
-static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
- if (off > size)
- return 0;
- if (off + count > size)
- count = size - off;
- memcpy(buf, scp_data + off, count);
- return count;
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
static struct bin_attribute ipl_scp_data_attr = {
@@ -363,22 +480,34 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
return sprintf(page, "#unknown#\n");
memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
- strstrip(loadparm);
+ strim(loadparm);
return sprintf(page, "%s\n", loadparm);
}
static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
-static struct attribute *ipl_ccw_attrs[] = {
+static struct attribute *ipl_ccw_attrs_vm[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_vm_parm_attr.attr,
+ NULL,
+};
+
+static struct attribute *ipl_ccw_attrs_lpar[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
NULL,
};
-static struct attribute_group ipl_ccw_attr_group = {
- .attrs = ipl_ccw_attrs,
+static struct attribute_group ipl_ccw_attr_group_vm = {
+ .attrs = ipl_ccw_attrs_vm,
+};
+
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+ .attrs = ipl_ccw_attrs_lpar
};
/* NSS ipl device attributes */
@@ -388,6 +517,8 @@ DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name);
static struct attribute *ipl_nss_attrs[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_nss_name_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_vm_parm_attr.attr,
NULL,
};
@@ -430,7 +561,7 @@ out:
return rc;
}
-static void ipl_run(struct shutdown_trigger *trigger)
+static void __ipl_run(void *unused)
{
diag308(DIAG308_IPL, NULL);
if (MACHINE_IS_VM)
@@ -439,6 +570,11 @@ static void ipl_run(struct shutdown_trigger *trigger)
reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
}
+static void ipl_run(struct shutdown_trigger *trigger)
+{
+ smp_call_ipl_cpu(__ipl_run, NULL);
+}
+
static int __init ipl_init(void)
{
int rc;
@@ -450,7 +586,12 @@ static int __init ipl_init(void)
}
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group);
+ if (MACHINE_IS_VM)
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_ccw_attr_group_vm);
+ else
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_ccw_attr_group_lpar);
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
@@ -481,11 +622,141 @@ static struct shutdown_action __refdata ipl_action = {
* reipl shutdown action: Reboot Linux on shutdown.
*/
+/* VM IPL PARM attributes */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+ char *page)
+{
+ char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+
+ reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+ return sprintf(page, "%s\n", vmparm);
+}
+
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+ size_t vmparm_max,
+ const char *buf, size_t len)
+{
+ int i, ip_len;
+
+ /* ignore trailing newline */
+ ip_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ ip_len--;
+
+ if (ip_len > vmparm_max)
+ return -EINVAL;
+
+ /* parm is used to store kernel options, check for common chars */
+ for (i = 0; i < ip_len; i++)
+ if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
+ return -EINVAL;
+
+ memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+ ipb->ipl_info.ccw.vm_parm_len = ip_len;
+ if (ip_len > 0) {
+ ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+ memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
+ ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+ } else {
+ ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+ }
+
+ return len;
+}
+
+/* NSS wrapper */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+ __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
+ reipl_nss_vmparm_store);
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+ __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
+ reipl_ccw_vmparm_store);
+
/* FCP reipl device attributes */
-DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
+ void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t padding;
+ size_t scpdata_len;
+
+ if (off < 0)
+ return -EINVAL;
+
+ if (off >= DIAG308_SCPDATA_SIZE)
+ return -ENOSPC;
+
+ if (count > DIAG308_SCPDATA_SIZE - off)
+ count = DIAG308_SCPDATA_SIZE - off;
+
+ memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
+ scpdata_len = off + count;
+
+ if (scpdata_len % 8) {
+ padding = 8 - (scpdata_len % 8);
+ memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+ 0, padding);
+ scpdata_len += padding;
+ }
+
+ reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
+ reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
+ reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+
+ return count;
+}
+
+static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
+ .attr = {
+ .name = "scp_data",
+ .mode = S_IRUGO | S_IWUSR,
+ },
+ .size = PAGE_SIZE,
+ .read = reipl_fcp_scpdata_read,
+ .write = reipl_fcp_scpdata_write,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
reipl_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
reipl_block_fcp->ipl_info.fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
reipl_block_fcp->ipl_info.fcp.bootprog);
@@ -504,7 +775,6 @@ static struct attribute *reipl_fcp_attrs[] = {
};
static struct attribute_group reipl_fcp_attr_group = {
- .name = IPL_FCP_STR,
.attrs = reipl_fcp_attrs,
};
@@ -513,27 +783,26 @@ static struct attribute_group reipl_fcp_attr_group = {
DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
reipl_block_ccw->ipl_info.ccw.devno);
-static void reipl_get_ascii_loadparm(char *loadparm)
+static void reipl_get_ascii_loadparm(char *loadparm,
+ struct ipl_parameter_block *ibp)
{
- memcpy(loadparm, &reipl_block_ccw->ipl_info.ccw.load_param,
- LOADPARM_LEN);
+ memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
loadparm[LOADPARM_LEN] = 0;
- strstrip(loadparm);
+ strim(loadparm);
}
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+ char *page)
{
char buf[LOADPARM_LEN + 1];
- reipl_get_ascii_loadparm(buf);
+ reipl_get_ascii_loadparm(buf, ipb);
return sprintf(page, "%s\n", buf);
}
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+ const char *buf, size_t len)
{
int i, lp_len;
@@ -552,35 +821,128 @@ static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
return -EINVAL;
}
/* initialize loadparm with blanks */
- memset(&reipl_block_ccw->ipl_info.ccw.load_param, ' ', LOADPARM_LEN);
+ memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN);
/* copy and convert to ebcdic */
- memcpy(&reipl_block_ccw->ipl_info.ccw.load_param, buf, lp_len);
- ASCEBC(reipl_block_ccw->ipl_info.ccw.load_param, LOADPARM_LEN);
+ memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len);
+ ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN);
return len;
}
+/* NSS wrapper */
+static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+}
+
static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_ccw_loadparm_show,
- reipl_ccw_loadparm_store);
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
+ reipl_ccw_loadparm_store);
+
+static struct attribute *reipl_ccw_attrs_vm[] = {
+ &sys_reipl_ccw_device_attr.attr,
+ &sys_reipl_ccw_loadparm_attr.attr,
+ &sys_reipl_ccw_vmparm_attr.attr,
+ NULL,
+};
-static struct attribute *reipl_ccw_attrs[] = {
+static struct attribute *reipl_ccw_attrs_lpar[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
NULL,
};
-static struct attribute_group reipl_ccw_attr_group = {
+static struct attribute_group reipl_ccw_attr_group_vm = {
.name = IPL_CCW_STR,
- .attrs = reipl_ccw_attrs,
+ .attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
+ .name = IPL_CCW_STR,
+ .attrs = reipl_ccw_attrs_lpar,
};
/* NSS reipl device attributes */
+static void reipl_get_ascii_nss_name(char *dst,
+ struct ipl_parameter_block *ipb)
+{
+ memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ EBCASC(dst, NSS_NAME_SIZE);
+ dst[NSS_NAME_SIZE] = 0;
+}
-DEFINE_IPL_ATTR_STR_RW(reipl_nss, name, "%s\n", "%s\n", reipl_nss_name);
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char nss_name[NSS_NAME_SIZE + 1] = {};
+
+ reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
+ return sprintf(page, "%s\n", nss_name);
+}
+
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int nss_len;
+
+ /* ignore trailing newline */
+ nss_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ nss_len--;
+
+ if (nss_len > NSS_NAME_SIZE)
+ return -EINVAL;
+
+ memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+ if (nss_len > 0) {
+ reipl_block_nss->ipl_info.ccw.vm_flags |=
+ DIAG308_VM_FLAGS_NSS_VALID;
+ memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
+ ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ } else {
+ reipl_block_nss->ipl_info.ccw.vm_flags &=
+ ~DIAG308_VM_FLAGS_NSS_VALID;
+ }
+
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_nss_name_attr =
+ __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
+ reipl_nss_name_store);
+
+static struct kobj_attribute sys_reipl_nss_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
+ reipl_nss_loadparm_store);
static struct attribute *reipl_nss_attrs[] = {
&sys_reipl_nss_name_attr.attr,
+ &sys_reipl_nss_loadparm_attr.attr,
+ &sys_reipl_nss_vmparm_attr.attr,
NULL,
};
@@ -589,6 +951,13 @@ static struct attribute_group reipl_nss_attr_group = {
.attrs = reipl_nss_attrs,
};
+static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+{
+ reipl_block_actual = reipl_block;
+ os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block->hdr.len);
+}
+
/* reipl type */
static int reipl_set_type(enum ipl_type type)
@@ -604,6 +973,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_CCW_VM;
else
reipl_method = REIPL_METHOD_CCW_CIO;
+ set_reipl_block_actual(reipl_block_ccw);
break;
case IPL_TYPE_FCP:
if (diag308_set_works)
@@ -612,12 +982,17 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_FCP_RO_VM;
else
reipl_method = REIPL_METHOD_FCP_RO_DIAG;
+ set_reipl_block_actual(reipl_block_fcp);
break;
case IPL_TYPE_FCP_DUMP:
reipl_method = REIPL_METHOD_FCP_DUMP;
break;
case IPL_TYPE_NSS:
- reipl_method = REIPL_METHOD_NSS;
+ if (diag308_set_works)
+ reipl_method = REIPL_METHOD_NSS_DIAG;
+ else
+ reipl_method = REIPL_METHOD_NSS;
+ set_reipl_block_actual(reipl_block_nss);
break;
case IPL_TYPE_UNKNOWN:
reipl_method = REIPL_METHOD_DEFAULT;
@@ -654,12 +1029,40 @@ static struct kobj_attribute reipl_type_attr =
__ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
static struct kset *reipl_kset;
+static struct kset *reipl_fcp_kset;
-static void reipl_run(struct shutdown_trigger *trigger)
+static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
+ const enum ipl_method m)
+{
+ char loadparm[LOADPARM_LEN + 1] = {};
+ char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+ char nss_name[NSS_NAME_SIZE + 1] = {};
+ size_t pos = 0;
+
+ reipl_get_ascii_loadparm(loadparm, ipb);
+ reipl_get_ascii_nss_name(nss_name, ipb);
+ reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+
+ switch (m) {
+ case REIPL_METHOD_CCW_VM:
+ pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
+ break;
+ case REIPL_METHOD_NSS:
+ pos = sprintf(dst, "IPL %s", nss_name);
+ break;
+ default:
+ break;
+ }
+ if (strlen(loadparm) > 0)
+ pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
+ if (strlen(vmparm) > 0)
+ sprintf(dst + pos, " PARM %s", vmparm);
+}
+
+static void __reipl_run(void *unused)
{
struct ccw_dev_id devid;
- static char buf[100];
- char loadparm[LOADPARM_LEN + 1];
+ static char buf[128];
switch (reipl_method) {
case REIPL_METHOD_CCW_CIO:
@@ -668,13 +1071,7 @@ static void reipl_run(struct shutdown_trigger *trigger)
reipl_ccw_dev(&devid);
break;
case REIPL_METHOD_CCW_VM:
- reipl_get_ascii_loadparm(loadparm);
- if (strlen(loadparm) == 0)
- sprintf(buf, "IPL %X CLEAR",
- reipl_block_ccw->ipl_info.ccw.devno);
- else
- sprintf(buf, "IPL %X CLEAR LOADPARM '%s'",
- reipl_block_ccw->ipl_info.ccw.devno, loadparm);
+ get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
__cpcmd(buf, NULL, 0, NULL);
break;
case REIPL_METHOD_CCW_DIAG:
@@ -691,8 +1088,12 @@ static void reipl_run(struct shutdown_trigger *trigger)
case REIPL_METHOD_FCP_RO_VM:
__cpcmd("IPL", NULL, 0, NULL);
break;
+ case REIPL_METHOD_NSS_DIAG:
+ diag308(DIAG308_SET, reipl_block_nss);
+ diag308(DIAG308_IPL, NULL);
+ break;
case REIPL_METHOD_NSS:
- sprintf(buf, "IPL %s", reipl_nss_name);
+ get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
__cpcmd(buf, NULL, 0, NULL);
break;
case REIPL_METHOD_DEFAULT:
@@ -701,22 +1102,46 @@ static void reipl_run(struct shutdown_trigger *trigger)
diag308(DIAG308_IPL, NULL);
break;
case REIPL_METHOD_FCP_DUMP:
- default:
break;
}
disabled_wait((unsigned long) __builtin_return_address(0));
}
-static void __init reipl_probe(void)
+static void reipl_run(struct shutdown_trigger *trigger)
+{
+ smp_call_ipl_cpu(__reipl_run, NULL);
+}
+
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
{
- void *buffer;
+ ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+ ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+ ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+}
- buffer = (void *) get_zeroed_page(GFP_KERNEL);
- if (!buffer)
- return;
- if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
- diag308_set_works = 1;
- free_page((unsigned long)buffer);
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+ /* LOADPARM */
+ /* check if read scp info worked and set loadparm */
+ if (sclp_ipl_info.is_valid)
+ memcpy(ipb->ipl_info.ccw.load_parm,
+ &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ else
+ /* read scp info failed: set empty loadparm (EBCDIC blanks) */
+ memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN);
+ ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+
+ /* VM PARM */
+ if (MACHINE_IS_VM && diag308_set_works &&
+ (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+
+ ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+ ipb->ipl_info.ccw.vm_parm_len =
+ ipl_block.ipl_info.ccw.vm_parm_len;
+ memcpy(ipb->ipl_info.ccw.vm_parm,
+ ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+ }
}
static int __init reipl_nss_init(void)
@@ -725,10 +1150,31 @@ static int __init reipl_nss_init(void)
if (!MACHINE_IS_VM)
return 0;
+
+ reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_nss)
+ return -ENOMEM;
+
+ if (!diag308_set_works)
+ sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
+
rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
if (rc)
return rc;
- strncpy(reipl_nss_name, kernel_nss_name, NSS_NAME_SIZE + 1);
+
+ reipl_block_ccw_init(reipl_block_nss);
+ if (ipl_info.type == IPL_TYPE_NSS) {
+ memset(reipl_block_nss->ipl_info.ccw.nss_name,
+ ' ', NSS_NAME_SIZE);
+ memcpy(reipl_block_nss->ipl_info.ccw.nss_name,
+ kernel_nss_name, strlen(kernel_nss_name));
+ ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ reipl_block_nss->ipl_info.ccw.vm_flags |=
+ DIAG308_VM_FLAGS_NSS_VALID;
+
+ reipl_block_ccw_fill_parms(reipl_block_nss);
+ }
+
reipl_capabilities |= IPL_TYPE_NSS;
return 0;
}
@@ -740,28 +1186,27 @@ static int __init reipl_ccw_init(void)
reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
if (!reipl_block_ccw)
return -ENOMEM;
- rc = sysfs_create_group(&reipl_kset->kobj, &reipl_ccw_attr_group);
- if (rc) {
- free_page((unsigned long)reipl_block_ccw);
- return rc;
+
+ if (MACHINE_IS_VM) {
+ if (!diag308_set_works)
+ sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ &reipl_ccw_attr_group_vm);
+ } else {
+ if(!diag308_set_works)
+ sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ &reipl_ccw_attr_group_lpar);
}
- reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
- reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
- reipl_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
- reipl_block_ccw->hdr.flags = DIAG308_FLAGS_LP_VALID;
- /* check if read scp info worked and set loadparm */
- if (sclp_ipl_info.is_valid)
- memcpy(reipl_block_ccw->ipl_info.ccw.load_param,
- &sclp_ipl_info.loadparm, LOADPARM_LEN);
- else
- /* read scp info failed: set empty loadparm (EBCDIC blanks) */
- memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40,
- LOADPARM_LEN);
- if (!MACHINE_IS_VM && !diag308_set_works)
- sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
- if (ipl_info.type == IPL_TYPE_CCW)
+ if (rc)
+ return rc;
+
+ reipl_block_ccw_init(reipl_block_ccw);
+ if (ipl_info.type == IPL_TYPE_CCW) {
reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+ reipl_block_ccw_fill_parms(reipl_block_ccw);
+ }
+
reipl_capabilities |= IPL_TYPE_CCW;
return 0;
}
@@ -770,22 +1215,45 @@ static int __init reipl_fcp_init(void)
{
int rc;
- if ((!diag308_set_works) && (ipl_info.type != IPL_TYPE_FCP))
- return 0;
- if ((!diag308_set_works) && (ipl_info.type == IPL_TYPE_FCP))
- make_attrs_ro(reipl_fcp_attrs);
+ if (!diag308_set_works) {
+ if (ipl_info.type == IPL_TYPE_FCP) {
+ make_attrs_ro(reipl_fcp_attrs);
+ sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
+ } else
+ return 0;
+ }
reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
if (!reipl_block_fcp)
return -ENOMEM;
- rc = sysfs_create_group(&reipl_kset->kobj, &reipl_fcp_attr_group);
+
+ /* sysfs: create fcp kset for mixing attr group and bin attrs */
+ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_fcp_kset) {
+ free_page((unsigned long) reipl_block_fcp);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+ if (rc) {
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
+ }
+
+ rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj,
+ &sys_reipl_fcp_scp_data_attr);
if (rc) {
- free_page((unsigned long)reipl_block_fcp);
+ sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
return rc;
}
- if (ipl_info.type == IPL_TYPE_FCP) {
+
+ if (ipl_info.type == IPL_TYPE_FCP)
memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
- } else {
+ else {
reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
@@ -796,6 +1264,29 @@ static int __init reipl_fcp_init(void)
return 0;
}
+static int __init reipl_type_init(void)
+{
+ enum ipl_type reipl_type = ipl_info.type;
+ struct ipl_parameter_block *reipl_block;
+ unsigned long size;
+
+ reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size);
+ if (!reipl_block)
+ goto out;
+ /*
+ * If we have an OS info reipl block, this will be used
+ */
+ if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, reipl_block, size);
+ reipl_type = IPL_TYPE_FCP;
+ } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ memcpy(reipl_block_ccw, reipl_block, size);
+ reipl_type = IPL_TYPE_CCW;
+ }
+out:
+ return reipl_set_type(reipl_type);
+}
+
static int __init reipl_init(void)
{
int rc;
@@ -817,10 +1308,7 @@ static int __init reipl_init(void)
rc = reipl_nss_init();
if (rc)
return rc;
- rc = reipl_set_type(ipl_info.type);
- if (rc)
- return rc;
- return 0;
+ return reipl_type_init();
}
static struct shutdown_action __refdata reipl_action = {
@@ -835,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = {
/* FCP dump device attributes */
-DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
dump_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
dump_block_fcp->ipl_info.fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
dump_block_fcp->ipl_info.fcp.bootprog);
@@ -926,38 +1414,50 @@ static struct kobj_attribute dump_type_attr =
static struct kset *dump_kset;
-static void dump_run(struct shutdown_trigger *trigger)
+static void diag308_dump(void *dump_block)
+{
+ diag308(DIAG308_SET, dump_block);
+ while (1) {
+ if (diag308(DIAG308_DUMP, NULL) != 0x302)
+ break;
+ udelay_simple(USEC_PER_SEC);
+ }
+}
+
+static void __dump_run(void *unused)
{
struct ccw_dev_id devid;
static char buf[100];
switch (dump_method) {
case DUMP_METHOD_CCW_CIO:
- smp_send_stop();
devid.devno = dump_block_ccw->ipl_info.ccw.devno;
devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case DUMP_METHOD_CCW_VM:
- smp_send_stop();
sprintf(buf, "STORE STATUS");
__cpcmd(buf, NULL, 0, NULL);
sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
__cpcmd(buf, NULL, 0, NULL);
break;
case DUMP_METHOD_CCW_DIAG:
- diag308(DIAG308_SET, dump_block_ccw);
- diag308(DIAG308_DUMP, NULL);
+ diag308_dump(dump_block_ccw);
break;
case DUMP_METHOD_FCP_DIAG:
- diag308(DIAG308_SET, dump_block_fcp);
- diag308(DIAG308_DUMP, NULL);
+ diag308_dump(dump_block_fcp);
break;
- case DUMP_METHOD_NONE:
default:
- return;
+ break;
}
- printk(KERN_EMERG "Dump failed!\n");
+}
+
+static void dump_run(struct shutdown_trigger *trigger)
+{
+ if (dump_method == DUMP_METHOD_NONE)
+ return;
+ smp_send_stop();
+ smp_call_ipl_cpu(__dump_run, NULL);
}
static int __init dump_ccw_init(void)
@@ -1033,6 +1533,31 @@ static struct shutdown_action __refdata dump_action = {
.init = dump_init,
};
+static void dump_reipl_run(struct shutdown_trigger *trigger)
+{
+ unsigned long ipib = (unsigned long) reipl_block_actual;
+ unsigned int csum;
+
+ csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ mem_assign_absolute(S390_lowcore.ipib, ipib);
+ mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
+ dump_run(trigger);
+}
+
+static int __init dump_reipl_init(void)
+{
+ if (!diag308_set_works)
+ return -EOPNOTSUPP;
+ else
+ return 0;
+}
+
+static struct shutdown_action __refdata dump_reipl_action = {
+ .name = SHUTDOWN_ACTION_DUMP_REIPL_STR,
+ .fn = dump_reipl_run,
+ .init = dump_reipl_init,
+};
+
/*
* vmcmd shutdown action: Trigger vm command on shutdown.
*/
@@ -1041,17 +1566,20 @@ static char vmcmd_on_reboot[128];
static char vmcmd_on_panic[128];
static char vmcmd_on_halt[128];
static char vmcmd_on_poff[128];
+static char vmcmd_on_restart[128];
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
static struct attribute *vmcmd_attrs[] = {
&sys_vmcmd_on_reboot_attr.attr,
&sys_vmcmd_on_panic_attr.attr,
&sys_vmcmd_on_halt_attr.attr,
&sys_vmcmd_on_poff_attr.attr,
+ &sys_vmcmd_on_restart_attr.attr,
NULL,
};
@@ -1063,7 +1591,7 @@ static struct kset *vmcmd_kset;
static void vmcmd_run(struct shutdown_trigger *trigger)
{
- char *cmd, *next_cmd;
+ char *cmd;
if (strcmp(trigger->name, ON_REIPL_STR) == 0)
cmd = vmcmd_on_reboot;
@@ -1073,26 +1601,20 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
cmd = vmcmd_on_halt;
else if (strcmp(trigger->name, ON_POFF_STR) == 0)
cmd = vmcmd_on_poff;
+ else if (strcmp(trigger->name, ON_RESTART_STR) == 0)
+ cmd = vmcmd_on_restart;
else
return;
if (strlen(cmd) == 0)
return;
- do {
- next_cmd = strchr(cmd, '\n');
- if (next_cmd) {
- next_cmd[0] = 0;
- next_cmd += 1;
- }
- __cpcmd(cmd, NULL, 0, NULL);
- cmd = next_cmd;
- } while (cmd != NULL);
+ __cpcmd(cmd, NULL, 0, NULL);
}
static int vmcmd_init(void)
{
if (!MACHINE_IS_VM)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
if (!vmcmd_kset)
return -ENOMEM;
@@ -1108,12 +1630,10 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
static void stop_run(struct shutdown_trigger *trigger)
{
- if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+ if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+ strcmp(trigger->name, ON_RESTART_STR) == 0)
disabled_wait((unsigned long) __builtin_return_address(0));
- else {
- signal_processor(smp_processor_id(), sigp_stop);
- for (;;);
- }
+ smp_stop_cpu();
}
static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
@@ -1122,7 +1642,8 @@ static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
/* action list */
static struct shutdown_action *shutdown_actions_list[] = {
- &ipl_action, &reipl_action, &dump_action, &vmcmd_action, &stop_action};
+ &ipl_action, &reipl_action, &dump_reipl_action, &dump_action,
+ &vmcmd_action, &stop_action};
#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
/*
@@ -1135,13 +1656,15 @@ static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
size_t len)
{
int i;
+
for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
- if (!shutdown_actions_list[i])
- continue;
- if (strncmp(buf, shutdown_actions_list[i]->name,
- strlen(shutdown_actions_list[i]->name)) == 0) {
- trigger->action = shutdown_actions_list[i];
- return len;
+ if (sysfs_streq(buf, shutdown_actions_list[i]->name)) {
+ if (shutdown_actions_list[i]->init_rc) {
+ return shutdown_actions_list[i]->init_rc;
+ } else {
+ trigger->action = shutdown_actions_list[i];
+ return len;
+ }
}
}
return -EINVAL;
@@ -1198,10 +1721,51 @@ static struct kobj_attribute on_panic_attr =
static void do_panic(void)
{
+ lgr_info_log();
on_panic_trigger.action->fn(&on_panic_trigger);
stop_run(&on_panic_trigger);
}
+/* on restart */
+
+static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
+ &stop_action};
+
+static ssize_t on_restart_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_restart_trigger.action->name);
+}
+
+static ssize_t on_restart_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_restart_trigger, len);
+}
+
+static struct kobj_attribute on_restart_attr =
+ __ATTR(on_restart, 0644, on_restart_show, on_restart_store);
+
+static void __do_restart(void *ignore)
+{
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+ crash_kexec(NULL);
+#endif
+ on_restart_trigger.action->fn(&on_restart_trigger);
+ stop_run(&on_restart_trigger);
+}
+
+void do_restart(void)
+{
+ tracing_off();
+ debug_locks_off();
+ lgr_info_log();
+ smp_call_online_cpu(__do_restart, NULL);
+}
+
/* on halt */
static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
@@ -1278,7 +1842,9 @@ static void __init shutdown_triggers_init(void)
if (sysfs_create_file(&shutdown_actions_kset->kobj,
&on_poff_attr.attr))
goto fail;
-
+ if (sysfs_create_file(&shutdown_actions_kset->kobj,
+ &on_restart_attr.attr))
+ goto fail;
return;
fail:
panic("shutdown_triggers_init failed\n");
@@ -1291,14 +1857,13 @@ static void __init shutdown_actions_init(void)
for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
if (!shutdown_actions_list[i]->init)
continue;
- if (shutdown_actions_list[i]->init())
- shutdown_actions_list[i] = NULL;
+ shutdown_actions_list[i]->init_rc =
+ shutdown_actions_list[i]->init();
}
}
static int __init s390_ipl_init(void)
{
- reipl_probe();
sclp_get_ipl_info(&sclp_ipl_info);
shutdown_actions_init();
shutdown_triggers_init();
@@ -1374,7 +1939,7 @@ static int on_panic_notify(struct notifier_block *self,
static struct notifier_block on_panic_nb = {
.notifier_call = on_panic_notify,
- .priority = 0,
+ .priority = INT_MIN,
};
void __init setup_ipl(void)
@@ -1398,17 +1963,24 @@ void __init setup_ipl(void)
sizeof(ipl_info.data.nss.name));
break;
case IPL_TYPE_UNKNOWN:
- default:
/* We have no info to copy */
break;
}
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
+void __init ipl_update_parameters(void)
+{
+ int rc;
+
+ rc = diag308(DIAG308_STORE, &ipl_block);
+ if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG))
+ diag308_set_works = 1;
+}
+
void __init ipl_save_parameters(void)
{
struct cio_iplinfo iplinfo;
- unsigned int *ipl_ptr;
void *src, *dst;
if (cio_get_iplinfo(&iplinfo))
@@ -1419,11 +1991,10 @@ void __init ipl_save_parameters(void)
if (!iplinfo.is_qdio)
return;
ipl_flags |= IPL_PARMBLOCK_VALID;
- ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR;
- src = (void *)(unsigned long)*ipl_ptr;
+ src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr;
dst = (void *)IPL_PARMBLOCK_ORIGIN;
memmove(dst, src, PAGE_SIZE);
- *ipl_ptr = IPL_PARMBLOCK_ORIGIN;
+ S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
}
static LIST_HEAD(rcall);
@@ -1449,13 +2020,19 @@ static void do_reset_calls(void)
{
struct reset_call *reset;
+#ifdef CONFIG_64BIT
+ if (diag308_set_works) {
+ diag308_reset();
+ return;
+ }
+#endif
list_for_each_entry(reset, &rcall, list)
reset->fn();
}
u32 dump_prefix_page;
-void s390_reset_system(void)
+void s390_reset_system(void (*func)(void *), void *data)
{
struct _lowcore *lc;
@@ -1474,15 +2051,19 @@ void s390_reset_system(void)
__ctl_clear_bit(0,28);
/* Set new machine check handler */
- S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
+ S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
S390_lowcore.mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
/* Set new program check handler */
- S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
S390_lowcore.program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+ /* Store status at absolute zero */
+ store_status();
+
do_reset_calls();
+ if (func)
+ func(data);
}
-
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e7c5bfb7c75..99b0b09646c 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -1,104 +1,307 @@
/*
- * arch/s390/kernel/irq.c
- *
- * Copyright IBM Corp. 2004,2007
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Thomas Spatzier (tspat@de.ibm.com)
+ * Copyright IBM Corp. 2004, 2011
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Thomas Spatzier <tspat@de.ibm.com>,
*
* This file contains interrupt related functions.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
-#include <linux/cpu.h>
#include <linux/proc_fs.h>
#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include "entry.h"
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
+
+/*
+ * Label pair for one interrupt class. show_interrupts() prints name as the
+ * row tag and, when desc is non-NULL, appends desc after the counters.
+ */
+struct irq_class {
+ char *name; /* short row tag, e.g. "EXT" */
+ char *desc; /* optional longer description; may be NULL */
+};
+
+/*
+ * The list of "main" irq classes on s390. This is the list of interrupts
+ * that appear both in /proc/stat ("intr" line) and /proc/interrupts.
+ * Historically only external and I/O interrupts have been part of /proc/stat.
+ * We can't add the split external and I/O sub classes since the first field
+ * in the "intr" line in /proc/stat is supposed to be the sum of all other
+ * fields.
+ * Since the external and I/O interrupt fields are already sums we would end
+ * up with having a sum which accounts each interrupt twice.
+ */
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ [EXT_INTERRUPT] = {.name = "EXT"},
+ [IO_INTERRUPT] = {.name = "I/O"},
+ [THIN_INTERRUPT] = {.name = "AIO"},
+};
+
+/*
+ * The list of split external and I/O interrupts that appear only in
+ * /proc/interrupts.
+ * In addition this list contains non external / I/O events like NMIs.
+ */
+static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
+ [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"},
+ [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"},
+ [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"},
+ [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"},
+ [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"},
+ [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"},
+ [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"},
+ [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"},
+ [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"},
+ [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
+ [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+ [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"},
+ [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"},
+ [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"},
+ [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"},
+ [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"},
+ [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"},
+ [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"},
+ [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"},
+ [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
+ [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
+ [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+ [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
+ [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
+};
+
+/*
+ * Boot-time interrupt setup: calls the cio, airq and ext interrupt init
+ * routines, in that order.
+ */
+void __init init_IRQ(void)
+{
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
+/*
+ * Dispatch interrupt number irq that arrived with register state regs.
+ *
+ * regs is published through set_irq_regs() while the handler runs and the
+ * previous pointer is put back before returning. The dispatch itself is
+ * bracketed by irq_enter()/irq_exit().
+ */
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+ /* The clock comparator value has been reached: run its work before irq. */
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
/*
* show_interrupts is needed by /proc/interrupts.
*/
int show_interrupts(struct seq_file *p, void *v)
{
- static const char *intrclass_names[] = { "EXT", "I/O", };
- int i = *(loff_t *) v, j;
+ int irq = *(loff_t *) v;
+ int cpu;
get_online_cpus();
- if (i == 0) {
+ if (irq == 0) {
seq_puts(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
+ goto out;
}
-
- if (i < NR_IRQS) {
- seq_printf(p, "%s: ", intrclass_names[i]);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_putc(p, '\n');
-
- }
+ if (irq < NR_IRQS) {
+ if (irq >= NR_IRQS_BASE)
+ goto out;
+ seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+ seq_putc(p, '\n');
+ goto out;
+ }
+ for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
+ seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
+ if (irqclass_sub_desc[irq].desc)
+ seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
+ seq_putc(p, '\n');
+ }
+out:
put_online_cpus();
- return 0;
+ return 0;
+}
+
+/*
+ * Clamp the requested lower bound for dynamic irq numbers so that it is
+ * never below THIN_INTERRUPT; larger values are returned unchanged.
+ */
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+	if (from < THIN_INTERRUPT)
+		return THIN_INTERRUPT;
+	return from;
}
/*
- * For compatibilty only. S/390 specific setup of interrupts et al. is done
- * much later in init_channel_subsystem().
+ * Switch to the asynchronous interrupt stack for softirq execution.
*/
-void __init
-init_IRQ(void)
+void do_softirq_own_stack(void)
{
- /* nothing... */
+ unsigned long old, new;
+
+ /* Get current stack pointer. */
+ asm volatile("la %0,0(15)" : "=a" (old));
+ /* Check against async. stack address range. */
+ new = S390_lowcore.async_stack;
+ if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
+ /* Need to switch to the async. stack. */
+ new -= STACK_FRAME_OVERHEAD;
+ ((struct stack_frame *) new)->back_chain = old;
+ asm volatile(" la 15,0(%0)\n"
+ " basr 14,%2\n"
+ " la 15,0(%1)\n"
+ : : "a" (new), "a" (old),
+ "a" (__do_softirq)
+ : "0", "1", "2", "3", "4", "5", "14",
+ "cc", "memory" );
+ } else {
+ /* We are already on the async stack. */
+ __do_softirq();
+ }
}
/*
- * Switch to the asynchronous interrupt stack for softirq execution.
+ * ext_int_hash[index] is the list head for all external interrupts that hash
+ * to this index.
*/
-asmlinkage void do_softirq(void)
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
+
+/*
+ * One registered external interrupt handler. Entries hang off
+ * ext_int_hash[ext_hash(code)] and are released with kfree_rcu() through
+ * the rcu member.
+ */
+struct ext_int_info {
+ ext_int_handler_t handler; /* callback invoked for a matching code */
+ struct hlist_node entry; /* link in the ext_int_hash bucket */
+ struct rcu_head rcu; /* used by kfree_rcu() on unregister */
+ u16 code; /* external interrupt code this entry handles */
+};
+
+/* ext_int_hash_lock protects the handler lists for external interrupts */
+static DEFINE_SPINLOCK(ext_int_hash_lock);
+
+/*
+ * Map an external interrupt code to a bucket index of ext_int_hash.
+ * The final mask only works for a power-of-two table size, which the
+ * BUILD_BUG_ON checks at compile time.
+ */
+static inline int ext_hash(u16 code)
+{
+ BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+ return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
+}
+
+/*
+ * Register handler for external interrupt code.
+ *
+ * A new list entry is allocated with GFP_ATOMIC and added at the head of
+ * the matching hash bucket while holding ext_int_hash_lock. No check for an
+ * already registered (code, handler) pair is made here.
+ *
+ * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
+ */
+int register_external_irq(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p;
+ unsigned long flags;
+ int index;
+
+ p = kmalloc(sizeof(*p), GFP_ATOMIC);
+ if (!p)
+ return -ENOMEM;
+ p->code = code;
+ p->handler = handler;
+ index = ext_hash(code);
+
+ /* Writers serialize on the spinlock; the list is updated with the _rcu helper. */
+ spin_lock_irqsave(&ext_int_hash_lock, flags);
+ hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
+ spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(register_external_irq);
+
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
{
- unsigned long flags, old, new;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (old));
- /* Check against async. stack address range. */
- new = S390_lowcore.async_stack;
- if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
- /* Need to switch to the async. stack. */
- new -= STACK_FRAME_OVERHEAD;
- ((struct stack_frame *) new)->back_chain = old;
-
- asm volatile(" la 15,0(%0)\n"
- " basr 14,%2\n"
- " la 15,0(%1)\n"
- : : "a" (new), "a" (old),
- "a" (__do_softirq)
- : "0", "1", "2", "3", "4", "5", "14",
- "cc", "memory" );
- } else
- /* We are already on the async stack. */
- __do_softirq();
+ struct ext_int_info *p;
+ unsigned long flags;
+ int index = ext_hash(code);
+
+ spin_lock_irqsave(&ext_int_hash_lock, flags);
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (p->code == code && p->handler == handler) {
+ hlist_del_rcu(&p->entry);
+ kfree_rcu(p, rcu);
+ }
}
+ spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(unregister_external_irq);
+
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
+{
+ struct pt_regs *regs = get_irq_regs();
+ struct ext_code ext_code;
+ struct ext_int_info *p;
+ int index;
- local_irq_restore(flags);
+ ext_code = *(struct ext_code *) &regs->int_code;
+ if (ext_code.code != EXT_IRQ_CLK_COMP)
+ __get_cpu_var(s390_idle).nohz_delay = 1;
+
+ index = ext_hash(ext_code.code);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (unlikely(p->code != ext_code.code))
+ continue;
+ p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+ }
+ rcu_read_unlock();
+ return IRQ_HANDLED;
}
-void init_irq_proc(void)
+static struct irqaction external_interrupt = {
+ .name = "EXT",
+ .handler = do_ext_interrupt,
+};
+
+void __init init_ext_interrupts(void)
{
- struct proc_dir_entry *root_irq_dir;
+ int idx;
+
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_HLIST_HEAD(&ext_int_hash[idx]);
- root_irq_dir = proc_mkdir("irq", NULL);
- create_prof_cpu_mask(root_irq_dir);
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ setup_irq(EXT_INTERRUPT, &external_interrupt);
+}
+
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
+
+/*
+ * Take a reference on an interrupt subclass. The first reference calls
+ * ctl_set_bit(0, subclass) (presumably enabling that bit in control
+ * register 0 - confirm against ctl_set_bit()); later references only bump
+ * the count. Serialized by irq_subclass_lock.
+ *
+ * NOTE(review): subclass indexes the 64-entry irq_subclass_refcount array
+ * without a range check, and the counter is an unsigned char.
+ */
+void irq_subclass_register(enum irq_subclass subclass)
+{
+ spin_lock(&irq_subclass_lock);
+ if (!irq_subclass_refcount[subclass])
+ ctl_set_bit(0, subclass);
+ irq_subclass_refcount[subclass]++;
+ spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_register);
+
+/*
+ * Drop a reference on an interrupt subclass. When the count reaches zero
+ * ctl_clear_bit(0, subclass) is called. Serialized by irq_subclass_lock.
+ *
+ * NOTE(review): the count is decremented unconditionally, so an unbalanced
+ * call wraps the unsigned char counter.
+ */
+void irq_subclass_unregister(enum irq_subclass subclass)
+{
+ spin_lock(&irq_subclass_lock);
+ irq_subclass_refcount[subclass]--;
+ if (!irq_subclass_refcount[subclass])
+ ctl_clear_bit(0, subclass);
+ spin_unlock(&irq_subclass_lock);
}
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 00000000000..b987ab2c154
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,70 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+/*
+ * Image of the instruction written over a jump label site: a 16-bit opcode
+ * followed by a signed 32-bit offset. __packed keeps the two fields
+ * adjacent with no padding.
+ */
+struct insn {
+ u16 opcode;
+ s32 offset;
+} __packed;
+
+/* Arguments passed through stop_machine() to __sm_arch_jump_label_transform(). */
+struct insn_args {
+ struct jump_entry *entry; /* jump label site to rewrite */
+ enum jump_label_type type; /* requested state of the site */
+};
+
+/*
+ * Rewrite the instruction at entry->code.
+ *
+ * For JUMP_LABEL_ENABLE a "brcl 15" to entry->target is written; the offset
+ * field is the byte distance from code to target divided by two. For any
+ * other type a "brcl 0,0" is written. JUMP_LABEL_NOP_SIZE bytes are stored
+ * with probe_kernel_write(); a failure only triggers WARN_ON_ONCE.
+ */
+static void __jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct insn insn;
+ int rc;
+
+ if (type == JUMP_LABEL_ENABLE) {
+ /* brcl 15,offset */
+ insn.opcode = 0xc0f4;
+ insn.offset = (entry->target - entry->code) >> 1;
+ } else {
+ /* brcl 0,0 */
+ insn.opcode = 0xc004;
+ insn.offset = 0;
+ }
+
+ rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE);
+ WARN_ON_ONCE(rc < 0);
+}
+
+/*
+ * stop_machine() callback: unpack the insn_args in data and apply the
+ * transform. Always returns 0.
+ */
+static int __sm_arch_jump_label_transform(void *data)
+{
+ struct insn_args *args = data;
+
+ __jump_label_transform(args->entry, args->type);
+ return 0;
+}
+
+/*
+ * Rewrite a jump label site by running the transform through
+ * stop_machine() (NULL cpumask argument).
+ */
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct insn_args args;
+
+ args.entry = entry;
+ args.type = type;
+
+ stop_machine(__sm_arch_jump_label_transform, &args, NULL);
+}
+
+/*
+ * Same rewrite as arch_jump_label_transform(), but applied directly
+ * without going through stop_machine().
+ */
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __jump_label_transform(entry, type);
+}
+
+#endif
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index ed04d1372d5..bc71a7b95af 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -15,7 +15,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
- * Copyright (C) IBM Corporation, 2002, 2006
+ * Copyright IBM Corp. 2002, 2006
*
* s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
*/
@@ -25,333 +25,467 @@
#include <linux/preempt.h>
#include <linux/stop_machine.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
-#include <asm/uaccess.h>
-#include <linux/module.h>
+#include <asm/dis.h>
-DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
-{
- /* Make sure the probe isn't going on a difficult instruction */
- if (is_prohibited_opcode((kprobe_opcode_t *) p->addr))
- return -EINVAL;
-
- if ((unsigned long)p->addr & 0x01) {
- printk("Attempt to register kprobe at an unaligned address\n");
- return -EINVAL;
- }
+struct kretprobe_blackpoint kretprobe_blacklist[] = { };
- /* Use the get_insn_slot() facility for correctness */
- if (!(p->ainsn.insn = get_insn_slot()))
- return -ENOMEM;
+DEFINE_INSN_CACHE_OPS(dmainsn);
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+/* .alloc hook of kprobe_dmainsn_slots: one page from GFP_KERNEL | GFP_DMA. */
+static void *alloc_dmainsn_page(void)
+{
+ return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
- get_instruction_type(&p->ainsn);
- p->opcode = *p->addr;
- return 0;
+/* .free hook of kprobe_dmainsn_slots: give the page back with free_page(). */
+static void free_dmainsn_page(void *page)
+{
+ free_page((unsigned long)page);
}
-int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction)
+/*
+ * Instruction slot cache whose pages come from alloc_dmainsn_page().
+ * Presumably this backs get_dmainsn_slot()/free_dmainsn_slot() generated
+ * by DEFINE_INSN_CACHE_OPS(dmainsn) - confirm against that macro.
+ */
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+ .alloc = alloc_dmainsn_page,
+ .free = free_dmainsn_page,
+ .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+ .insn_size = MAX_INSN_SIZE,
+};
+
+static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
{
- switch (*(__u8 *) instruction) {
+ if (!is_known_insn((unsigned char *)insn))
+ return -EINVAL;
+ switch (insn[0] >> 8) {
case 0x0c: /* bassm */
case 0x0b: /* bsm */
case 0x83: /* diag */
case 0x44: /* ex */
+ case 0xac: /* stnsm */
+ case 0xad: /* stosm */
return -EINVAL;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x00: /* exrl */
+ return -EINVAL;
+ }
}
- switch (*(__u16 *) instruction) {
+ switch (insn[0]) {
case 0x0101: /* pr */
case 0xb25a: /* bsa */
case 0xb240: /* bakr */
case 0xb258: /* bsg */
case 0xb218: /* pc */
case 0xb228: /* pt */
+ case 0xb98d: /* epsw */
return -EINVAL;
}
return 0;
}
-void __kprobes get_instruction_type(struct arch_specific_insn *ainsn)
+/*
+ * Return the FIXUP_* flags that apply to the instruction starting at insn.
+ *
+ * insn is indexed in 16-bit units: insn[0] >> 8 is the first opcode byte,
+ * insn[0] & 0x0f the low nibble of the second byte and insn[2] & 0xff the
+ * last byte of a six-byte instruction. Anything not listed below keeps the
+ * default FIXUP_PSW_NORMAL.
+ */
+static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
{
/* default fixup method */
- ainsn->fixup = FIXUP_PSW_NORMAL;
-
- /* save r1 operand */
- ainsn->reg = (*ainsn->insn & 0xf0) >> 4;
+ int fixup = FIXUP_PSW_NORMAL;
- /* save the instruction length (pop 5-5) in bytes */
- switch (*(__u8 *) (ainsn->insn) >> 6) {
- case 0:
- ainsn->ilen = 2;
- break;
- case 1:
- case 2:
- ainsn->ilen = 4;
- break;
- case 3:
- ainsn->ilen = 6;
- break;
- }
-
- switch (*(__u8 *) ainsn->insn) {
+ switch (insn[0] >> 8) {
case 0x05: /* balr */
case 0x0d: /* basr */
- ainsn->fixup = FIXUP_RETURN_REGISTER;
+ fixup = FIXUP_RETURN_REGISTER;
/* if r2 = 0, no branch will be taken */
- if ((*ainsn->insn & 0x0f) == 0)
- ainsn->fixup |= FIXUP_BRANCH_NOT_TAKEN;
+ if ((insn[0] & 0x0f) == 0)
+ fixup |= FIXUP_BRANCH_NOT_TAKEN;
break;
case 0x06: /* bctr */
case 0x07: /* bcr */
- ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
break;
case 0x45: /* bal */
case 0x4d: /* bas */
- ainsn->fixup = FIXUP_RETURN_REGISTER;
+ fixup = FIXUP_RETURN_REGISTER;
break;
case 0x47: /* bc */
case 0x46: /* bct */
case 0x86: /* bxh */
case 0x87: /* bxle */
- ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
break;
case 0x82: /* lpsw */
- ainsn->fixup = FIXUP_NOT_REQUIRED;
+ fixup = FIXUP_NOT_REQUIRED;
break;
case 0xb2: /* lpswe */
- if (*(((__u8 *) ainsn->insn) + 1) == 0xb2) {
- ainsn->fixup = FIXUP_NOT_REQUIRED;
- }
+ if ((insn[0] & 0xff) == 0xb2)
+ fixup = FIXUP_NOT_REQUIRED;
break;
case 0xa7: /* bras */
- if ((*ainsn->insn & 0x0f) == 0x05) {
- ainsn->fixup |= FIXUP_RETURN_REGISTER;
- }
+ if ((insn[0] & 0x0f) == 0x05)
+ fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xc0:
- if ((*ainsn->insn & 0x0f) == 0x00 /* larl */
- || (*ainsn->insn & 0x0f) == 0x05) /* brasl */
- ainsn->fixup |= FIXUP_RETURN_REGISTER;
+ if ((insn[0] & 0x0f) == 0x05) /* brasl */
+ fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xeb:
- if (*(((__u8 *) ainsn->insn) + 5 ) == 0x44 || /* bxhg */
- *(((__u8 *) ainsn->insn) + 5) == 0x45) {/* bxleg */
- ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+ switch (insn[2] & 0xff) {
+ case 0x44: /* bxhg */
+ case 0x45: /* bxleg */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
}
break;
case 0xe3: /* bctg */
- if (*(((__u8 *) ainsn->insn) + 5) == 0x46) {
- ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+ if ((insn[2] & 0xff) == 0x46)
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0xec:
+ /*
+ * Compare-and-branch group. cgrb is EC..E4 and clgrb EC..E5;
+ * the register forms sit 0x80 above their relative-branch
+ * counterparts cgrj (0x64) and clgrj (0x65).
+ */
+ switch (insn[2] & 0xff) {
+ case 0xe4: /* cgrb */
+ case 0xe5: /* clgrb */
+ case 0xf6: /* crb */
+ case 0xf7: /* clrb */
+ case 0xfc: /* cgib */
+ case 0xfd: /* clgib */
+ case 0xfe: /* cib */
+ case 0xff: /* clib */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
}
break;
}
+ return fixup;
}
-static int __kprobes swap_instruction(void *aref)
+static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
{
- struct ins_replace_args *args = aref;
- u32 *addr;
- u32 instr;
- int err = -EFAULT;
+ /* Check if we have a RIL-b or RIL-c format instruction which
+ * we need to modify in order to avoid instruction emulation. */
+ switch (insn[0] >> 8) {
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x00) /* larl */
+ return true;
+ break;
+ case 0xc4:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* llhrl */
+ case 0x04: /* lghrl */
+ case 0x05: /* lhrl */
+ case 0x06: /* llghrl */
+ case 0x07: /* sthrl */
+ case 0x08: /* lgrl */
+ case 0x0b: /* stgrl */
+ case 0x0c: /* lgfrl */
+ case 0x0d: /* lrl */
+ case 0x0e: /* llgfrl */
+ case 0x0f: /* strl */
+ return true;
+ }
+ break;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* pfdrl */
+ case 0x04: /* cghrl */
+ case 0x05: /* chrl */
+ case 0x06: /* clghrl */
+ case 0x07: /* clhrl */
+ case 0x08: /* cgrl */
+ case 0x0a: /* clgrl */
+ case 0x0c: /* cgfrl */
+ case 0x0d: /* crl */
+ case 0x0e: /* clgfrl */
+ case 0x0f: /* clrl */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+static void __kprobes copy_instruction(struct kprobe *p)
+{
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+
+ memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
+ if (!is_insn_relative_long(p->ainsn.insn))
+ return;
/*
- * Text segment is read-only, hence we use stura to bypass dynamic
- * address translation to exchange the instruction. Since stura
- * always operates on four bytes, but we only want to exchange two
- * bytes do some calculations to get things right. In addition we
- * shall not cross any page boundaries (vmalloc area!) when writing
- * the new instruction.
+ * For pc-relative instructions in RIL-b or RIL-c format patch the
+ * RI2 displacement field. We have already made sure that the insn
+ * slot for the patched instruction is within the same 2GB area
+ * as the original instruction (either kernel image or module area).
+ * Therefore the new displacement will always fit.
*/
- addr = (u32 *)((unsigned long)args->ptr & -4UL);
- if ((unsigned long)args->ptr & 2)
- instr = ((*addr) & 0xffff0000) | args->new;
- else
- instr = ((*addr) & 0x0000ffff) | args->new << 16;
+ disp = *(s32 *)&p->ainsn.insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&p->ainsn.insn[1] = new_disp;
+}
- asm volatile(
- " lra %1,0(%1)\n"
- "0: stura %2,%1\n"
- "1: la %0,0\n"
- "2:\n"
- EX_TABLE(0b,2b)
- : "+d" (err)
- : "a" (addr), "d" (instr)
- : "memory", "cc");
+/*
+ * Non-zero when addr lies below _end (presumably the end of the kernel
+ * image - confirm against the linker script).
+ */
+static inline int is_kernel_addr(void *addr)
+{
+ return addr < (void *)_end;
+}
- return err;
+/*
+ * With CONFIG_64BIT: non-zero when addr lies within
+ * [MODULES_VADDR, MODULES_END] (both bounds accepted); the build fails if
+ * MODULES_LEN exceeds 2GB. Without CONFIG_64BIT every address is accepted.
+ */
+static inline int is_module_addr(void *addr)
+{
+#ifdef CONFIG_64BIT
+	void *first = (void *)MODULES_VADDR;
+	void *last = (void *)MODULES_END;
+
+	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+	return addr >= first && addr <= last;
+#else
+	return 1;
+#endif
}
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+/*
+ * Pick an instruction slot for probe p and store it in p->ainsn.insn.
+ * Addresses accepted by is_kernel_addr() get a slot from
+ * get_dmainsn_slot(), addresses accepted by is_module_addr() get one from
+ * get_insn_slot(); any other address gets none.
+ *
+ * Returns 0 when a slot was obtained, -ENOMEM otherwise.
+ */
+static int __kprobes s390_get_insn_slot(struct kprobe *p)
+{
+ /*
+ * Get an insn slot that is within the same 2GB area as the original
+ * instruction. That way instructions with a 32bit signed displacement
+ * field can be patched and executed within the insn slot.
+ */
+ p->ainsn.insn = NULL;
+ if (is_kernel_addr(p->addr))
+ p->ainsn.insn = get_dmainsn_slot();
+ else if (is_module_addr(p->addr))
+ p->ainsn.insn = get_insn_slot();
+ return p->ainsn.insn ? 0 : -ENOMEM;
+}
+
+/*
+ * Release the slot held in p->ainsn.insn, if any, to the cache selected by
+ * the same is_kernel_addr() test used when allocating, then clear the
+ * pointer so a second call is a no-op.
+ */
+static void __kprobes s390_free_insn_slot(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+ if (is_kernel_addr(p->addr))
+ free_dmainsn_slot(p->ainsn.insn, 0);
+ else
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+
+/*
+ * Validate probe p and set up its instruction slot.
+ *
+ * Returns -EINVAL for an odd probe address or an opcode rejected by
+ * is_prohibited_opcode(), -ENOMEM when no instruction slot is available,
+ * and 0 on success. On success the first opcode element at p->addr is saved
+ * in p->opcode and the instruction is copied into the slot.
+ */
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ if ((unsigned long) p->addr & 0x01)
+ return -EINVAL;
+ /* Make sure the probe isn't going on a difficult instruction */
+ if (is_prohibited_opcode(p->addr))
+ return -EINVAL;
+ if (s390_get_insn_slot(p))
+ return -ENOMEM;
+ /* copy_instruction() reads p->opcode, so it must be set first. */
+ p->opcode = *p->addr;
+ copy_instruction(p);
+ return 0;
+}
+
+/* Arguments handed to swap_instruction() through stop_machine(). */
+struct ins_replace_args {
+ kprobe_opcode_t *ptr; /* address to patch */
+ kprobe_opcode_t opcode; /* value written to *ptr */
+};
+
+static int __kprobes swap_instruction(void *aref)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long status = kcb->kprobe_status;
- struct ins_replace_args args;
-
- args.ptr = p->addr;
- args.old = p->opcode;
- args.new = BREAKPOINT_INSTRUCTION;
+ struct ins_replace_args *args = aref;
kcb->kprobe_status = KPROBE_SWAP_INST;
- stop_machine_run(swap_instruction, &args, NR_CPUS);
+ probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode));
kcb->kprobe_status = status;
+ return 0;
}
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long status = kcb->kprobe_status;
struct ins_replace_args args;
args.ptr = p->addr;
- args.old = BREAKPOINT_INSTRUCTION;
- args.new = p->opcode;
+ args.opcode = BREAKPOINT_INSTRUCTION;
+ stop_machine(swap_instruction, &args, NULL);
+}
- kcb->kprobe_status = KPROBE_SWAP_INST;
- stop_machine_run(swap_instruction, &args, NR_CPUS);
- kcb->kprobe_status = status;
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ struct ins_replace_args args;
+
+ args.ptr = p->addr;
+ args.opcode = p->opcode;
+ stop_machine(swap_instruction, &args, NULL);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
- mutex_unlock(&kprobe_mutex);
+ s390_free_insn_slot(p);
}
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
{
- per_cr_bits kprobe_per_regs[1];
+ struct per_regs per_kprobe;
- memset(kprobe_per_regs, 0, sizeof(per_cr_bits));
- regs->psw.addr = (unsigned long)p->ainsn.insn | PSW_ADDR_AMODE;
+ /* Set up the PER control registers %cr9-%cr11 */
+ per_kprobe.control = PER_EVENT_IFETCH;
+ per_kprobe.start = ip;
+ per_kprobe.end = ip;
- /* Set up the per control reg info, will pass to lctl */
- kprobe_per_regs[0].em_instruction_fetch = 1;
- kprobe_per_regs[0].starting_addr = (unsigned long)p->ainsn.insn;
- kprobe_per_regs[0].ending_addr = (unsigned long)p->ainsn.insn + 1;
+ /* Save control regs and psw mask */
+ __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ kcb->kprobe_saved_imask = regs->psw.mask &
+ (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
- /* Set the PER control regs, turns on single step for this address */
- __ctl_load(kprobe_per_regs, 9, 11);
+ /* Set PER control regs, turns on single step for the given address */
+ __ctl_load(per_kprobe, 9, 11);
regs->psw.mask |= PSW_MASK_PER;
- regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+ regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+ regs->psw.addr = ip | PSW_ADDR_AMODE;
}
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
{
- kcb->prev_kprobe.kp = kprobe_running();
- kcb->prev_kprobe.status = kcb->kprobe_status;
- kcb->prev_kprobe.kprobe_saved_imask = kcb->kprobe_saved_imask;
- memcpy(kcb->prev_kprobe.kprobe_saved_ctl, kcb->kprobe_saved_ctl,
- sizeof(kcb->kprobe_saved_ctl));
+ /* Restore control regs and psw mask, set new psw address */
+ __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+ regs->psw.mask &= ~PSW_MASK_PER;
+ regs->psw.mask |= kcb->kprobe_saved_imask;
+ regs->psw.addr = ip | PSW_ADDR_AMODE;
}
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+/*
+ * Activate a kprobe by storing its pointer to current_kprobe. The
+ * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
+ * two kprobes can be active, see KPROBE_REENTER.
+ */
+static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
- kcb->kprobe_status = kcb->prev_kprobe.status;
- kcb->kprobe_saved_imask = kcb->prev_kprobe.kprobe_saved_imask;
- memcpy(kcb->kprobe_saved_ctl, kcb->prev_kprobe.kprobe_saved_ctl,
- sizeof(kcb->kprobe_saved_ctl));
+ kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe);
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ __get_cpu_var(current_kprobe) = p;
}
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+/*
+ * Deactivate a kprobe by backing up to the previous state. If the
+ * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
+ * for any other state prev_kprobe.kp will be NULL.
+ */
+static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = p;
- /* Save the interrupt and per flags */
- kcb->kprobe_saved_imask = regs->psw.mask &
- (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
- /* Save the control regs that govern PER */
- __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
/* Replace the return addr with trampoline addr */
- regs->gprs[14] = (unsigned long)&kretprobe_trampoline;
+ regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
+}
+
+/*
+ * Called when kprobe p is hit while another kprobe is still active.
+ * In state KPROBE_HIT_SSDONE or KPROBE_HIT_ACTIVE the hit is only counted
+ * as missed. In any other state (KPROBE_HIT_SS, KPROBE_REENTER or an
+ * unknown value) the probe is reported and the kernel stops with BUG().
+ */
+static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
+ struct kprobe *p)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ default:
+ /*
+ * A kprobe on the code path to single step an instruction
+ * is a BUG. The code path resides in the .kprobes.text
+ * section and is executed with interrupts disabled.
+ */
+ printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
+ dump_kprobe(p);
+ BUG();
+ }
}
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
- struct kprobe *p;
- int ret = 0;
- unsigned long *addr = (unsigned long *)
- ((regs->psw.addr & PSW_ADDR_INSN) - 2);
struct kprobe_ctlblk *kcb;
+ struct kprobe *p;
/*
- * We don't want to be preempted for the entire
- * duration of kprobe processing
+ * We want to disable preemption for the entire duration of kprobe
+ * processing. That includes the calls to the pre/post handlers
+ * and single stepping the kprobe instruction.
*/
preempt_disable();
kcb = get_kprobe_ctlblk();
+ p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
- regs->psw.mask &= ~PSW_MASK_PER;
- regs->psw.mask |= kcb->kprobe_saved_imask;
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
+ if (p) {
+ if (kprobe_running()) {
+ /*
+ * We have hit a kprobe while another is still
+ * active. This can happen in the pre and post
+ * handler. Single step the instruction of the
+ * new probe but do not call any handler function
+ * of this secondary kprobe.
+ * push_kprobe and pop_kprobe save and restore
+ * the currently active kprobe.
*/
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
+ kprobe_reenter_check(kcb, p);
+ push_kprobe(kcb, p);
kcb->kprobe_status = KPROBE_REENTER;
- return 1;
} else {
- p = __get_cpu_var(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
- }
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with single stepping. If we have a
+ * pre-handler and it returned non-zero, it prepped
+ * for calling the break_handler below on re-entry
+ * for jprobe processing, so get out doing nothing
+ * more here.
+ */
+ push_kprobe(kcb, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler && p->pre_handler(p, regs))
+ return 1;
+ kcb->kprobe_status = KPROBE_HIT_SS;
}
- goto no_kprobe;
- }
-
- p = get_kprobe(addr);
- if (!p)
- /*
- * No kprobe at this address. The fault has not been
- * caused by a kprobe breakpoint. The race of breakpoint
- * vs. kprobe remove does not exist because on s390 we
- * use stop_machine_run to arm/disarm the breakpoints.
- */
- goto no_kprobe;
-
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- set_current_kprobe(p, regs, kcb);
- if (p->pre_handler && p->pre_handler(p, regs))
- /* handler has already set things up, so skip ss setup */
+ enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
return 1;
-
-ss_probe:
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_HIT_SS;
- return 1;
-
-no_kprobe:
+ } else if (kprobe_running()) {
+ p = __get_cpu_var(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ /*
+ * Continuation after the jprobe completed and
+ * caused the jprobe_return trap. The jprobe
+ * break_handler "returns" to the original
+ * function that still has the kprobe breakpoint
+ * installed. We continue with single stepping.
+ */
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ enable_singlestep(kcb, regs,
+ (unsigned long) p->ainsn.insn);
+ return 1;
+ } /* else:
+ * No kprobe at this address and the current kprobe
+ * has no break handler (no jprobe!). The kernel just
+ * exploded, let the standard trap handler pick up the
+ * pieces.
+ */
+ } /* else:
+ * No kprobe at this address and no active kprobe. The trap has
+ * not been caused by a kprobe breakpoint. The race of breakpoint
+ * vs. kprobe remove does not exist because on s390 we use
+ * stop_machine to arm/disarm the breakpoints.
+ */
preempt_enable_no_resched();
- return ret;
+ return 0;
}
/*
@@ -372,20 +506,20 @@ static void __used kretprobe_trampoline_holder(void)
static int __kprobes trampoline_probe_handler(struct kprobe *p,
struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
+ struct kretprobe_instance *ri;
struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address;
+ unsigned long trampoline_address;
+ kprobe_opcode_t *correct_ret_addr;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
* task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
+ * have a return probe installed on them, and/or more than one
* return probe was registered for a target function.
*
* We can handle this because:
@@ -395,34 +529,59 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* real return address, and all the rest will point to
* kretprobe_trampoline
*/
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ ri = NULL;
+ orig_ret_address = 0;
+ correct_ret_addr = NULL;
+ trampoline_address = (unsigned long) &kretprobe_trampoline;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
- if (ri->rp && ri->rp->handler)
+ orig_ret_address = (unsigned long) ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long) ri->ret_addr;
+
+ if (ri->rp && ri->rp->handler) {
+ ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
+ }
- orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri, &empty_rp);
- if (orig_ret_address != trampoline_address) {
+ if (orig_ret_address != trampoline_address)
/*
* This is the real return address. Any other
* instances associated with this task are for
* other calls deeper on the call stack
*/
break;
- }
}
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
- reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ pop_kprobe(get_kprobe_ctlblk());
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
@@ -445,55 +604,42 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+ int fixup = get_fixup_type(p->ainsn.insn);
- regs->psw.addr &= PSW_ADDR_INSN;
+ if (fixup & FIXUP_PSW_NORMAL)
+ ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
- if (p->ainsn.fixup & FIXUP_PSW_NORMAL)
- regs->psw.addr = (unsigned long)p->addr +
- ((unsigned long)regs->psw.addr -
- (unsigned long)p->ainsn.insn);
-
- if (p->ainsn.fixup & FIXUP_BRANCH_NOT_TAKEN)
- if ((unsigned long)regs->psw.addr -
- (unsigned long)p->ainsn.insn == p->ainsn.ilen)
- regs->psw.addr = (unsigned long)p->addr + p->ainsn.ilen;
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(p->ainsn.insn[0] >> 8);
+ if (ip - (unsigned long) p->ainsn.insn == ilen)
+ ip = (unsigned long) p->addr + ilen;
+ }
- if (p->ainsn.fixup & FIXUP_RETURN_REGISTER)
- regs->gprs[p->ainsn.reg] = ((unsigned long)p->addr +
- (regs->gprs[p->ainsn.reg] -
- (unsigned long)p->ainsn.insn))
- | PSW_ADDR_AMODE;
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (p->ainsn.insn[0] & 0xf0) >> 4;
+ regs->gprs[reg] += (unsigned long) p->addr -
+ (unsigned long) p->ainsn.insn;
+ }
- regs->psw.addr |= PSW_ADDR_AMODE;
- /* turn off PER mode */
- regs->psw.mask &= ~PSW_MASK_PER;
- /* Restore the original per control regs */
- __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
- regs->psw.mask |= kcb->kprobe_saved_imask;
+ disable_singlestep(kcb, regs, ip);
}
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
- struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
- if (!cur)
+ if (!p)
return 0;
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
- cur->post_handler(cur, regs, 0);
+ p->post_handler(p, regs, 0);
}
- resume_execution(cur, regs);
-
- /*Restore back the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- goto out;
- }
- reset_current_kprobe();
-out:
+ resume_execution(p, regs);
+ pop_kprobe(kcb);
preempt_enable_no_resched();
/*
@@ -501,17 +647,16 @@ out:
* will have PER set, in which case, continue the remaining processing
* of do_single_step, as if this is not a probe hit.
*/
- if (regs->psw.mask & PSW_MASK_PER) {
+ if (regs->psw.mask & PSW_MASK_PER)
return 0;
- }
return 1;
}
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
{
- struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
const struct exception_table_entry *entry;
switch(kcb->kprobe_status) {
@@ -527,23 +672,18 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* and allow the page fault handler to continue as a
* normal page fault.
*/
- regs->psw.addr = (unsigned long)cur->addr | PSW_ADDR_AMODE;
- regs->psw.mask &= ~PSW_MASK_PER;
- regs->psw.mask |= kcb->kprobe_saved_imask;
- if (kcb->kprobe_status == KPROBE_REENTER)
- restore_previous_kprobe(kcb);
- else
- reset_current_kprobe();
+ disable_singlestep(kcb, regs, (unsigned long) p->addr);
+ pop_kprobe(kcb);
preempt_enable_no_resched();
break;
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
* We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
+ * we can also use npre/npostfault count for accounting
* these specific fault cases.
*/
- kprobes_inc_nmissed_count(cur);
+ kprobes_inc_nmissed_count(p);
/*
* We come here because instructions in the pre/post
@@ -552,7 +692,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* copy_from_user(), get_user() etc. Let the
* user-specified handler try to fix it first.
*/
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ if (p->fault_handler && p->fault_handler(p, regs, trapnr))
return 1;
/*
@@ -561,7 +701,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
*/
entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (entry) {
- regs->psw.addr = entry->fixup | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
return 1;
}
@@ -576,57 +716,71 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
return 0;
}
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ int ret;
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_disable();
+ ret = kprobe_trap_handler(regs, trapnr);
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+ return ret;
+}
+
/*
* Wrapper routine to for handling exceptions.
*/
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
- struct die_args *args = (struct die_args *)data;
+ struct die_args *args = (struct die_args *) data;
+ struct pt_regs *regs = args->regs;
int ret = NOTIFY_DONE;
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_disable();
+
switch (val) {
case DIE_BPT:
- if (kprobe_handler(args->regs))
+ if (kprobe_handler(regs))
ret = NOTIFY_STOP;
break;
case DIE_SSTEP:
- if (post_kprobe_handler(args->regs))
+ if (post_kprobe_handler(regs))
ret = NOTIFY_STOP;
break;
case DIE_TRAP:
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
+ if (!preemptible() && kprobe_running() &&
+ kprobe_trap_handler(regs, args->trapnr))
ret = NOTIFY_STOP;
- preempt_enable();
break;
default:
break;
}
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+
return ret;
}
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long stack;
memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
/* setup return addr to the jprobe handler routine */
- regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+ regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
- /* r14 is the function return address */
- kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14];
/* r15 is the stack pointer */
- kcb->jprobe_saved_r15 = (unsigned long)regs->gprs[15];
- addr = (unsigned long)kcb->jprobe_saved_r15;
+ stack = (unsigned long) regs->gprs[15];
- memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr,
- MIN_STACK_SIZE(addr));
+ memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
return 1;
}
@@ -635,7 +789,7 @@ void __kprobes jprobe_return(void)
asm volatile(".word 0x0002");
}
-void __kprobes jprobe_return_end(void)
+static void __used __kprobes jprobe_return_end(void)
{
asm volatile("bcr 0,0");
}
@@ -643,30 +797,29 @@ void __kprobes jprobe_return_end(void)
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_r15);
+ unsigned long stack;
+
+ stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
/* Put the regs back */
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
/* put the stack back */
- memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
- MIN_STACK_SIZE(stack_addr));
+ memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack));
preempt_enable_no_resched();
return 1;
}
-static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) & kretprobe_trampoline,
+static struct kprobe trampoline = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
.pre_handler = trampoline_probe_handler
};
int __init arch_init_kprobes(void)
{
- return register_kprobe(&trampoline_p);
+ return register_kprobe(&trampoline);
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
- if (p->addr == (kprobe_opcode_t *) & kretprobe_trampoline)
- return 1;
- return 0;
+ return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
}
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
new file mode 100644
index 00000000000..6ea6d69339b
--- /dev/null
+++ b/arch/s390/kernel/lgr.c
@@ -0,0 +1,186 @@
+/*
+ * Linux Guest Relocation (LGR) detection
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+
+#define LGR_TIMER_INTERVAL_SECS (30 * 60)
+#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */
+
+/*
+ * LGR info: Contains stfle and stsi data
+ */
+struct lgr_info {
+ /* Bit field with facility information: 4 DWORDs are stored */
+ u64 stfle_fac_list[4];
+ /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM) */
+ u32 level;
+ /* Level 1: CEC info (stsi 1.1.1) */
+ char manufacturer[16];
+ char type[4];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+ /* Level 2: LPAR info (stsi 2.2.2) */
+ u16 lpar_number;
+ char name[8];
+ /* Level 3: VM info (stsi 3.2.2) */
+ u8 vm_count;
+ struct {
+ char name[8];
+ char cpi[16];
+ } vm[VM_LEVEL_MAX];
+} __packed __aligned(8);
+
+/*
+ * LGR globals
+ */
+static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+static struct lgr_info lgr_info_last;
+static struct lgr_info lgr_info_cur;
+static struct debug_info *lgr_dbf;
+
+/*
+ * Copy buffer and then convert it to ASCII
+ */
+static void cpascii(char *dst, char *src, int size)
+{
+ memcpy(dst, src, size);
+ EBCASC(dst, size);
+}
+
+/*
+ * Fill LGR info with 1.1.1 stsi data
+ */
+static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
+{
+ struct sysinfo_1_1_1 *si = (void *) lgr_page;
+
+ if (stsi(si, 1, 1, 1))
+ return;
+ cpascii(lgr_info->manufacturer, si->manufacturer,
+ sizeof(si->manufacturer));
+ cpascii(lgr_info->type, si->type, sizeof(si->type));
+ cpascii(lgr_info->model, si->model, sizeof(si->model));
+ cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence));
+ cpascii(lgr_info->plant, si->plant, sizeof(si->plant));
+}
+
+/*
+ * Fill LGR info with 2.2.2 stsi data
+ */
+static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_2_2_2 *si = (void *) lgr_page;
+
+ if (stsi(si, 2, 2, 2))
+ return;
+ cpascii(lgr_info->name, si->name, sizeof(si->name));
+ memcpy(&lgr_info->lpar_number, &si->lpar_number,
+ sizeof(lgr_info->lpar_number));
+}
+
+/*
+ * Fill LGR info with 3.2.2 stsi data
+ */
+static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_3_2_2 *si = (void *) lgr_page;
+ int i;
+
+ if (stsi(si, 3, 2, 2))
+ return;
+ for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
+ cpascii(lgr_info->vm[i].name, si->vm[i].name,
+ sizeof(si->vm[i].name));
+ cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi,
+ sizeof(si->vm[i].cpi));
+ }
+ lgr_info->vm_count = si->count;
+}
+
+/*
+ * Fill LGR info with current data
+ */
+static void lgr_info_get(struct lgr_info *lgr_info)
+{
+ int level;
+
+ memset(lgr_info, 0, sizeof(*lgr_info));
+ stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
+ level = stsi(NULL, 0, 0, 0);
+ lgr_info->level = level;
+ if (level >= 1)
+ lgr_stsi_1_1_1(lgr_info);
+ if (level >= 2)
+ lgr_stsi_2_2_2(lgr_info);
+ if (level >= 3)
+ lgr_stsi_3_2_2(lgr_info);
+}
+
+/*
+ * Check if LGR info has changed and if yes log new LGR info to s390dbf
+ */
+void lgr_info_log(void)
+{
+ static DEFINE_SPINLOCK(lgr_info_lock);
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&lgr_info_lock, flags))
+ return;
+ lgr_info_get(&lgr_info_cur);
+ if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) {
+ debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur));
+ lgr_info_last = lgr_info_cur;
+ }
+ spin_unlock_irqrestore(&lgr_info_lock, flags);
+}
+EXPORT_SYMBOL_GPL(lgr_info_log);
+
+static void lgr_timer_set(void);
+
+/*
+ * LGR timer callback
+ */
+static void lgr_timer_fn(unsigned long ignored)
+{
+ lgr_info_log();
+ lgr_timer_set();
+}
+
+static struct timer_list lgr_timer =
+ TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0);
+
+/*
+ * Setup next LGR timer
+ */
+static void lgr_timer_set(void)
+{
+ mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+}
+
+/*
+ * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer
+ */
+static int __init lgr_init(void)
+{
+ lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info));
+ if (!lgr_dbf)
+ return -ENOMEM;
+ debug_register_view(lgr_dbf, &debug_hex_ascii_view);
+ lgr_info_get(&lgr_info_last);
+ debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+ lgr_timer_set();
+ return 0;
+}
+module_init(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3c77dd36994..719e27b2cf2 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -1,10 +1,9 @@
/*
- * arch/s390/kernel/machine_kexec.c
- *
- * Copyright IBM Corp. 2005,2006
+ * Copyright IBM Corp. 2005, 2011
*
* Author(s): Rolf Adelsberger,
* Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
#include <linux/device.h>
@@ -12,27 +11,187 @@
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/reboot.h>
+#include <linux/ftrace.h>
+#include <linux/debug_locks.h>
+#include <linux/suspend.h>
#include <asm/cio.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
-#include <asm/system.h>
#include <asm/smp.h>
#include <asm/reset.h>
#include <asm/ipl.h>
+#include <asm/diag.h>
+#include <asm/elf.h>
+#include <asm/asm-offsets.h>
+#include <asm/os_info.h>
typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
extern const unsigned char relocate_kernel[];
extern const unsigned long long relocate_kernel_len;
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Create ELF notes for one CPU
+ */
+static void add_elf_notes(int cpu)
+{
+ struct save_area *sa = (void *) 4608 + store_prefix();
+ void *ptr;
+
+ memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
+ ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ ptr = fill_cpu_elf_notes(ptr, sa);
+ memset(ptr, 0, sizeof(struct elf_note));
+}
+
+/*
+ * Initialize CPU ELF notes
+ */
+static void setup_regs(void)
+{
+ unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
+ int cpu, this_cpu;
+
+ this_cpu = smp_find_processor_id(stap());
+ add_elf_notes(this_cpu);
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ if (smp_store_status(cpu))
+ continue;
+ add_elf_notes(cpu);
+ }
+ /* Copy dump CPU store status info to absolute zero */
+ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
+}
+
+/*
+ * PM notifier callback for kdump
+ */
+static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ if (crashk_res.start)
+ crash_map_reserved_pages();
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ if (crashk_res.start)
+ crash_unmap_reserved_pages();
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init machine_kdump_pm_init(void)
+{
+ pm_notifier(machine_kdump_pm_cb, 0);
+ return 0;
+}
+arch_initcall(machine_kdump_pm_init);
+#endif
+
+/*
+ * Start kdump: We expect here that a store status has been done on our CPU
+ */
+static void __do_machine_kdump(void *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+ int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+
+ setup_regs();
+ __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
+ start_kdump(1);
+#endif
+}
+
+/*
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
+ */
+static int kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+ int (*start_kdump)(int) = (void *)image->start;
+ int rc;
+
+ __arch_local_irq_stnsm(0xfb); /* disable DAT */
+ rc = start_kdump(0);
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ return rc ? 0 : -EINVAL;
+#else
+ return -EINVAL;
+#endif
+}
+
+/*
+ * Map or unmap crashkernel memory
+ */
+static void crash_map_pages(int enable)
+{
+ unsigned long size = resource_size(&crashk_res);
+
+ BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
+ size % KEXEC_CRASH_MEM_ALIGN);
+ if (enable)
+ vmem_add_mapping(crashk_res.start, size);
+ else {
+ vmem_remove_mapping(crashk_res.start, size);
+ if (size)
+ os_info_crashkernel_add(crashk_res.start, size);
+ else
+ os_info_crashkernel_add(0, 0);
+ }
+}
+
+/*
+ * Map crashkernel memory
+ */
+void crash_map_reserved_pages(void)
+{
+ crash_map_pages(1);
+}
+
+/*
+ * Unmap crashkernel memory
+ */
+void crash_unmap_reserved_pages(void)
+{
+ crash_map_pages(0);
+}
+
+/*
+ * Give back memory to hypervisor before new kdump is loaded
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (MACHINE_IS_VM)
+ diag10_range(PFN_DOWN(crashk_res.start),
+ PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
int machine_kexec_prepare(struct kimage *image)
{
void *reboot_code_buffer;
/* Can't replace kernel image since it is read-only. */
if (ipl_flags & IPL_NSS_VALID)
- return -ENOSYS;
+ return -EOPNOTSUPP;
+
+ if (image->type == KEXEC_TYPE_CRASH)
+ return machine_kexec_prepare_kdump();
/* We don't support anything but the default image type for now. */
if (image->type != KEXEC_TYPE_DEFAULT)
@@ -50,22 +209,64 @@ void machine_kexec_cleanup(struct kimage *image)
{
}
+void arch_crash_save_vmcoreinfo(void)
+{
+ VMCOREINFO_SYMBOL(lowcore_ptr);
+ VMCOREINFO_SYMBOL(high_memory);
+ VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+}
+
void machine_shutdown(void)
{
- printk(KERN_INFO "kexec: machine_shutdown called\n");
}
-void machine_kexec(struct kimage *image)
+void machine_crash_shutdown(struct pt_regs *regs)
{
- relocate_kernel_t data_mover;
+}
- smp_send_stop();
- pfault_fini();
- s390_reset_system();
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
+{
+ relocate_kernel_t data_mover;
+ struct kimage *image = data;
data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
/* Call the moving routine */
(*data_mover)(&image->head, image->start);
- for (;;);
+}
+
+/*
+ * Reset system and call either kdump or normal kexec
+ */
+static void __machine_kexec(void *data)
+{
+ struct kimage *image = data;
+
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ pfault_fini();
+ tracing_off();
+ debug_locks_off();
+ if (image->type == KEXEC_TYPE_CRASH) {
+ lgr_info_log();
+ s390_reset_system(__do_machine_kdump, data);
+ } else {
+ s390_reset_system(__do_machine_kexec, data);
+ }
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory, if kdump checksums are valid.
+ */
+void machine_kexec(struct kimage *image)
+{
+ if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+ return;
+ tracer_disable();
+ smp_send_stop();
+ smp_call_ipl_cpu(__machine_kexec, image);
}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
new file mode 100644
index 00000000000..08dcf21cb8d
--- /dev/null
+++ b/arch/s390/kernel/mcount.S
@@ -0,0 +1,73 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+
+ .section .kprobes.text, "ax"
+
+ENTRY(ftrace_stub)
+ br %r14
+
+ENTRY(_mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+ br %r14
+
+ENTRY(ftrace_caller)
+#endif
+ stm %r2,%r5,16(%r15)
+ bras %r1,2f
+0: .long ftrace_trace_function
+1: .long function_trace_stop
+2: l %r2,1b-0b(%r1)
+ icm %r2,0xf,0(%r2)
+ jnz 3f
+ st %r14,56(%r15)
+ lr %r0,%r15
+ ahi %r15,-96
+ l %r3,100(%r15)
+ la %r2,0(%r14)
+ st %r0,__SF_BACKCHAIN(%r15)
+ la %r3,0(%r3)
+ ahi %r2,-MCOUNT_INSN_SIZE
+ l %r14,0b-0b(%r1)
+ l %r14,0(%r14)
+ basr %r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ l %r2,100(%r15)
+ l %r3,152(%r15)
+ENTRY(ftrace_graph_caller)
+# The bras instruction gets runtime patched to call prepare_ftrace_return.
+# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
+# bras %r14,prepare_ftrace_return
+ bras %r14,0f
+0: st %r2,100(%r15)
+#endif
+ ahi %r15,96
+ l %r14,56(%r15)
+3: lm %r2,%r5,16(%r15)
+ br %r14
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+ENTRY(return_to_handler)
+ stm %r2,%r5,16(%r15)
+ st %r14,56(%r15)
+ lr %r0,%r15
+ ahi %r15,-96
+ st %r0,__SF_BACKCHAIN(%r15)
+ bras %r1,0f
+ .long ftrace_return_to_handler
+0: l %r2,0b-0b(%r1)
+ basr %r14,%r2
+ lr %r14,%r2
+ ahi %r15,96
+ lm %r2,%r5,16(%r15)
+ br %r14
+
+#endif
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
new file mode 100644
index 00000000000..1c52eae3396
--- /dev/null
+++ b/arch/s390/kernel/mcount64.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+
+ .section .kprobes.text, "ax"
+
+ENTRY(ftrace_stub)
+ br %r14
+
+ENTRY(_mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+ br %r14
+
+ENTRY(ftrace_caller)
+#endif
+ larl %r1,function_trace_stop
+ icm %r1,0xf,0(%r1)
+ bnzr %r14
+ stmg %r2,%r5,32(%r15)
+ stg %r14,112(%r15)
+ lgr %r1,%r15
+ aghi %r15,-160
+ stg %r1,__SF_BACKCHAIN(%r15)
+ lgr %r2,%r14
+ lg %r3,168(%r15)
+ aghi %r2,-MCOUNT_INSN_SIZE
+ larl %r14,ftrace_trace_function
+ lg %r14,0(%r14)
+ basr %r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ lg %r2,168(%r15)
+ lg %r3,272(%r15)
+ENTRY(ftrace_graph_caller)
+# The bras instruction gets runtime patched to call prepare_ftrace_return.
+# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
+# bras %r14,prepare_ftrace_return
+ bras %r14,0f
+0: stg %r2,168(%r15)
+#endif
+ aghi %r15,160
+ lmg %r2,%r5,32(%r15)
+ lg %r14,112(%r15)
+ br %r14
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+ENTRY(return_to_handler)
+ stmg %r2,%r5,32(%r15)
+ lgr %r1,%r15
+ aghi %r15,-160
+ stg %r1,__SF_BACKCHAIN(%r15)
+ brasl %r14,ftrace_return_to_handler
+ aghi %r15,160
+ lgr %r14,%r2
+ lmg %r2,%r5,32(%r15)
+ br %r14
+
+#endif
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 59b4e796680..b89b59158b9 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -1,9 +1,8 @@
/*
- * arch/s390/kernel/module.c - Kernel module help for s390.
+ * Kernel module help for s390.
*
* S390 version
- * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH,
- * IBM Corporation
+ * Copyright IBM Corp. 2002, 2003
* Author(s): Arnd Bergmann (arndb@de.ibm.com)
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*
@@ -45,23 +44,28 @@
#define PLT_ENTRY_SIZE 20
#endif /* CONFIG_64BIT */
+#ifdef CONFIG_64BIT
void *module_alloc(unsigned long size)
{
- if (size == 0)
+ if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
- return vmalloc(size);
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE,
+ __builtin_return_address(0));
}
+#endif
/* Free memory returned from module_alloc */
void module_free(struct module *mod, void *module_region)
{
+ if (mod) {
+ vfree(mod->arch.syminfo);
+ mod->arch.syminfo = NULL;
+ }
vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
}
-static void
-check_rela(Elf_Rela *rela, struct module *me)
+static void check_rela(Elf_Rela *rela, struct module *me)
{
struct mod_arch_syminfo *info;
@@ -110,9 +114,8 @@ check_rela(Elf_Rela *rela, struct module *me)
* Account for GOT and PLT relocations. We can't add sections for
* got and plt but we can increase the core module size.
*/
-int
-module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
- char *secstrings, struct module *me)
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *me)
{
Elf_Shdr *symtab;
Elf_Sym *symbols;
@@ -174,22 +177,52 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
return 0;
}
-int
-apply_relocate(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex,
- unsigned int relsec, struct module *me)
+static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
+ int sign, int bits, int shift)
{
- printk(KERN_ERR "module %s: RELOCATION unsupported\n",
- me->name);
- return -ENOEXEC;
+ unsigned long umax;
+ long min, max;
+
+ if (val & ((1UL << shift) - 1))
+ return -ENOEXEC;
+ if (sign) {
+ val = (Elf_Addr)(((long) val) >> shift);
+ min = -(1L << (bits - 1));
+ max = (1L << (bits - 1)) - 1;
+ if ((long) val < min || (long) val > max)
+ return -ENOEXEC;
+ } else {
+ val >>= shift;
+ umax = ((1UL << (bits - 1)) << 1) - 1;
+ if ((unsigned long) val > umax)
+ return -ENOEXEC;
+ }
+
+ if (bits == 8)
+ *(unsigned char *) loc = val;
+ else if (bits == 12)
+ *(unsigned short *) loc = (val & 0xfff) |
+ (*(unsigned short *) loc & 0xf000);
+ else if (bits == 16)
+ *(unsigned short *) loc = val;
+ else if (bits == 20)
+ *(unsigned int *) loc = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 |
+ (*(unsigned int *) loc & 0xf00000ff);
+ else if (bits == 32)
+ *(unsigned int *) loc = val;
+ else if (bits == 64)
+ *(unsigned long *) loc = val;
+ return 0;
}
-static int
-apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
- struct module *me)
+static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ const char *strtab, struct module *me)
{
struct mod_arch_syminfo *info;
Elf_Addr loc, val;
int r_type, r_sym;
+ int rc = -ENOEXEC;
/* This is where to make the change */
loc = base + rela->r_offset;
@@ -201,6 +234,9 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = symtab[r_sym].st_value;
switch (r_type) {
+ case R_390_NONE: /* No relocation. */
+ rc = 0;
+ break;
case R_390_8: /* Direct 8 bit. */
case R_390_12: /* Direct 12 bit. */
case R_390_16: /* Direct 16 bit. */
@@ -209,20 +245,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_64: /* Direct 64 bit. */
val += rela->r_addend;
if (r_type == R_390_8)
- *(unsigned char *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 8, 0);
else if (r_type == R_390_12)
- *(unsigned short *) loc = (val & 0xfff) |
- (*(unsigned short *) loc & 0xf000);
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
else if (r_type == R_390_16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_20)
- *(unsigned int *) loc =
- (*(unsigned int *) loc & 0xf00000ff) |
- (val & 0xfff) << 16 | (val & 0xff000) >> 4;
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
else if (r_type == R_390_32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
break;
case R_390_PC16: /* PC relative 16 bit. */
case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
@@ -231,15 +264,15 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PC64: /* PC relative 64 bit. */
val += rela->r_addend - loc;
if (r_type == R_390_PC16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 16, 0);
else if (r_type == R_390_PC16DBL)
- *(unsigned short *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
else if (r_type == R_390_PC32DBL)
- *(unsigned int *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
else if (r_type == R_390_PC32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
else if (r_type == R_390_PC64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 64, 0);
break;
case R_390_GOT12: /* 12 bit GOT offset. */
case R_390_GOT16: /* 16 bit GOT offset. */
@@ -264,26 +297,24 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = info->got_offset + rela->r_addend;
if (r_type == R_390_GOT12 ||
r_type == R_390_GOTPLT12)
- *(unsigned short *) loc = (val & 0xfff) |
- (*(unsigned short *) loc & 0xf000);
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
else if (r_type == R_390_GOT16 ||
r_type == R_390_GOTPLT16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_GOT20 ||
r_type == R_390_GOTPLT20)
- *(unsigned int *) loc =
- (*(unsigned int *) loc & 0xf00000ff) |
- (val & 0xfff) << 16 | (val & 0xff000) >> 4;
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
else if (r_type == R_390_GOT32 ||
r_type == R_390_GOTPLT32)
- *(unsigned int *) loc = val;
- else if (r_type == R_390_GOTENT ||
- r_type == R_390_GOTPLTENT)
- *(unsigned int *) loc =
- (val + (Elf_Addr) me->module_core - loc) >> 1;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_GOT64 ||
r_type == R_390_GOTPLT64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ else if (r_type == R_390_GOTENT ||
+ r_type == R_390_GOTPLTENT) {
+ val += (Elf_Addr) me->module_core - loc;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ }
break;
case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */
@@ -310,27 +341,32 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
info->plt_initialized = 1;
}
if (r_type == R_390_PLTOFF16 ||
- r_type == R_390_PLTOFF32
- || r_type == R_390_PLTOFF64
- )
+ r_type == R_390_PLTOFF32 ||
+ r_type == R_390_PLTOFF64)
val = me->arch.plt_offset - me->arch.got_offset +
info->plt_offset + rela->r_addend;
- else
- val = (Elf_Addr) me->module_core +
- me->arch.plt_offset + info->plt_offset +
- rela->r_addend - loc;
+ else {
+ if (!((r_type == R_390_PLT16DBL &&
+ val - loc + 0xffffUL < 0x1ffffeUL) ||
+ (r_type == R_390_PLT32DBL &&
+ val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+ val = (Elf_Addr) me->module_core +
+ me->arch.plt_offset +
+ info->plt_offset;
+ val += rela->r_addend - loc;
+ }
if (r_type == R_390_PLT16DBL)
- *(unsigned short *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
else if (r_type == R_390_PLTOFF16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_PLT32DBL)
- *(unsigned int *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
else if (r_type == R_390_PLT32 ||
r_type == R_390_PLTOFF32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_PLT64 ||
r_type == R_390_PLTOFF64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
break;
case R_390_GOTOFF16: /* 16 bit offset to GOT. */
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
@@ -338,20 +374,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = val + rela->r_addend -
((Elf_Addr) me->module_core + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
- *(unsigned short *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
else if (r_type == R_390_GOTOFF32)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
else if (r_type == R_390_GOTOFF64)
- *(unsigned long *) loc = val;
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
val = (Elf_Addr) me->module_core + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
- *(unsigned int *) loc = val;
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
else if (r_type == R_390_GOTPCDBL)
- *(unsigned int *) loc = val >> 1;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
break;
case R_390_COPY:
case R_390_GLOB_DAT: /* Create GOT entry. */
@@ -359,19 +395,25 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_RELATIVE: /* Adjust by program base. */
/* Only needed if we want to support loading of
modules linked with -shared. */
- break;
+ return -ENOEXEC;
default:
- printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+ printk(KERN_ERR "module %s: unknown relocation: %u\n",
me->name, r_type);
return -ENOEXEC;
}
+ if (rc) {
+ printk(KERN_ERR "module %s: relocation error for symbol %s "
+ "(r_type %i, value 0x%lx)\n",
+ me->name, strtab + symtab[r_sym].st_name,
+ r_type, (unsigned long) val);
+ return rc;
+ }
return 0;
}
-int
-apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
- unsigned int symindex, unsigned int relsec,
- struct module *me)
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
{
Elf_Addr base;
Elf_Sym *symtab;
@@ -387,7 +429,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
for (i = 0; i < n; i++, rela++) {
- rc = apply_rela(rela, base, symtab, me);
+ rc = apply_rela(rela, base, symtab, strtab, me);
if (rc)
return rc;
}
@@ -399,10 +441,6 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
vfree(me->arch.syminfo);
- return module_bug_finalize(hdr, sechdrs, me);
-}
-
-void module_arch_cleanup(struct module *mod)
-{
- module_bug_cleanup(mod);
+ me->arch.syminfo = NULL;
+ return 0;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
new file mode 100644
index 00000000000..210e1285f75
--- /dev/null
+++ b/arch/s390/kernel/nmi.c
@@ -0,0 +1,372 @@
+/*
+ * Machine check handler
+ *
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <asm/lowcore.h>
+#include <asm/smp.h>
+#include <asm/etr.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/crw.h>
+
+struct mcck_struct {
+ int kill_task;
+ int channel_report;
+ int warning;
+ unsigned long long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
+static void s390_handle_damage(char *msg)
+{
+ smp_send_stop();
+ disabled_wait((unsigned long) __builtin_return_address(0));
+ while (1);
+}
+
+/*
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
+ *
+ * Takes a snapshot of this CPU's cpu_mcck state, clears the per-cpu copy and
+ * the CIF_MCCK_PENDING flag, and then acts on the snapshot:
+ * - channel_report: calls crw_handle_channel_report()
+ * - warning: clears bit 24 of control register 14 on this CPU and calls
+ *   kill_cad_pid(SIGPWR, 1) for the first warning only
+ * - kill_task: logs the machine check code and the current task, then
+ *   calls do_exit(SIGSEGV)
+ */
+void s390_handle_mcck(void)
+{
+ unsigned long flags;
+ struct mcck_struct mcck;
+
+ /*
+ * Disable machine checks and get the current state of accumulated
+ * machine checks. Afterwards delete the old state and enable machine
+ * checks again.
+ */
+ local_irq_save(flags);
+ local_mcck_disable();
+ mcck = __get_cpu_var(cpu_mcck);
+ memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+ clear_cpu_flag(CIF_MCCK_PENDING);
+ local_mcck_enable();
+ local_irq_restore(flags);
+
+ if (mcck.channel_report)
+ crw_handle_channel_report();
+ /*
+ * A warning may remain for a prolonged period on the bare iron.
+ * (actually until the machine is powered off, or the problem is gone)
+ * So we just stop listening for the WARNING MCH and avoid continuously
+ * being interrupted. One caveat is however, that we must do this per
+ * processor and cannot use the smp version of ctl_clear_bit().
+ * On VM we only get one interrupt per virtually presented machine check.
+ * Though one suffices, we may get one interrupt per (virtual) cpu.
+ */
+ if (mcck.warning) { /* WARNING pending ? */
+ static int mchchk_wng_posted = 0;
+
+ /* Use single cpu clear, as we cannot handle smp here. */
+ __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ if (xchg(&mchchk_wng_posted, 1) == 0) /* signal only once */
+ kill_cad_pid(SIGPWR, 1);
+ }
+ if (mcck.kill_task) {
+ local_irq_enable();
+ printk(KERN_EMERG "mcck: Terminating task because of machine "
+ "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+ do_exit(SIGSEGV);
+ }
+}
+EXPORT_SYMBOL_GPL(s390_handle_mcck);
+
+/*
+ * returns 0 if all registers could be validated
+ * returns 1 otherwise
+ */
+static int notrace s390_revalidate_registers(struct mci *mci)
+{
+ int kill_task;
+ u64 zero;
+ void *fpt_save_area, *fpt_creg_save_area;
+
+ kill_task = 0;
+ zero = 0;
+
+ if (!mci->gr) {
+ /*
+ * General purpose registers couldn't be restored and have
+ * unknown contents. Process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+ if (!mci->fp) {
+ /*
+ * Floating point registers can't be restored and
+ * therefore the process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+#ifndef CONFIG_64BIT
+ asm volatile(
+ " ld 0,0(%0)\n"
+ " ld 2,8(%0)\n"
+ " ld 4,16(%0)\n"
+ " ld 6,24(%0)"
+ : : "a" (&S390_lowcore.floating_pt_save_area));
+#endif
+
+ if (MACHINE_HAS_IEEE) {
+#ifdef CONFIG_64BIT
+ fpt_save_area = &S390_lowcore.floating_pt_save_area;
+ fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+#else
+ fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
+ fpt_creg_save_area = fpt_save_area + 128;
+#endif
+ if (!mci->fc) {
+ /*
+ * Floating point control register can't be restored.
+ * Task will be terminated.
+ */
+ asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+ kill_task = 1;
+
+ } else
+ asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
+
+ asm volatile(
+ " ld 0,0(%0)\n"
+ " ld 1,8(%0)\n"
+ " ld 2,16(%0)\n"
+ " ld 3,24(%0)\n"
+ " ld 4,32(%0)\n"
+ " ld 5,40(%0)\n"
+ " ld 6,48(%0)\n"
+ " ld 7,56(%0)\n"
+ " ld 8,64(%0)\n"
+ " ld 9,72(%0)\n"
+ " ld 10,80(%0)\n"
+ " ld 11,88(%0)\n"
+ " ld 12,96(%0)\n"
+ " ld 13,104(%0)\n"
+ " ld 14,112(%0)\n"
+ " ld 15,120(%0)\n"
+ : : "a" (fpt_save_area));
+ }
+ /* Revalidate access registers */
+ asm volatile(
+ " lam 0,15,0(%0)"
+ : : "a" (&S390_lowcore.access_regs_save_area));
+ if (!mci->ar) {
+ /*
+ * Access registers have unknown contents.
+ * Terminating task.
+ */
+ kill_task = 1;
+ }
+ /* Revalidate control registers */
+ if (!mci->cr) {
+ /*
+ * Control registers have unknown contents.
+ * Can't recover and therefore stopping machine.
+ */
+ s390_handle_damage("invalid control registers.");
+ } else {
+#ifdef CONFIG_64BIT
+ asm volatile(
+ " lctlg 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#else
+ asm volatile(
+ " lctl 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#endif
+ }
+ /*
+ * We don't even try to revalidate the TOD register, since we simply
+ * can't write something sensible into that register.
+ */
+#ifdef CONFIG_64BIT
+ /*
+ * See if we can revalidate the TOD programmable register with its
+ * old contents (should be zero) otherwise set it to zero.
+ */
+ if (!mci->pr)
+ asm volatile(
+ " sr 0,0\n"
+ " sckpf"
+ : : : "0", "cc");
+ else
+ asm volatile(
+ " l 0,0(%0)\n"
+ " sckpf"
+ : : "a" (&S390_lowcore.tod_progreg_save_area)
+ : "0", "cc");
+#endif
+ /* Revalidate clock comparator register */
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ /* Check if old PSW is valid */
+ if (!mci->wp)
+ /*
+ * Can't tell if we come from user or kernel mode
+ * -> stopping machine.
+ */
+ s390_handle_damage("old psw invalid.");
+
+ if (!mci->ms || !mci->pm || !mci->ia)
+ kill_task = 1;
+
+ return kill_task;
+}
+
+#define MAX_IPD_COUNT 29
+#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
+
+#define ED_STP_ISLAND 6 /* External damage STP island check */
+#define ED_STP_SYNC 7 /* External damage STP sync check */
+#define ED_ETR_SYNC 12 /* External damage ETR sync check */
+#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */
+
+/*
+ * machine check handler.
+ */
+void notrace s390_do_machine_check(struct pt_regs *regs)
+{
+ static int ipd_count;
+ static DEFINE_SPINLOCK(ipd_lock);
+ static unsigned long long last_ipd;
+ struct mcck_struct *mcck;
+ unsigned long long tmp;
+ struct mci *mci;
+ int umode;
+
+ nmi_enter();
+ inc_irq_stat(NMI_NMI);
+ mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ mcck = &__get_cpu_var(cpu_mcck);
+ umode = user_mode(regs);
+
+ if (mci->sd) {
+ /* System damage -> stopping machine */
+ s390_handle_damage("received system damage machine check.");
+ }
+ if (mci->pd) {
+ if (mci->b) {
+ /* Processing backup -> verify if we can survive this */
+ u64 z_mcic, o_mcic, t_mcic;
+#ifdef CONFIG_64BIT
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+ 1ULL<<16);
+#else
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
+ 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
+#endif
+ t_mcic = *(u64 *)mci;
+
+ if (((t_mcic & z_mcic) != 0) ||
+ ((t_mcic & o_mcic) != o_mcic)) {
+ s390_handle_damage("processing backup machine "
+ "check with damage.");
+ }
+
+ /*
+ * Nullifying exigent condition, therefore we might
+ * retry this instruction.
+ */
+ spin_lock(&ipd_lock);
+ tmp = get_tod_clock();
+ if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
+ ipd_count++;
+ else
+ ipd_count = 1;
+ last_ipd = tmp;
+ if (ipd_count == MAX_IPD_COUNT)
+ s390_handle_damage("too many ipd retries.");
+ spin_unlock(&ipd_lock);
+ } else {
+ /* Processing damage -> stopping machine */
+ s390_handle_damage("received instruction processing "
+ "damage machine check.");
+ }
+ }
+ if (s390_revalidate_registers(mci)) {
+ if (umode) {
+ /*
+ * Couldn't restore all register contents while in
+ * user mode -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ } else {
+ /*
+ * Couldn't restore all register contents while in
+ * kernel mode -> stopping machine.
+ */
+ s390_handle_damage("unable to revalidate registers.");
+ }
+ }
+ if (mci->cd) {
+ /* Timing facility damage */
+ s390_handle_damage("TOD clock damaged");
+ }
+ if (mci->ed && mci->ec) {
+ /* External damage */
+ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
+ etr_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
+ etr_switch_to_local();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ stp_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ stp_island_check();
+ }
+ if (mci->se)
+ /* Storage error uncorrected */
+ s390_handle_damage("received storage error uncorrected "
+ "machine check.");
+ if (mci->ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage("received storage key-error uncorrected "
+ "machine check.");
+ if (mci->ds && mci->fa)
+ /* Storage degradation */
+ s390_handle_damage("received storage degradation machine "
+ "check.");
+ if (mci->cp) {
+ /* Channel report word pending */
+ mcck->channel_report = 1;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+ if (mci->w) {
+ /* Warning pending */
+ mcck->warning = 1;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+ nmi_exit();
+}
+
+static int __init machine_check_init(void)
+{
+ ctl_set_bit(14, 25); /* enable external damage MCH */
+ ctl_set_bit(14, 27); /* enable system recovery MCH */
+ ctl_set_bit(14, 24); /* enable warning MCH */
+ return 0;
+}
+arch_initcall(machine_check_init);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
new file mode 100644
index 00000000000..d112fc66f99
--- /dev/null
+++ b/arch/s390/kernel/os_info.c
@@ -0,0 +1,168 @@
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "os_info"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/checksum.h>
+#include <asm/lowcore.h>
+#include <asm/os_info.h>
+
+/*
+ * OS info structure has to be page aligned
+ */
+static struct os_info os_info __page_aligned_data;
+
+/*
+ * Compute checksum over OS info structure
+ *
+ * The checksum covers the bytes from the version_major member up to the end
+ * of the structure; anything located before version_major is not included.
+ * Returns the result of csum_partial() with an initial sum of 0.
+ */
+u32 os_info_csum(struct os_info *os_info)
+{
+ int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
+ return csum_partial(&os_info->version_major, size, 0);
+}
+
+/*
+ * Add crashkernel info to OS info and update checksum
+ */
+void os_info_crashkernel_add(unsigned long base, unsigned long size)
+{
+ os_info.crashkernel_addr = (u64)(unsigned long)base;
+ os_info.crashkernel_size = (u64)(unsigned long)size;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info entry and update checksum
+ *
+ * Stores the address and size of the data at ptr in entry[nr] together with
+ * a csum_partial() checksum of that data, then recomputes the checksum of
+ * the whole OS info structure.
+ *
+ * NOTE(review): nr is used as an array index without a range check in this
+ * function; callers are presumably expected to pass a valid entry number.
+ */
+void os_info_entry_add(int nr, void *ptr, u64 size)
+{
+ os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+ os_info.entry[nr].size = size;
+ os_info.entry[nr].csum = csum_partial(ptr, size, 0);
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Initialize OS info structure and set lowcore pointer
+ *
+ * Fills in the version numbers and the magic value, computes the structure
+ * checksum and stores the address of os_info in the lowcore field os_info
+ * by means of mem_assign_absolute().
+ */
+void __init os_info_init(void)
+{
+ void *ptr = &os_info;
+
+ os_info.version_major = OS_INFO_VERSION_MAJOR;
+ os_info.version_minor = OS_INFO_VERSION_MINOR;
+ os_info.magic = OS_INFO_MAGIC;
+ os_info.csum = os_info_csum(&os_info);
+ mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static struct os_info *os_info_old;
+
+/*
+ * Allocate and copy OS info entry from oldmem
+ *
+ * nr:    index of the entry in os_info_old->entry[]
+ * align: alignment applied to the start of the copy (via PTR_ALIGN)
+ *
+ * On success entry[nr].addr is replaced by the address of the aligned copy
+ * in the current kernel. If the entry is not available, or if allocation,
+ * copy or checksum verification fails, entry[nr].addr is set to 0. In all
+ * cases the outcome is reported with pr_info().
+ */
+static void os_info_old_alloc(int nr, int align)
+{
+ unsigned long addr, size = 0;
+ char *buf, *buf_align, *msg;
+ u32 csum;
+
+ addr = os_info_old->entry[nr].addr;
+ if (!addr) {
+ msg = "not available";
+ goto fail;
+ }
+ size = os_info_old->entry[nr].size;
+ buf = kmalloc(size + align - 1, GFP_KERNEL); /* room for alignment */
+ if (!buf) {
+ msg = "alloc failed";
+ goto fail;
+ }
+ buf_align = PTR_ALIGN(buf, align);
+ if (copy_from_oldmem(buf_align, (void *) addr, size)) {
+ msg = "copy failed";
+ goto fail_free;
+ }
+ csum = csum_partial(buf_align, size, 0);
+ if (csum != os_info_old->entry[nr].csum) {
+ msg = "checksum failed";
+ goto fail_free;
+ }
+ os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align;
+ msg = "copied";
+ goto out; /* success: buf stays allocated, only buf_align is kept */
+fail_free:
+ kfree(buf);
+fail:
+ os_info_old->entry[nr].addr = 0;
+out:
+ pr_info("entry %i: %s (addr=0x%lx size=%lu)\n",
+ nr, msg, addr, size);
+}
+
+/*
+ * Initialize os info and os info entries from oldmem
+ *
+ * Runs its body only once; the static flag os_info_init is set on both the
+ * success and the failure path. The old OS info address is read from the
+ * lowcore field os_info in oldmem and must be non-zero and page aligned.
+ * The structure is then copied and accepted only if magic and checksum
+ * match and its major version is not newer than OS_INFO_VERSION_MAJOR.
+ * On any failure os_info_old is left as NULL.
+ */
+static void os_info_old_init(void)
+{
+ static int os_info_init;
+ unsigned long addr;
+
+ if (os_info_init)
+ return;
+ if (!OLDMEM_BASE)
+ goto fail;
+ if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ goto fail;
+ if (addr == 0 || addr % PAGE_SIZE)
+ goto fail;
+ os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+ if (!os_info_old)
+ goto fail;
+ if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old)))
+ goto fail_free;
+ if (os_info_old->magic != OS_INFO_MAGIC)
+ goto fail_free;
+ if (os_info_old->csum != os_info_csum(os_info_old))
+ goto fail_free;
+ if (os_info_old->version_major > OS_INFO_VERSION_MAJOR)
+ goto fail_free;
+ os_info_old_alloc(OS_INFO_VMCOREINFO, 1);
+ os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1);
+ pr_info("crashkernel: addr=0x%lx size=%lu\n",
+ (unsigned long) os_info_old->crashkernel_addr,
+ (unsigned long) os_info_old->crashkernel_size);
+ os_info_init = 1;
+ return;
+fail_free:
+ kfree(os_info_old);
+fail:
+ os_info_init = 1;
+ os_info_old = NULL;
+}
+
+/*
+ * Return pointer to os info entry and its size
+ *
+ * Triggers os_info_old_init() first. Returns NULL if no old OS info is
+ * available or if the address of entry nr is 0; *size is written only when
+ * a non-NULL pointer is returned.
+ */
+void *os_info_old_entry(int nr, unsigned long *size)
+{
+ os_info_old_init();
+
+ if (!os_info_old)
+ return NULL;
+ if (!os_info_old->entry[nr].addr)
+ return NULL;
+ *size = (unsigned long) os_info_old->entry[nr].size;
+ return (void *)(unsigned long)os_info_old->entry[nr].addr;
+}
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
new file mode 100644
index 00000000000..ea75d011a6f
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -0,0 +1,696 @@
+/*
+ * Performance event support for s390x - CPU-measurement Counter Facility
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_cf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <asm/ctl_reg.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+
+/* CPU-measurement counter facility supports these CPU counter sets:
+ * For CPU counter sets:
+ * Basic counter set: 0-31
+ * Problem-state counter set: 32-63
+ * Crypto-activity counter set: 64-127
+ * Extended counter set: 128-159
+ */
+enum cpumf_ctr_set {
+ /* CPU counter sets */
+ CPUMF_CTR_SET_BASIC = 0,
+ CPUMF_CTR_SET_USER = 1,
+ CPUMF_CTR_SET_CRYPTO = 2,
+ CPUMF_CTR_SET_EXT = 3,
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
+ [CPUMF_CTR_SET_BASIC] = 0x02,
+ [CPUMF_CTR_SET_USER] = 0x04,
+ [CPUMF_CTR_SET_CRYPTO] = 0x08,
+ [CPUMF_CTR_SET_EXT] = 0x01,
+};
+
+static void ctr_set_enable(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+static void ctr_set_disable(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+static void ctr_set_start(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+static void ctr_set_stop(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+/* Local CPUMF event structure */
+struct cpu_hw_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state, tx_state;
+ unsigned int flags;
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+ .ctr_set = {
+ [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
+ },
+ .state = 0,
+ .flags = 0,
+};
+
+static int get_counter_set(u64 event)
+{
+ int set = -1;
+
+ if (event < 32)
+ set = CPUMF_CTR_SET_BASIC;
+ else if (event < 64)
+ set = CPUMF_CTR_SET_USER;
+ else if (event < 128)
+ set = CPUMF_CTR_SET_CRYPTO;
+ else if (event < 256)
+ set = CPUMF_CTR_SET_EXT;
+
+ return set;
+}
+
+static int validate_event(const struct hw_perf_event *hwc)
+{
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ /* check for reserved counters */
+ if ((hwc->config >= 6 && hwc->config <= 31) ||
+ (hwc->config >= 38 && hwc->config <= 63) ||
+ (hwc->config >= 80 && hwc->config <= 127))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int validate_ctr_version(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check required version for counter sets */
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ if (cpuhw->info.cfvn < 1)
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ if (cpuhw->info.csvn < 1)
+ err = -EOPNOTSUPP;
+ if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
+ (cpuhw->info.csvn == 2 && hwc->config > 175) ||
+ (cpuhw->info.csvn > 2 && hwc->config > 255))
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+static int validate_ctr_auth(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ u64 ctrs_state;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check authorization for cpu counter sets */
+ ctrs_state = cpumf_state_ctl[hwc->config_base];
+ if (!(ctrs_state & cpuhw->info.auth_ctl))
+ err = -EPERM;
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ *
+ * Does nothing if PMU_F_ENABLED is already set. The per-cpu state word is
+ * passed to lcctl(); if that returns non-zero an error is logged and
+ * PMU_F_ENABLED is left unset.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ err = lcctl(cpuhw->state);
+ if (err) {
+ pr_err("Enabling the performance measuring unit "
+ "failed with rc=%x\n", err);
+ return;
+ }
+
+ cpuhw->flags |= PMU_F_ENABLED;
+}
+
+/*
+ * Change the CPUMF state to inactive.
+ * Disable and enable (inactive) the CPU-counter sets according
+ * to the per-cpu control state.
+ *
+ * Does nothing if PMU_F_ENABLED is not set. The value passed to lcctl()
+ * keeps the bits at and above CPUMF_LCCTL_ENABLE_SHIFT and clears all bits
+ * below it, which is where ctr_set_start() places the activation controls.
+ * The per-cpu state word itself is not modified. If lcctl() returns
+ * non-zero an error is logged and PMU_F_ENABLED stays set.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ int err;
+ u64 inactive;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ err = lcctl(inactive);
+ if (err) {
+ pr_err("Disabling the performance measuring unit "
+ "failed with rc=%x\n", err);
+ return;
+ }
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+}
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+static void setup_pmc_cpu(void *flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+}
+
+/* Initialize the CPU-measurement facility */
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* Release the CPU-measurement facility */
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
+
+/* Release the PMU if event is the last perf event
+ *
+ * atomic_add_unless() decrements num_events unless its value is 1. Only
+ * when the value is 1 is the final decrement done under pmc_reserve_mutex,
+ * and the hardware is released if the count then reaches zero.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
+static const int cpumf_generic_events_basic[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 1,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+/* CPUMF <-> perf event mappings for userspace (problem-state set) */
+static const int cpumf_generic_events_user[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 32,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 33,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+
+/*
+ * Map a perf event onto a CPU-measurement counter and take a reference on
+ * the counter facility.
+ *
+ * The counter number is derived from attr->config (directly for raw events,
+ * through the generic event tables for hardware events) and stored in
+ * hwc->config; the counter set it belongs to is stored in hwc->config_base.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EOPNOTSUPP  raw event with exclude bits, kernel-only counting, generic
+ *                event index out of range, reserved counter, or counter
+ *                set version too low
+ *   -ENOENT      unsupported attr->type or generic event without a counter
+ *   -EINVAL      counter number not below PERF_CPUM_CF_MAX_CTR or not in
+ *                any counter set
+ *   -EBUSY       the counter facility could not be reserved
+ *   -EPERM       the counter set is not authorized
+ *
+ * event->destroy is installed only after a reference on num_events has been
+ * taken, so the caller's error path never drops a reference that was not
+ * acquired.
+ */
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int err;
+	u64 ev;
+
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+		/* Raw events are used to access counters directly,
+		 * hence do not permit excludes */
+		if (attr->exclude_kernel || attr->exclude_user ||
+		    attr->exclude_hv)
+			return -EOPNOTSUPP;
+		ev = attr->config;
+		break;
+
+	case PERF_TYPE_HARDWARE:
+		ev = attr->config;
+		/* Count user space (problem-state) only */
+		if (!attr->exclude_user && attr->exclude_kernel) {
+			if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+				return -EOPNOTSUPP;
+			ev = cpumf_generic_events_user[ev];
+
+		/* No support for kernel space counters only */
+		} else if (!attr->exclude_kernel && attr->exclude_user) {
+			return -EOPNOTSUPP;
+
+		/* Count user and kernel space */
+		} else {
+			if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+				return -EOPNOTSUPP;
+			ev = cpumf_generic_events_basic[ev];
+		}
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	/* Table entries of -1 mark generic events without a counter */
+	if (ev == -1)
+		return -ENOENT;
+
+	if (ev >= PERF_CPUM_CF_MAX_CTR)
+		return -EINVAL;
+
+	/* Use the hardware perf event structure to store the counter number
+	 * in 'config' member and the counter set to which the counter belongs
+	 * in the 'config_base'. The counter set (config_base) is then used
+	 * to enable/disable the counters.
+	 */
+	hwc->config = ev;
+	hwc->config_base = get_counter_set(ev);
+
+	/* Validate the counter that is assigned to this event.
+	 * Because the counter facility can use numerous counters at the
+	 * same time without constraints, it is not necessary to explicitly
+	 * validate event groups (event->group_leader != event).
+	 */
+	err = validate_event(hwc);
+	if (err)
+		return err;
+
+	/* Initialize for using the CPU-measurement counter facility */
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	/* Reservation failed: no reference was taken, so report the error
+	 * now instead of letting the checks below overwrite it, and do not
+	 * install a destroy callback that would drop a reference.
+	 */
+	if (err)
+		return err;
+	event->destroy = hw_perf_event_destroy;
+
+	/* Finally, validate version and authorization of the counter set */
+	err = validate_ctr_auth(hwc);
+	if (!err)
+		err = validate_ctr_version(hwc);
+
+	return err;
+}
+
+static int cpumf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ case PERF_TYPE_RAW:
+ /* The CPU measurement counter facility does not have overflow
+ * interrupts to do sampling. Sampling must be provided by
+ * external means, for example, by timers.
+ */
+ if (is_sampling_event(event))
+ return -ENOENT;
+ err = __hw_perf_event_init(event);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (unlikely(err) && event->destroy)
+ event->destroy(event);
+
+ return err;
+}
+
+static int hw_perf_event_reset(struct perf_event *event)
+{
+ u64 prev, new;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err) {
+ if (err != 3)
+ break;
+ /* The counter is not (yet) available. This
+ * might happen if the counter set to which
+ * this counter belongs is in the disabled
+ * state.
+ */
+ new = 0;
+ }
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ return err;
+}
+
+static int hw_perf_event_update(struct perf_event *event)
+{
+ u64 prev, new, delta;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err)
+ goto out;
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ delta = (prev <= new) ? new - prev
+ : (-1ULL - prev) + new + 1; /* overflow */
+ local64_add(delta, &event->count);
+out:
+ return err;
+}
+
+static void cpumf_pmu_read(struct perf_event *event)
+{
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ hw_perf_event_update(event);
+}
+
+static void cpumf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(hwc->config == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* (Re-)enable and activate the counter set */
+ ctr_set_enable(&cpuhw->state, hwc->config_base);
+ ctr_set_start(&cpuhw->state, hwc->config_base);
+
+ /* The counter set to which this counter belongs can be already active.
+ * Because all counters in a set are active, the event->hw.prev_count
+ * needs to be synchronized. At this point, the counter set can be in
+ * the inactive or disabled state.
+ */
+ hw_perf_event_reset(event);
+
+ /* increment refcount for this counter set */
+ atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+}
+
+/*
+ * cpumf_pmu_stop() - stop counting for an event
+ *
+ * Drops the event's reference on its counter set and deactivates the set
+ * when the last user is gone. With PERF_EF_UPDATE, the event count is
+ * brought up to date unless it already is.
+ */
+static void cpumf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		/* The last counter in use clears the activation control,
+		 * that is, the counter set becomes inactive.
+		 */
+		if (atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]) == 0)
+			ctr_set_stop(&cpuhw->state, hwc->config_base);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if (!(flags & PERF_EF_UPDATE) || (hwc->state & PERF_HES_UPTODATE))
+		return;
+
+	hw_perf_event_update(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+/*
+ * cpumf_pmu_add() - add an event to the PMU of the local CPU
+ *
+ * Enables the counter set of the event, marks the event as stopped and up
+ * to date and, with PERF_EF_START, starts it right away.
+ *
+ * Returns zero on success or -EPERM if the counter set is not authorized.
+ */
+static int cpumf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Outside of a group events transaction, check the authorization
+	 * for the counter set of this counter here. Within a transaction,
+	 * cpumf_pmu_commit_txn() performs the check.
+	 */
+	if (!(cpuhw->flags & PERF_EVENT_TXN) && validate_ctr_auth(hwc))
+		return -EPERM;
+
+	ctr_set_enable(&cpuhw->state, hwc->config_base);
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		cpumf_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+/*
+ * cpumf_pmu_del() - remove an event from the PMU of the local CPU
+ *
+ * Stops the event (updating its count) and disables the counter set when
+ * no counter of the set is in use anymore.
+ */
+static void cpumf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	int set = event->hw.config_base;
+
+	cpumf_pmu_stop(event, PERF_EF_UPDATE);
+
+	/* Disable the counter set if none of its counters is used anymore.
+	 * Disabling also clears the content of all counters in the set.
+	 *
+	 * Note that this can hit a perf event that was added but not yet
+	 * started: the enable control is cleared and all counters in the
+	 * set are reset. That is why cpumf_pmu_start() always has to
+	 * reenable a counter set.
+	 */
+	if (atomic_read(&cpuhw->ctr_set[set]) == 0)
+		ctr_set_disable(&cpuhw->state, set);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Open a group events scheduling transaction.
+ * The PMU is disabled, the transaction flag is set so that the authorization
+ * test is done once at commit time, and the current counter set state is
+ * saved for the consistency check in cpumf_pmu_cancel_txn().
+ */
+static void cpumf_pmu_start_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);
+
+	perf_pmu_disable(pmu);
+	hw->flags |= PERF_EVENT_TXN;
+	hw->tx_state = hw->state;
+}
+
+/*
+ * Stop and cancel a group events scheduling transaction.
+ * Assumes that cpumf_pmu_del() has been called for each cpumf_pmu_add()
+ * that succeeded during the transaction; a warning is issued if the counter
+ * set state differs from the state saved when the transaction was started.
+ */
+static void cpumf_pmu_cancel_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);
+
+	WARN_ON(hw->tx_state != hw->state);
+
+	hw->flags &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit the group events scheduling transaction.
+ *
+ * Returns zero and closes the transaction if all enabled counter sets are
+ * authorized. Returns -EPERM otherwise; the transaction then stays open
+ * until cpumf_pmu_cancel_txn() is called.
+ */
+static int cpumf_pmu_commit_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);
+	u64 enabled;
+
+	/* Isolate the enable controls of the updated state ... */
+	enabled = hw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+	enabled >>= CPUMF_LCCTL_ENABLE_SHIFT;
+
+	/* ... and check whether each of them is authorized */
+	if ((enabled & hw->info.auth_ctl) != enabled)
+		return -EPERM;
+
+	hw->flags &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
+	return 0;
+}
+
+/* Performance monitoring unit for s390x
+ *
+ * Callback table handed to perf_pmu_register() in cpumf_pmu_init(), which
+ * also fills in .attr_groups before registering. The enable/disable and
+ * event_init callbacks are defined outside of this section of the file.
+ */
+static struct pmu cpumf_pmu = {
+ .pmu_enable = cpumf_pmu_enable,
+ .pmu_disable = cpumf_pmu_disable,
+ .event_init = cpumf_pmu_event_init,
+ .add = cpumf_pmu_add,
+ .del = cpumf_pmu_del,
+ .start = cpumf_pmu_start,
+ .stop = cpumf_pmu_stop,
+ .read = cpumf_pmu_read,
+ /* group events scheduling transaction */
+ .start_txn = cpumf_pmu_start_txn,
+ .commit_txn = cpumf_pmu_commit_txn,
+ .cancel_txn = cpumf_pmu_cancel_txn,
+};
+
+/*
+ * CPU hotplug notifier: runs setup_pmc_cpu() on the affected CPU with
+ * PMC_INIT when the CPU came online and with PMC_RELEASE before it goes
+ * down. The call waits for completion, so passing the address of the local
+ * flags variable is safe. Always returns NOTIFY_OK.
+ */
+static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
+			      void *hcpu)
+{
+	unsigned int cpu = (long) hcpu;
+	int flags;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+		flags = PMC_INIT;
+		break;
+	case CPU_DOWN_PREPARE:
+		flags = PMC_RELEASE;
+		break;
+	default:
+		return NOTIFY_OK;
+	}
+
+	smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+
+	return NOTIFY_OK;
+}
+
+/*
+ * cpumf_pmu_init() - register the cpum_cf PMU
+ *
+ * Returns -ENODEV if the counter facility is not available, otherwise the
+ * result of registering the measurement-alert interrupt handler and the
+ * PMU. The interrupt handler is unregistered again if the PMU registration
+ * fails. On success, the CPU hotplug notifier is installed.
+ */
+static int __init cpumf_pmu_init(void)
+{
+	int rc;
+
+	if (!cpum_cf_avail())
+		return -ENODEV;
+
+	/* clear bit 15 of cr0 to unauthorize problem-state to
+	 * extract measurement counters */
+	ctl_clear_bit(0, 48);
+
+	/* register handler for measurement-alert interruptions */
+	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				   cpumf_measurement_alert);
+	if (rc) {
+		pr_err("Registering for CPU-measurement alerts "
+		       "failed with rc=%i\n", rc);
+		return rc;
+	}
+
+	cpumf_pmu.attr_groups = cpumf_cf_event_group();
+	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+	if (!rc) {
+		perf_cpu_notifier(cpumf_pmu_notifier);
+		return 0;
+	}
+
+	/* undo the interrupt handler registration */
+	pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+	unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+				cpumf_measurement_alert);
+	return rc;
+}
+early_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 00000000000..4554a4bae39
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+/* One CPUMF_EVENT_ATTR(prefix, name, event number) entry per counter.
+ * The macro is defined outside of this file section; presumably it declares
+ * the attribute object that CPUMF_EVENT_PTR(prefix, name) refers to in the
+ * attribute arrays below -- TODO confirm against the macro definition.
+ */
+
+/* "cf" prefix: counters collected in cpumcf_pmu_event_attr[], which is used
+ * for every machine type (event numbers 0x0000-0x0005, 0x0020-0x0025 and
+ * 0x0040-0x004f).
+ */
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+/* "cf_z10" prefix: model-specific counters starting at event number 0x0080.
+ * The same event numbers are reused with a different meaning by the other
+ * model-specific lists below.
+ */
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+/* "cf_z196" prefix: model-specific counters, event numbers 0x0080-0x009b */
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+/* "cf_zec12" prefix: model-specific counters, event numbers 0x0080-0x00b3 */
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+/* NULL-terminated list of the "cf" counter attributes.
+ * This is the default .attrs of the "events" attribute group and the first
+ * input to merge_attr() when a model-specific list is appended in
+ * cpumf_cf_event_group(). It is not __initdata because the group may keep
+ * pointing to it.
+ */
+static struct attribute *cpumcf_pmu_event_attr[] = {
+ CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+/* NULL-terminated list of the "cf_z10" counter attributes.
+ * Selected by cpumf_cf_event_group() for machine types 0x2097 and 0x2098.
+ * The array is __initdata: it is only read by the __init functions
+ * cpumf_cf_event_group() and merge_attr(), which copy the pointers into a
+ * newly allocated array.
+ */
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+ NULL,
+};
+
+/* NULL-terminated list of the "cf_z196" counter attributes.
+ * Selected by cpumf_cf_event_group() for machine types 0x2817 and 0x2818.
+ * The array is __initdata: it is only read by the __init functions
+ * cpumf_cf_event_group() and merge_attr(), which copy the pointers into a
+ * newly allocated array.
+ */
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ NULL,
+};
+
+/* NULL-terminated list of the "cf_zec12" counter attributes.
+ * Selected by cpumf_cf_event_group() for machine types 0x2827 and 0x2828.
+ * The array is __initdata: it is only read by the __init functions
+ * cpumf_cf_event_group() and merge_attr(), which copy the pointers into a
+ * newly allocated array.
+ */
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+ NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+/* "events" attribute group. It starts out with the "cf" counter list only;
+ * cpumf_cf_event_group() replaces .attrs with a merged list if the machine
+ * type has a model-specific list.
+ * NOTE(review): the identifiers in this section use a "cpumsf" prefix
+ * although this file covers the counter facility (cpum_cf) -- confirm
+ * whether "cpumcf" was intended.
+ */
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumcf_pmu_event_attr,
+};
+
+/* Format attribute "event" with the format string "config:0-63" */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+/* NULL-terminated list of format attributes */
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+/* "format" attribute group */
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+
+/* NULL-terminated list of attribute groups returned by
+ * cpumf_cf_event_group()
+ */
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+
+/*
+ * merge_attr() - concatenate two NULL-terminated attribute pointer lists
+ *
+ * Returns a newly allocated, NULL-terminated array that contains the
+ * entries of @a followed by the entries of @b, or NULL if the allocation
+ * fails. Only the pointers are copied; the caller owns the returned array.
+ */
+static __init struct attribute **merge_attr(struct attribute **a,
+					    struct attribute **b)
+{
+	struct attribute **merged, **src, **dst;
+	int count = 0;
+
+	for (src = a; *src; src++)
+		count++;
+	for (src = b; *src; src++)
+		count++;
+	count++;	/* terminating NULL entry */
+
+	merged = kmalloc(sizeof(struct attribute *) * count, GFP_KERNEL);
+	if (!merged)
+		return NULL;
+
+	dst = merged;
+	for (src = a; *src; src++)
+		*dst++ = *src;
+	for (src = b; *src; src++)
+		*dst++ = *src;
+	*dst = NULL;
+
+	return merged;
+}
+
+/*
+ * cpumf_cf_event_group() - return the sysfs attribute groups of the PMU
+ *
+ * Picks the model-specific counter list that matches the machine type of
+ * the CPU and, if there is one, installs the merged "cf" plus model list
+ * as attributes of the "events" group. If the machine type is unknown or
+ * merging fails, the "events" group keeps the "cf" counters only.
+ */
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+	struct attribute **model = NULL, **merged;
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x2097:
+	case 0x2098:
+		model = cpumcf_z10_pmu_event_attr;
+		break;
+	case 0x2817:
+	case 0x2818:
+		model = cpumcf_z196_pmu_event_attr;
+		break;
+	case 0x2827:
+	case 0x2828:
+		model = cpumcf_zec12_pmu_event_attr;
+		break;
+	default:
+		break;
+	}
+
+	if (model) {
+		merged = merge_attr(cpumcf_pmu_event_attr, model);
+		if (merged)
+			cpumsf_pmu_events_group.attrs = merged;
+	}
+
+	return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 00000000000..ea0c7b2ef03
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1643 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_sf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT 1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ * A table occupies one page, so with 4KB pages this evaluates to
+ * (4096 - 8) / 8 = 511 SDB pointers.
+ */
+#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ * This is the byte offset of the last 8-byte entry within the page.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
+/* Returns non-zero if @sdbt points to the table-link slot of its page */
+static inline int require_table_link(const void *sdbt)
+{
+	unsigned long page_offset = (unsigned long) sdbt & ~PAGE_MASK;
+
+	return page_offset == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * These numbers are the minimum and maximum number of sample-data-blocks
+ * of a sampling buffer; the pages for the sample-data-block-tables come on
+ * top. Note that these numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ *   Sampling buffer size     Buffer characteristics
+ *   ---------------------------------------------------
+ *   64KB                 ==  16 pages (4KB per page)
+ *                             1 page  for SDB-tables
+ *                            15 pages for SDBs
+ *
+ *   32MB                 ==  8192 pages (4KB per page)
+ *                              16 pages for SDB-tables
+ *                            8176 pages for SDBs
+ *
+ * The values are variables rather than constants because
+ * sfb_set_limits() can change them.
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+/* Set by sfb_set_limits() to DIV_ROUND_UP(dsdes, bsdes) as reported by qsi() */
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+/* Sampling buffer: a chain of sample-data-block-tables (SDBT) whose last
+ * table-link entry points back to the origin. An sdbt of NULL means that
+ * no buffer is allocated.
+ */
+struct sf_buffer {
+ unsigned long *sdbt; /* Sample-data-block-table origin */
+ /* buffer characteristics (required for buffer increments) */
+ unsigned long num_sdb; /* Number of sample-data-blocks */
+ unsigned long num_sdbt; /* Number of sample-data-block-tables */
+ /* Table-link entry of the last SDBT, that is, the entry that links
+  * back to the origin (see realloc_sampling_buffer())
+  */
+ unsigned long *tail; /* last sample-data-block-table */
+};
+
+/* Per-CPU state of the sampling facility */
+struct cpu_hw_sf {
+ /* CPU-measurement sampling information block */
+ struct hws_qsi_info_block qsi;
+ /* CPU-measurement sampling control block */
+ struct hws_lsctl_request_block lsctl;
+ struct sf_buffer sfb; /* Sampling buffer */
+ unsigned int flags; /* Status flags */
+ struct perf_event *event; /* Scheduled perf event */
+};
+/* One instance per CPU */
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature: handle passed to the debug_sprintf_event() calls below */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Switch off sampling facility
+ *
+ * Issues lsctl() with an all-zero request block and returns its result.
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block off;
+
+	memset(&off, 0, sizeof(off));
+
+	return lsctl(&off);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ *
+ * Returns 1 if the CPU has a sample-data-block-table origin, 0 otherwise.
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+	return cpuhw->sfb.sdbt != NULL;
+}
+
+/*
+ * free_sampling_buffer() - deallocate sampling facility buffer
+ *
+ * Walks the chain of sample-data-block-tables starting at the origin. Each
+ * page referenced by an SDB pointer is freed; a table page is freed once
+ * its table-link entry is reached. The walk ends when a table-link entry
+ * leads back to the origin or when an empty entry is found. Finally, the
+ * sf_buffer structure is cleared.
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+	unsigned long *table, *entry;
+
+	if (!sfb->sdbt)
+		return;
+
+	table = sfb->sdbt;
+	entry = table;
+
+	/* Free the SDBT after all SDBs are processed... */
+	while (*entry && table) {
+		if (!is_link_entry(entry)) {
+			/* SDB pointer: free the block, go to next entry */
+			free_page(*entry);
+			entry++;
+			continue;
+		}
+
+		/* Table-link entry: look up the next table first, then
+		 * release the current one
+		 */
+		entry = get_next_sdbt(entry);
+		free_page((unsigned long) table);
+
+		/* If the origin is reached, sampling buffer is freed */
+		if (entry == sfb->sdbt)
+			break;
+		table = entry;
+	}
+
+	debug_sprintf_event(sfdbg, 5,
+			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+	memset(sfb, 0, sizeof(*sfb));
+}
+
+/*
+ * alloc_sample_data_block() - allocate one SDB and store it in a table entry
+ *
+ * Allocates a zeroed page, sets the alert request mask in its trailer entry
+ * and writes the page address to the table entry @sdbt.
+ *
+ * Returns zero on success or -ENOMEM if no page could be allocated.
+ */
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	unsigned long block, *te;
+
+	block = get_zeroed_page(gfp_flags);
+	if (!block)
+		return -ENOMEM;
+
+	/* Request a measurement alert for this block */
+	te = trailer_entry_ptr(block);
+	*te = SDB_TE_ALERT_REQ_MASK;
+
+	/* Link SDB into the sample-data-block-table */
+	*sdbt = block;
+
+	return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ * @sfb:	Sampling buffer to extend; must already have an origin and a
+ *		tail whose table-link entry points back to the origin
+ * @num_sdb:	Number of sample-data-blocks to add
+ * @gfp_flags:	Allocation flags for the new pages
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory. New sample-data-block-tables are allocated as needed.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ *	      sampling facility is disabled.
+ *
+ * Returns zero on success, -EINVAL if the buffer is not set up or not
+ * linked to its origin, and -ENOMEM if a page allocation failed. In the
+ * -ENOMEM case, the blocks allocated so far are kept and the buffer is
+ * linked to its origin again, so it remains usable.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	unsigned long i;	/* same type as num_sdb: no signed compare */
+	unsigned long *new, *tail;
+	int rc;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * register.
+	 * The tail variables always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+				    "sampling buffer is not linked: origin=%p "
+				    "tail=%p\n",
+				    (void *) sfb->sdbt, (void *) tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = (unsigned long)(void *) new + 1;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated.  If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc)
+			break;
+		sfb->num_sdb++;
+		tail++;
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = (unsigned long) sfb->sdbt + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+			    " settings: sdbt=%lu sdb=%lu\n",
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
+/*
+ * alloc_sampling_buffer() - allocate sampler memory
+ *
+ * Sets up a new sampling buffer with the specified number of
+ * sample-data-blocks (SDB). Each SDB and each sample-data-block-table
+ * (SDBT) occupies one page; the tables are allocated as required by the
+ * number of SDBs. The ALERT_REQ mask is set in the trailer of each SDB.
+ *
+ * Returns zero on success, -EINVAL if @sfb already has a buffer, and
+ * another non-zero value if the allocation fails. On failure, everything
+ * allocated so far is freed again.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+	unsigned long *origin;
+	int rc;
+
+	if (sfb->sdbt)
+		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	origin = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	sfb->sdbt = origin;
+	if (!origin)
+		return -ENOMEM;
+	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
+
+	/* realloc_sampling_buffer() expects a buffer that is linked to its
+	 * origin. Hence, let the first entry of the origin point to the
+	 * origin itself.
+	 */
+	sfb->tail = origin;
+	*sfb->tail = (unsigned long)(void *) origin + 1;
+
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (!rc) {
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%p dear=%p\n",
+			sfb->sdbt, (void *) *sfb->sdbt);
+		return 0;
+	}
+
+	free_sampling_buffer(sfb);
+	debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+		"realloc_sampling_buffer failed with rc=%i\n", rc);
+	return rc;
+}
+
+/*
+ * sfb_set_limits() - set the limits for the number of sample-data-blocks
+ * @min:	New value for CPUM_SF_MIN_SDB
+ * @max:	New value for CPUM_SF_MAX_SDB
+ *
+ * Also recomputes CPUM_SF_SDB_DIAG_FACTOR as the rounded-up ratio of the
+ * diagnostic to the basic sampling data entry sizes reported by qsi().
+ * The factor is left untouched if qsi() fails or reports a basic entry
+ * size of zero, which would otherwise be a division by zero.
+ */
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	struct hws_qsi_info_block si;
+
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+
+	memset(&si, 0, sizeof(si));
+	if (!qsi(&si) && si.bsdes)
+		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+/*
+ * Maximum number of sample-data-blocks for an event: CPUM_SF_MAX_SDB,
+ * scaled by CPUM_SF_SDB_DIAG_FACTOR if the event uses the diagnostic mode.
+ */
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+	if (SAMPL_DIAG_MODE(hwc))
+		return CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR;
+
+	return CPUM_SF_MAX_SDB;
+}
+
+/*
+ * Number of sample-data-blocks that are accounted for the event but not
+ * yet part of the sampling buffer. Without a buffer, that is the complete
+ * accounted number.
+ */
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	unsigned long accounted = SFB_ALLOC_REG(hwc);
+
+	if (!sfb->sdbt)
+		return accounted;
+
+	return (accounted > sfb->num_sdb) ? accounted - sfb->num_sdb : 0;
+}
+
+/* Returns 1 if sample-data-blocks still have to be allocated, 0 otherwise */
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) != 0;
+}
+
+/*
+ * sfb_account_allocs() - account additional sample-data-blocks for an event
+ * @num:	Number of sample-data-blocks to add
+ * @hwc:	Perf event hardware structure
+ *
+ * Adds @num to the allocation register of the event but never beyond
+ * sfb_max_limit(). Nothing is added if the register has already reached
+ * or exceeded the limit; without that check the remaining headroom would
+ * wrap around in unsigned arithmetic and the limit would not be enforced.
+ */
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	unsigned long limit = sfb_max_limit(hwc);
+	unsigned long accounted = SFB_ALLOC_REG(hwc);
+
+	if (accounted >= limit)
+		return;
+
+	/* Limit the number of SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, limit - accounted);
+	if (num)
+		SFB_ALLOC_REG(hwc) += num;
+}
+
+/*
+ * Restart the accounting for the event: afterwards @num sample-data-blocks
+ * (limited as in sfb_account_allocs()) are accounted.
+ */
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	/* forget previous accounting */
+	SFB_ALLOC_REG(hwc) = 0;
+
+	sfb_account_allocs(num, hwc);
+}
+
+/*
+ * Size of the sample data of an event, taken from the raw sample buffer
+ * referenced by the event: the basic-sampling data entry size plus, in
+ * diagnostic mode, the diagnostic-sampling data entry size.
+ */
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+	struct sf_raw_sample *raw = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	size_t size;
+
+	/* Basic-sampling is always enabled ... */
+	size = raw->bsdes;
+
+	/* ... diagnostic-sampling is optional */
+	if (SAMPL_DIAG_MODE(hwc))
+		size += raw->dsdes;
+
+	return size;
+}
+
+/* Free the sampling buffer of the CPU, if there is one */
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+	struct sf_buffer *sfb = &cpuhw->sfb;
+
+	if (!sfb->sdbt)
+		return;
+
+	free_sampling_buffer(sfb);
+}
+
+/*
+ * allocate_buffers() - allocate the raw sample and the sampling buffer
+ * @cpuhw:	Per-CPU sampling facility state
+ * @hwc:	Perf event hardware structure
+ *
+ * Allocates the raw sample buffer of the event and stores it in the event
+ * hardware structure. Then the required number of sample-data-blocks is
+ * computed and accounted. The sampling buffer itself is allocated only if
+ * the CPU does not have one yet.
+ *
+ * Returns zero on success, -ENOMEM if the raw sample buffer could not be
+ * allocated, or the result of alloc_sampling_buffer().
+ */
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+	unsigned long num_sdb, freq, factor;
+	size_t raw_size, sample_size;
+	struct sf_raw_sample *raw;
+
+	/* Raw sample buffer
+	 *
+	 * Sampling data entries are stored temporarily in this buffer for
+	 * perf raw sample processing. Its size is dominated by the
+	 * machine-specific size of diagnostic-sampling data entries:
+	 *    1. The sf_raw_sample structure already covers the first bytes
+	 *	 of a diagnostic-sampling data entry (its diag member), so
+	 *	 subtract them.
+	 *    2. perf raw sample data must be 8-byte aligned (u64) including
+	 *	 perf's internal u32 size field. So add a u32 before aligning
+	 *	 and subtract it again afterwards.
+	 *    3. The buffer pointer is kept in the perf event hardware
+	 *	 structure.
+	 */
+	raw_size = sizeof(*raw) - sizeof(raw->diag) + cpuhw->qsi.dsdes;
+	raw_size = ALIGN(raw_size + sizeof(u32), sizeof(u64));
+	raw_size -= sizeof(u32);
+	raw = kzalloc(raw_size, GFP_KERNEL);
+	if (!raw)
+		return -ENOMEM;
+	raw->size = raw_size;
+	raw->bsdes = cpuhw->qsi.bsdes;
+	raw->dsdes = cpuhw->qsi.dsdes;
+	RAWSAMPLE_REG(hwc) = (unsigned long) raw;
+
+	/* Sampling buffer, using 4K pages
+	 *
+	 *    1. The sample data size depends on the sampling functions in
+	 *	 use: basic-sampling only or basic-sampling together with
+	 *	 diagnostic-sampling.
+	 *
+	 *    2. The sampling frequency is the input. The buffer is designed
+	 *	 to hold the samples of almost one second; the "factor"
+	 *	 variable can be used to adjust that.
+	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *	 Control indicator to trigger a measurement-alert to harvest
+	 *	 sample-data-blocks (sdb).
+	 *
+	 *    3. The resulting number of sample-data-blocks is at least
+	 *	 CPUM_SF_MIN_SDB. The upper limit is applied when the number
+	 *	 is accounted; it is designed for basic-sampling and is
+	 *	 increased if diagnostic-sampling is active.
+	 *	 See also the remarks for these symbolic constants.
+	 *
+	 *    4. Sample-data-block-tables (SDBT) are allocated as required;
+	 *	 one table can manage up to 511 SDBs.
+	 */
+	sample_size = event_sample_size(hwc);
+	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+	factor = 1;
+	num_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+	if (num_sdb < CPUM_SF_MIN_SDB)
+		num_sdb = CPUM_SF_MIN_SDB;
+
+	/* An existing sampling buffer very likely means that the sampling
+	 * facility is enabled, and the buffer can only be changed while the
+	 * facility is disabled. Therefore, only account the number of
+	 * required SDBs here; if this event needs a greater buffer,
+	 * cpumsf_pmu_enable() resizes it just before the event is started.
+	 */
+	sfb_init_allocs(num_sdb, hwc);
+	if (sf_buffer_available(cpuhw))
+		return 0;
+
+	debug_sprintf_event(sfdbg, 3,
+			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+			    " sample_size=%lu cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, num_sdb, sfb_max_limit(hwc),
+			    sample_size, cpuhw);
+
+	return alloc_sampling_buffer(&cpuhw->sfb,
+				     sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+/*
+ * Returns the smaller value of @min and @percent percent of @base (rounded
+ * up).
+ * NOTE(review): despite its name, @min acts as an upper bound for the
+ * result -- confirm that this is intended.
+ */
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	unsigned long share = DIV_ROUND_UP(percent * base, 100);
+
+	return min_t(unsigned long, min, share);
+}
+
+/* compute_sfb_extent() - Number of SDBs to add for a given loss ratio
+ * @ratio: Lost samples in percent of the samples taken per second
+ * @base: Current number of sample-data-blocks
+ *
+ * Returns 0 if the loss is tolerated (up to 5%), otherwise the extent
+ * computed by min_percent() for the matching ratio class.
+ */
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+ unsigned int percent;
+ unsigned long limit;
+
+ /* Use a percentage-based approach to extend the sampling facility
+ * buffer. Accept up to 5% sample data loss.
+ * Vary the extents between 1% to 5% of the current number of
+ * sample-data-blocks.
+ */
+ if (ratio <= 5)
+ return 0;
+
+ if (ratio <= 50) {
+ percent = 1;
+ limit = 1;
+ } else if (ratio <= 75) {
+ percent = 2;
+ limit = 2;
+ } else if (ratio <= 100) {
+ percent = 3;
+ limit = 3;
+ } else if (ratio <= 250) {
+ percent = 4;
+ limit = 4;
+ } else {
+ percent = 5;
+ limit = 8;
+ }
+
+ return min_percent(percent, base, limit);
+}
+
+/* sfb_account_overflows() - Turn recorded sample overflows into SDB requests
+ * @cpuhw: Per-CPU sampling state (supplies QSI data and the current num_sdb)
+ * @hwc: Perf event hardware structure holding the overflow register
+ *
+ * Does nothing if OVERFLOW_REG(hwc) is zero. Otherwise the overflow value is
+ * converted into a loss ratio in percent, compute_sfb_extent() maps the ratio
+ * to a number of additional sample-data-blocks, and a non-zero result is
+ * passed to sfb_account_allocs(). The overflow register is cleared afterwards.
+ */
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+ struct hw_perf_event *hwc)
+{
+ unsigned long ratio, num;
+
+ if (!OVERFLOW_REG(hwc))
+ return;
+
+ /* The sample_overflow contains the average number of sample data
+ * that has been lost because sample-data-blocks were full.
+ *
+ * Calculate the total number of sample data entries that has been
+ * discarded. Then calculate the ratio of lost samples to total samples
+ * per second in percent.
+ */
+ ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+ sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+ /* Compute number of sample-data-blocks */
+ num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+ if (num)
+ sfb_account_allocs(num, hwc);
+
+ debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+ " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ /* Start a new accounting period */
+ OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb: Sampling buffer structure (for local CPU)
+ * @hwc: Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * The function returns without touching the sampling facility if no
+ * allocations are pending. A failed reallocation is only written to the
+ * debug log; the function has no return value.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ * change the sampling buffer structure. Do not call this function
+ * when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ unsigned long num, num_old;
+ int rc;
+
+ num = sfb_pending_allocs(sfb, hwc);
+ if (!num)
+ return;
+ /* Remember the old size to report how many SDBs were actually added */
+ num_old = sfb->num_sdb;
+
+ /* Disable the sampling facility to reset any states and also
+ * clear pending measurement alerts.
+ */
+ sf_disable();
+
+ /* Extend the sampling buffer.
+ * This memory allocation typically happens in an atomic context when
+ * called by perf. Because this is a reallocation, it is fine if the
+ * new SDB-request cannot be satisfied immediately.
+ */
+ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+ if (rc)
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+ "failed with rc=%i\n", rc);
+
+ if (sfb_has_pending_allocs(sfb, hwc))
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+ "req=%lu alloc=%lu remaining=%lu\n",
+ num, sfb->num_sdb - num_old,
+ sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/* Request codes passed (by pointer) to setup_pmc_cpu(). PMC_FAILURE is
+ * OR'ed into the same integer by setup_pmc_cpu() to report an error.
+ */
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+#define PMC_FAILURE 2
+/* setup_pmc_cpu() - Initialize or release the sampling state of this CPU
+ * @flags: Pointer to an int that holds PMC_INIT or PMC_RELEASE
+ *
+ * Works on the cpu_hw_sf structure of the CPU the function runs on.
+ * PMC_INIT clears the structure, queries the sampling information, marks
+ * the PMU as reserved and disables the sampling facility.
+ * PMC_RELEASE drops the reserved flag, disables the sampling facility and,
+ * if that succeeded, frees the sampling buffers.
+ * On any error, PMC_FAILURE is set in the integer that @flags points to.
+ */
+static void setup_pmc_cpu(void *flags)
+{
+ int err;
+ struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+
+ err = 0;
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(cpusf, 0, sizeof(*cpusf));
+ err = qsi(&cpusf->qsi);
+ if (err)
+ break;
+ cpusf->flags |= PMU_F_RESERVED;
+ err = sf_disable();
+ if (err)
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ break;
+ case PMC_RELEASE:
+ cpusf->flags &= ~PMU_F_RESERVED;
+ err = sf_disable();
+ if (err) {
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ } else
+ deallocate_buffers(cpusf);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ break;
+ }
+ /* Report the failure back to the caller through the shared flags */
+ if (err)
+ *((int *) flags) |= PMC_FAILURE;
+}
+
+/* release_pmc_hardware() - Release the sampling facility on all CPUs
+ *
+ * Unregisters the measurement-alert IRQ subclass, runs setup_pmc_cpu() with
+ * PMC_RELEASE on each CPU (waiting for completion) and finally calls
+ * perf_release_sampling(). Failures reported by setup_pmc_cpu() are not
+ * evaluated here.
+ */
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ perf_release_sampling();
+}
+
+/* reserve_pmc_hardware() - Reserve the sampling facility on all CPUs
+ *
+ * Reserves the sampling facility through perf_reserve_sampling(), runs
+ * setup_pmc_cpu() with PMC_INIT on each CPU and, if that succeeded,
+ * registers the measurement-alert IRQ subclass.
+ *
+ * Returns 0 on success, the error of perf_reserve_sampling(), or -ENODEV
+ * if the initialization failed on at least one CPU.
+ */
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+ int err;
+
+ err = perf_reserve_sampling();
+ if (err)
+ return err;
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ if (flags & PMC_FAILURE) {
+ /* Undo the per-CPU setup and the reservation. Do not call
+ * release_pmc_hardware() here: it would unregister the
+ * measurement-alert IRQ subclass although it has not been
+ * registered yet, leaving the register/unregister calls
+ * unbalanced.
+ */
+ flags = PMC_RELEASE;
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ perf_release_sampling();
+ return -ENODEV;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* hw_perf_event_destroy() - Release the resources of a perf event
+ * @event: The perf event
+ *
+ * Frees the raw sample buffer of the event and drops one reference on
+ * num_events. The sampling facility is released when the last reference
+ * is dropped.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ /* Free raw sample buffer (kfree() ignores a NULL pointer) */
+ kfree((void *) RAWSAMPLE_REG(&event->hw));
+
+ /* Fast path: drop a reference unless it might be the last one */
+ if (atomic_add_unless(&num_events, -1, 1))
+ return;
+
+ /* Release PMC if this is the last perf event */
+ mutex_lock(&pmc_reserve_mutex);
+ if (!atomic_dec_return(&num_events))
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* hw_init_period() - Initialize the period fields of a perf event
+ * @hwc: Perf event hardware structure
+ * @period: Sample period to store
+ *
+ * Sets sample_period, last_period and period_left to @period.
+ */
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+ hwc->sample_period = period;
+ hwc->last_period = period;
+ local64_set(&hwc->period_left, period);
+}
+
+/* hw_reset_registers() - Reset the event's sampling registers
+ * @hwc: Perf event hardware structure
+ * @sdbt_origin: Address of the first sample-data-block-table
+ *
+ * Points TEAR_REG at @sdbt_origin and zeroes the basic and the diagnostic
+ * part of the raw sample buffer referenced by RAWSAMPLE_REG. The raw sample
+ * buffer is dereferenced without a NULL check.
+ */
+static void hw_reset_registers(struct hw_perf_event *hwc,
+ unsigned long *sdbt_origin)
+{
+ struct sf_raw_sample *sfr;
+
+ /* (Re)set to first sample-data-block-table */
+ TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+ /* (Re)set raw sampling buffer register */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ memset(&sfr->basic, 0, sizeof(sfr->basic));
+ /* The diagnostic part is cleared for sfr->dsdes bytes */
+ memset(&sfr->diag, 0, sfr->dsdes);
+}
+
+/* hw_limit_rate() - Limit a sampling rate to the range reported by QSI
+ * @si: Query sampling information block
+ * @rate: Requested sampling rate
+ *
+ * Returns @rate clamped to [si->min_sampl_rate, si->max_sampl_rate].
+ */
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+ unsigned long rate)
+{
+ return clamp_t(unsigned long, rate,
+ si->min_sampl_rate, si->max_sampl_rate);
+}
+
+/* __hw_perf_event_init() - Set up a sampling perf event
+ * @event: The perf event
+ *
+ * Reserves the sampling facility (first event only), checks the sampling
+ * authorization, derives the sampling flags and the sampling rate from the
+ * event attributes and allocates the sampling buffer(s).
+ *
+ * The destroy callback of the event is installed only after a reference on
+ * num_events has been taken. Otherwise the error handling in the caller
+ * would invoke hw_perf_event_destroy() and drop a reference that has never
+ * been acquired.
+ *
+ * Returns 0 on success, -EBUSY if the facility could not be reserved,
+ * -ENOENT if sampling is not authorized, -EPERM if diagnostic sampling is
+ * requested but not authorized, -EINVAL if the sample period exceeds the
+ * perf maximum sample rate, or the error of allocate_buffers().
+ */
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct cpu_hw_sf *cpuhw;
+ struct hws_qsi_info_block si;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+ int cpu, err;
+
+ /* Reserve CPU-measurement sampling facility */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+
+ /* No reference was taken on failure, so do not install destroy() */
+ if (err)
+ goto out;
+
+ event->destroy = hw_perf_event_destroy;
+
+ /* Access per-CPU sampling information (query sampling info) */
+ /*
+ * The event->cpu value can be -1 to count on every CPU, for example,
+ * when attaching to a task. If this is specified, use the query
+ * sampling info from the current CPU, otherwise use event->cpu to
+ * retrieve the per-CPU information.
+ * Later, cpuhw indicates whether to allocate sampling buffers for a
+ * particular CPU (cpuhw!=NULL) or each online CPU (cpuhw==NULL).
+ */
+ memset(&si, 0, sizeof(si));
+ cpuhw = NULL;
+ if (event->cpu == -1)
+ qsi(&si);
+ else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+ si = cpuhw->qsi;
+ }
+
+ /* Check sampling facility authorization and, if not authorized,
+ * fall back to other PMUs. It is safe to check any CPU because
+ * the authorization is identical for all configured CPUs.
+ */
+ if (!si.as) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Always enable basic sampling */
+ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+ /* Check if diagnostic sampling is requested. Deny if the required
+ * sampling authorization is missing.
+ */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+ if (!si.ad) {
+ err = -EPERM;
+ goto out;
+ }
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+ }
+
+ /* Check and set other sampling flags */
+ if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+ /* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed. So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ */
+ rate = 0;
+ if (attr->freq) {
+ rate = freq_to_sample_rate(&si, attr->sample_freq);
+ rate = hw_limit_rate(&si, rate);
+ attr->freq = 0;
+ attr->sample_period = rate;
+ } else {
+ /* The min/max sampling rates specifies the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(&si, hwc->sample_period);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(&si, rate) >
+ sysctl_perf_event_sample_rate) {
+ err = -EINVAL;
+ debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+ goto out;
+ }
+ }
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+
+ /* Initialize sample data overflow accounting */
+ hwc->extra_reg.reg = REG_OVERFLOW;
+ OVERFLOW_REG(hwc) = 0;
+
+ /* Allocate the per-CPU sampling buffer using the CPU information
+ * from the event. If the event is not pinned to a particular
+ * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+ * buffers for each online CPU.
+ */
+ if (cpuhw)
+ /* Event is pinned to a particular CPU */
+ err = allocate_buffers(cpuhw, hwc);
+ else {
+ /* Event is not pinned, allocate sampling buffer on
+ * each online CPU
+ */
+ for_each_online_cpu(cpu) {
+ cpuhw = &per_cpu(cpu_hw_sf, cpu);
+ err = allocate_buffers(cpuhw, hwc);
+ if (err)
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+/* cpumsf_pmu_event_init() - Validate and initialize a perf event
+ * @event: The perf event
+ *
+ * Accepts raw events with config PERF_EVENT_CPUM_SF or
+ * PERF_EVENT_CPUM_SF_DIAG and sampling PERF_COUNT_HW_CPU_CYCLES hardware
+ * events. All other events are rejected with -ENOENT. Branch stack
+ * sampling is rejected with -EOPNOTSUPP and an offline or out-of-range
+ * CPU with -ENODEV. If __hw_perf_event_init() fails, the destroy callback
+ * of the event (if set) is invoked before the error is returned.
+ */
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ /* No support for taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+ (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+ return -ENOENT;
+ break;
+ case PERF_TYPE_HARDWARE:
+ /* Support sampling of CPU cycles in addition to the
+ * counter facility. However, the counter facility
+ * is more precise and, hence, restrict this PMU to
+ * sampling events only.
+ */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+ return -ENOENT;
+ if (!is_sampling_event(event))
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check online status of the CPU to which the event is pinned */
+ if (event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ /* Force reset of idle/hv excludes regardless of what the
+ * user requested.
+ */
+ if (event->attr.exclude_hv)
+ event->attr.exclude_hv = 0;
+ if (event->attr.exclude_idle)
+ event->attr.exclude_idle = 0;
+
+ err = __hw_perf_event_init(event);
+ /* Undo partial initialization through the destroy callback */
+ if (unlikely(err))
+ if (event->destroy)
+ event->destroy(event);
+ return err;
+}
+
+/* cpumsf_pmu_enable() - Enable the sampling PMU on the local CPU
+ * @pmu: The PMU (unused)
+ *
+ * Does nothing if the PMU is already enabled or if an error flag is set.
+ * Otherwise the sampling buffer is extended if required and the saved
+ * sampling controls are loaded with lsctl(). PMU_F_ENABLED is set before
+ * lsctl() is called and cleared again if lsctl() fails.
+ */
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct hw_perf_event *hwc;
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Check whether to extend the sampling buffer.
+ *
+ * Two conditions trigger an increase of the sampling buffer for a
+ * perf event:
+ * 1. Postponed buffer allocations from the event initialization.
+ * 2. Sampling overflows that contribute to pending allocations.
+ *
+ * Note that the extend_sampling_buffer() function disables the sampling
+ * facility, but it can be fully re-enabled using sampling controls that
+ * have been saved in cpumsf_pmu_disable().
+ */
+ if (cpuhw->event) {
+ hwc = &cpuhw->event->hw;
+ /* Account number of overflow-designated buffer extents */
+ sfb_account_overflows(cpuhw, hwc);
+ if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
+ }
+
+ /* (Re)enable the PMU and sampling facility */
+ cpuhw->flags |= PMU_F_ENABLED;
+ barrier();
+
+ err = lsctl(&cpuhw->lsctl);
+ if (err) {
+ cpuhw->flags &= ~PMU_F_ENABLED;
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 1, err);
+ return;
+ }
+
+ debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+ "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+ cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+ (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+/* cpumsf_pmu_disable() - Disable the sampling PMU on the local CPU
+ * @pmu: The PMU (unused)
+ *
+ * Does nothing if the PMU is not enabled or if an error flag is set.
+ * Otherwise the sampling activation controls (cs, cd) are switched off
+ * with lsctl() while the saved controls in cpuhw->lsctl stay untouched, so
+ * that cpumsf_pmu_enable() can reload them. If lsctl() fails, the PMU
+ * remains flagged as enabled. After a successful lsctl(), the current
+ * TEAR and DEAR values are saved for the next enable.
+ */
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct hws_lsctl_request_block inactive;
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Switch off sampling activation control */
+ inactive = cpuhw->lsctl;
+ inactive.cs = 0;
+ inactive.cd = 0;
+
+ err = lsctl(&inactive);
+ if (err) {
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 2, err);
+ return;
+ }
+
+ /* Save state of TEAR and DEAR register contents. Keep the qsi()
+ * result in err so that the debug message below reports the real
+ * error code instead of the (successful) lsctl() result.
+ */
+ err = qsi(&si);
+ if (!err) {
+ /* TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
+ } else
+ debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+ "qsi() failed with err=%i\n", err);
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event: The perf event
+ * @regs: pt_regs structure
+ * @sde_regs: Sample-data-entry (sde) regs structure
+ *
+ * Checks the user/kernel and guest/host exclude settings of the event
+ * against the mode described by @regs and @sde_regs.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+ struct perf_sf_sde_regs *sde_regs)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ return (attr->exclude_user && user_mode(regs)) ||
+ (attr->exclude_kernel && !user_mode(regs)) ||
+ (attr->exclude_guest && sde_regs->in_guest) ||
+ (attr->exclude_host && !sde_regs->in_guest);
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event: The perf event
+ * @sfr: Raw sample buffer that holds the hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample. The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped. Samples that match the exclude settings of the event are
+ * dropped without being pushed.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+{
+ int overflow;
+ struct pt_regs regs;
+ struct perf_sf_sde_regs *sde_regs;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ raw.size = sfr->size;
+ raw.data = sfr;
+ data.raw = &raw;
+
+ /* Setup pt_regs to look like an CPU-measurement external interrupt
+ * using the Program Request Alert code. The regs.int_parm_long
+ * field which is unused contains additional sample-data-entry related
+ * indicators.
+ */
+ memset(&regs, 0, sizeof(regs));
+ regs.int_code = 0x1407;
+ regs.int_parm = CPU_MF_INT_SF_PRA;
+ sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+ /* Rebuild the PSW from the indicators of the basic-sampling entry */
+ regs.psw.addr = sfr->basic.ia;
+ if (sfr->basic.T)
+ regs.psw.mask |= PSW_MASK_DAT;
+ if (sfr->basic.W)
+ regs.psw.mask |= PSW_MASK_WAIT;
+ if (sfr->basic.P)
+ regs.psw.mask |= PSW_MASK_PSTATE;
+ switch (sfr->basic.AS) {
+ case 0x0:
+ regs.psw.mask |= PSW_ASC_PRIMARY;
+ break;
+ case 0x1:
+ regs.psw.mask |= PSW_ASC_ACCREG;
+ break;
+ case 0x2:
+ regs.psw.mask |= PSW_ASC_SECONDARY;
+ break;
+ case 0x3:
+ regs.psw.mask |= PSW_ASC_HOME;
+ break;
+ }
+
+ /* The host-program-parameter (hpp) contains the sie control
+ * block that is set by sie64a() in entry64.S. Check if hpp
+ * refers to a valid control block and set sde_regs flags
+ * accordingly. This would allow to use hpp values for other
+ * purposes too.
+ * For now, simply use a non-zero value as guest indicator.
+ */
+ if (sfr->basic.hpp)
+ sde_regs->in_guest = 1;
+
+ overflow = 0;
+ if (perf_exclude_event(event, &regs, sde_regs))
+ goto out;
+ if (perf_event_overflow(event, &data, &regs)) {
+ overflow = 1;
+ event->pmu->stop(event, 0);
+ }
+ perf_event_update_userpage(event);
+out:
+ return overflow;
+}
+
+/* perf_event_count_update() - Add @count to the counter value of @event */
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+ local64_add(count, &event->count);
+}
+
+/* sample_format_is_valid() - Check the data-entry-format of a sample
+ * @sample: Combined-sampling data entry
+ * @flags: Sampling flags of the perf event
+ *
+ * Return 1 if the data-entry-format (DEF) of each active sampling function
+ * is acceptable, otherwise 0.
+ */
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+ unsigned int flags)
+{
+ /* Only basic-sampling data entries with data-entry-format
+ * version of 0x0001 can be processed.
+ */
+ if ((flags & PERF_CPUM_SF_BASIC_MODE) && sample->basic.def != 0x0001)
+ return 0;
+
+ /* The data-entry-format number of diagnostic-sampling data
+ * entries can vary. Because diagnostic data is just passed
+ * through, do only a sanity check on the DEF.
+ */
+ if ((flags & PERF_CPUM_SF_DIAG_MODE) && sample->diag.def < 0x8001)
+ return 0;
+
+ return 1;
+}
+
+/* sample_is_consistent() - Check whether a sample can be delivered to perf
+ * @sample: Combined-sampling data entry
+ * @flags: Sampling flags of the perf event
+ *
+ * Return 1 if basic-sampling is active and neither the invalid indicator
+ * nor the wait-state bit is set in the basic-sampling data entry,
+ * otherwise 0.
+ */
+static int sample_is_consistent(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ /* This check applies only to basic-sampling data entries of potentially
+ * combined-sampling data entries. Invalid entries cannot be processed
+ * by the PMU and, thus, do not deliver an associated
+ * diagnostic-sampling data entry.
+ */
+ if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+ return 0;
+
+ /* Skip samples that are invalid or for which the instruction address
+ * is not predictable, i.e., the wait-state bit is set.
+ */
+ return !(sample->basic.I || sample->basic.W);
+}
+
+/* reset_sample_slot() - Mark a sample slot as empty
+ * @sample: Combined-sampling data entry
+ * @flags: Sampling flags of the perf event
+ *
+ * Clears the data-entry-format field of the basic and/or the diagnostic
+ * part, depending on the sampling functions selected in @flags.
+ */
+static void reset_sample_slot(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ sample->basic.def = 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ sample->diag.def = 0;
+}
+
+/* sfr_store_sample() - Copy a hardware sample into the raw sample buffer
+ * @sfr: Raw sample buffer; sfr->format selects the parts to copy
+ * @sample: Combined-sampling data entry to copy from
+ *
+ * The basic part is copied by structure assignment, the diagnostic part
+ * is copied for sfr->dsdes bytes.
+ */
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+ struct hws_combined_entry *sample)
+{
+ if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+ sfr->basic = sample->basic;
+ if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+ memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+/* debug_sample_entry() - Log an unknown sampling data entry
+ * @sample: Combined-sampling data entry
+ * @te: Trailer entry of the sample-data-block
+ * @flags: Sampling flags of the perf event
+ *
+ * Writes the block-full indicator and the data-entry-format fields to the
+ * debug log. If diagnostic-sampling is not active, 0xFFFF and NULL are
+ * logged in place of the diagnostic DEF and address.
+ */
+static void debug_sample_entry(struct hws_combined_entry *sample,
+ struct hws_trailer_entry *te,
+ unsigned long flags)
+{
+ debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+ "sampling data entry: te->f=%i basic.def=%04x (%p)"
+ " diag.def=%04x (%p)\n", te->f,
+ sample->basic.def, &sample->basic,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? sample->diag.def : 0xFFFF,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? &sample->diag : NULL);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event: The perf event
+ * @sdbt: Sample-data-block table
+ * @overflow: Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem. Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries. A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry. The sampling function is determined by the flags in the perf
+ * event hardware structure. The function always works with a combined-sampling
+ * data entry but ignores the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variable counts the number of samples that have been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ unsigned long long *overflow)
+{
+ unsigned long flags = SAMPL_FLAGS(&event->hw);
+ struct hws_combined_entry *sample;
+ struct hws_trailer_entry *te;
+ struct sf_raw_sample *sfr;
+ size_t sample_size;
+
+ /* Prepare and initialize raw sample data */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+ sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+
+ sample_size = event_sample_size(&event->hw);
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sample = (struct hws_combined_entry *) *sdbt;
+ while ((unsigned long *) sample < (unsigned long *) te) {
+ /* Check for an empty sample */
+ if (!sample->basic.def)
+ break;
+
+ /* Update perf event period */
+ perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+ /* Check sampling data entry */
+ if (sample_format_is_valid(sample, flags)) {
+ /* If an event overflow occurred, the PMU is stopped to
+ * throttle event delivery. Remaining sample data is
+ * discarded.
+ */
+ if (!*overflow) {
+ if (sample_is_consistent(sample, flags)) {
+ /* Deliver sample data to perf */
+ sfr_store_sample(sfr, sample);
+ *overflow = perf_push_sample(event, sfr);
+ }
+ } else
+ /* Count discarded samples */
+ *overflow += 1;
+ } else {
+ debug_sample_entry(sample, te, flags);
+ /* Sample slot is not yet written or other record.
+ *
+ * This condition can occur if the buffer was reused
+ * from a combined basic- and diagnostic-sampling.
+ * If only basic-sampling is then active, entries are
+ * written into the larger diagnostic entries.
+ * This is typically the case for sample-data-blocks
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+ if (!te->f)
+ break;
+ }
+
+ /* Reset sample slot and advance to next sample.
+ *
+ * sample_size is a size in bytes. Advance by bytes instead of
+ * using plain pointer arithmetic on the structure pointer,
+ * which would scale the offset by the size of
+ * struct hws_combined_entry and skip entries.
+ */
+ reset_sample_slot(sample, flags);
+ sample = (struct hws_combined_entry *)
+ ((unsigned long) sample + sample_size);
+ }
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and creates perf event samples.
+ * The sampling buffer position is retrieved from and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed. Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks. It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ *
+ * Sample overflows found in the trailer entries are accumulated and stored
+ * in OVERFLOW_REG of the event for later use by sfb_account_overflows().
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt;
+ unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ int done;
+
+ if (flush_all && SDB_FULL_BLOCKS(hwc))
+ flush_all = 0;
+
+ sdbt = (unsigned long *) TEAR_REG(hwc);
+ done = event_overflow = sampl_overflow = num_sdb = 0;
+ while (!done) {
+ /* Get the trailer entry of the sample-data-block */
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+ /* Leave loop if no more work to do (block full indicator) */
+ if (!te->f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+ if (te->overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+ sampl_overflow += te->overflow;
+
+ /* Timestamps are valid for full sample-data-blocks only */
+ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+ "overflow=%llu timestamp=0x%llx\n",
+ sdbt, te->overflow,
+ (te->f) ? trailer_timestamp(te) : 0ULL);
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+ * is stopped and remaining samples will be discarded.
+ */
+ hw_collect_samples(event, sdbt, &event_overflow);
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap).
+ * The block-full indicator is cleared, the alert request is
+ * set again and the overflow count is reset to zero. The loop
+ * retries until the exchange succeeds.
+ */
+ do {
+ te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+ te_flags |= SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ te->flags, te->overflow,
+ te_flags, 0ULL));
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ /* Update event hardware registers */
+ TEAR_REG(hwc) = (unsigned long) sdbt;
+
+ /* Stop processing sample-data if all samples of the current
+ * sample-data-block were flushed even if it was not full.
+ */
+ if (flush_all && done)
+ break;
+
+ /* If an event overflow happened, discard samples by
+ * processing any remaining sample-data-blocks.
+ */
+ if (event_overflow)
+ flush_all = 1;
+ }
+
+ /* Account sample overflows in the event hardware structure */
+ if (sampl_overflow)
+ OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+ sampl_overflow, 1 + num_sdb);
+ if (sampl_overflow || event_overflow)
+ debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+ "overflow stats: sample=%llu event=%llu\n",
+ sampl_overflow, event_overflow);
+}
+
+/* cpumsf_pmu_read() - Read callback of the PMU
+ * @event: The perf event (unused)
+ *
+ * Intentionally empty.
+ */
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+ /* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ *
+ * The event must be in the stopped state, otherwise a warning is issued
+ * (once) and nothing is changed. The activation controls are set in the
+ * saved sampling controls of the local CPU while the PMU is disabled; the
+ * diagnostic control (cd) is set only for events in diagnostic mode.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ perf_pmu_disable(event->pmu);
+ event->hw.state = 0;
+ cpuhw->lsctl.cs = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.cd = 1;
+ perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ *
+ * Does nothing if the event is already stopped. If PERF_EF_UPDATE is
+ * requested and the event is not up to date, the sampling buffer is
+ * flushed (including partially filled sample-data-blocks) before the PMU
+ * is enabled again.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_pmu_disable(event->pmu);
+ cpuhw->lsctl.cs = 0;
+ cpuhw->lsctl.cd = 0;
+ event->hw.state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event, 1);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+ perf_pmu_enable(event->pmu);
+}
+
+/* cpumsf_pmu_add() - Add a perf event to the sampling PMU of the local CPU
+ * @event: The perf event
+ * @flags: PERF_EF_* flags; PERF_EF_START also starts the event
+ *
+ * Returns -EAGAIN if the PMU of this CPU is already in use or if the saved
+ * sampling enable controls are unexpectedly set, -EINVAL if no sampling
+ * buffer has been allocated, and 0 on success.
+ */
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ int err;
+
+ if (cpuhw->flags & PMU_F_IN_USE)
+ return -EAGAIN;
+
+ if (!cpuhw->sfb.sdbt)
+ return -EINVAL;
+
+ err = 0;
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* Set up sampling controls. Always program the sampling register
+ * using the SDB-table start. Reset TEAR_REG event hardware register
+ * that is used by hw_perf_event_update() to store the sampling buffer
+ * position after samples have been flushed.
+ */
+ cpuhw->lsctl.s = 0;
+ cpuhw->lsctl.h = 1;
+ cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+ hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+ /* Ensure sampling functions are in the disabled state. If disabled,
+ * switch on sampling enable control. */
+ if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ cpuhw->lsctl.es = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.ed = 1;
+
+ /* Set in_use flag and store event */
+ event->hw.idx = 0; /* only one sampling event per CPU supported */
+ cpuhw->event = event;
+ cpuhw->flags |= PMU_F_IN_USE;
+
+ if (flags & PERF_EF_START)
+ cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+/* cpumsf_pmu_del() - Remove a perf event from the PMU of the local CPU
+ * @event: The perf event
+ * @flags: Unused
+ *
+ * Stops the event (flushing outstanding samples), clears the sampling
+ * enable controls and releases the per-CPU PMU for the next event.
+ */
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ perf_pmu_disable(event->pmu);
+ cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+ cpuhw->lsctl.es = 0;
+ cpuhw->lsctl.ed = 0;
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ cpuhw->event = NULL;
+
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+}
+
+/* cpumsf_pmu_event_idx() - Return the hardware index stored in @event.
+ * cpumsf_pmu_add() sets the index to 0.
+ */
+static int cpumsf_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+/* Named events: basic-sampling and combined basic/diagnostic-sampling */
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+/* NULL-terminated attribute list of the "events" group */
+static struct attribute *cpumsf_pmu_events_attr[] = {
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+ NULL,
+};
+
+/* The event number occupies all 64 bits of perf_event_attr.config */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+/* NULL-terminated attribute list of the "format" group */
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+/* Attribute groups referenced by the PMU definition below */
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+/* PMU definition that wires the callbacks of this file into perf */
+static struct pmu cpumf_sampling = {
+ .pmu_enable = cpumsf_pmu_enable,
+ .pmu_disable = cpumsf_pmu_disable,
+
+ .event_init = cpumsf_pmu_event_init,
+ .add = cpumsf_pmu_add,
+ .del = cpumsf_pmu_del,
+
+ .start = cpumsf_pmu_start,
+ .stop = cpumsf_pmu_stop,
+ .read = cpumsf_pmu_read,
+
+ .event_idx = cpumsf_pmu_event_idx,
+ .attr_groups = cpumsf_pmu_attr_groups,
+};
+
+/* cpumf_measurement_alert() - Measurement-alert external interrupt handler
+ * @ext_code: External interrupt code (unused)
+ * @alert: Measurement alert bits
+ * @unused: Unused
+ *
+ * Handles the sampling facility related alerts on the local CPU: collects
+ * samples on a program request alert, refreshes the sampling information
+ * on an authorization change, and flags an error and disables the sampling
+ * facility on loss of sample data or on an invalid buffer entry. Alerts
+ * without sampling facility bits, and alerts that arrive while the PMU is
+ * not reserved, are ignored.
+ */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_sf *cpuhw;
+
+ if (!(alert & CPU_MF_INT_SF_MASK))
+ return;
+ inc_irq_stat(IRQEXT_CMS);
+ cpuhw = &__get_cpu_var(cpu_hw_sf);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* The processing below must take care of multiple alert events that
+ * might be indicated concurrently. */
+
+ /* Program alert request */
+ if (alert & CPU_MF_INT_SF_PRA) {
+ if (cpuhw->flags & PMU_F_IN_USE)
+ hw_perf_event_update(cpuhw->event, 0);
+ else
+ WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
+
+ /* Report measurement alerts only for non-PRA codes */
+ if (alert != CPU_MF_INT_SF_PRA)
+ debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+ /* Sampling authorization change request */
+ if (alert & CPU_MF_INT_SF_SACA)
+ qsi(&cpuhw->qsi);
+
+ /* Loss of sample data due to high-priority machine activities */
+ if (alert & CPU_MF_INT_SF_LSDA) {
+ pr_err("Sample data was lost\n");
+ cpuhw->flags |= PMU_F_ERR_LSDA;
+ sf_disable();
+ }
+
+ /* Invalid sampling buffer entry */
+ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+ pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ alert);
+ cpuhw->flags |= PMU_F_ERR_IBE;
+ sf_disable();
+ }
+}
+
+/* cpumf_pmu_notifier() - CPU hotplug notifier
+ * @self: Notifier block (unused)
+ * @action: Hotplug action
+ * @hcpu: CPU number, passed as pointer value
+ *
+ * Runs setup_pmc_cpu() on the affected CPU with PMC_INIT when the CPU comes
+ * online and with PMC_RELEASE before it goes down. A failure reported by
+ * setup_pmc_cpu() through the flags is not evaluated. Always returns
+ * NOTIFY_OK.
+ */
+static int cpumf_pmu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long) hcpu;
+ int flags;
+
+ /* Ignore the notification if no events are scheduled on the PMU.
+ * This might be racy...
+ */
+ if (!atomic_read(&num_events))
+ return NOTIFY_OK;
+
+ /* NOTE(review): CPU_TASKS_FROZEN is masked out here, so the
+ * CPU_ONLINE_FROZEN label below presumably never matches - confirm.
+ */
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ flags = PMC_INIT;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ flags = PMC_RELEASE;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/* param_get_sfb_size() - Show the sampling buffer limits
+ * @buffer: Output buffer
+ * @kp: Kernel parameter (unused)
+ *
+ * Writes the limits as "min,max" to @buffer and returns the number of
+ * characters written, or -ENODEV if the sampling facility is not available.
+ */
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+/* param_set_sfb_size() - Set the sampling buffer limits
+ * @val: Parameter string, either "min,max" or "max"
+ * @kp: Kernel parameter (unused)
+ *
+ * If only "max" is given, the current minimum is kept. The limits are
+ * accepted only if min is at least 2, min is smaller than max and max does
+ * not exceed the number of physical pages.
+ *
+ * Returns 0 on success, -ENODEV if the sampling facility is not available,
+ * -EINVAL for an empty string or values out of range, or the parsing error.
+ */
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+ int rc;
+ unsigned long min, max;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ if (!val || !strlen(val))
+ return -EINVAL;
+
+ /* Valid parameter values: "min,max" or "max" */
+ min = CPUM_SF_MIN_SDB;
+ max = CPUM_SF_MAX_SDB;
+ if (strchr(val, ','))
+ rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+ else
+ rc = kstrtoul(val, 10, &max);
+
+ /* A range violation overrides a previous parsing result */
+ if (min < 2 || min >= max || max > get_num_physpages())
+ rc = -EINVAL;
+ if (rc)
+ return rc;
+
+ sfb_set_limits(min, max);
+ pr_info("The sampling buffer limits have changed to: "
+ "min=%lu max=%lu (diag=x%lu)\n",
+ CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+ return 0;
+}
+
+/* Parameter type "sfb_size": type check and get/set operations used by the
+ * cpum_sfb_size core parameter.
+ */
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+ .set = param_set_sfb_size,
+ .get = param_get_sfb_size,
+};
+
+/* Reason codes reported by pr_cpumsf_err() when the initialization fails */
+#define RS_INIT_FAILURE_QSI 0x0001
+#define RS_INIT_FAILURE_BSDES 0x0002
+#define RS_INIT_FAILURE_ALRT 0x0003
+#define RS_INIT_FAILURE_PERF 0x0004
+/* pr_cpumsf_err() - Report an initialization failure
+ * @reason: One of the RS_INIT_FAILURE_* reason codes
+ */
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+ pr_err("Sampling facility support for perf is not available: "
+ "reason=%04x\n", reason);
+}
+
+/* init_cpum_sampling_pmu() - Register the sampling facility PMU
+ *
+ * Checks that the sampling facility is available and that the size of a
+ * basic-sampling data entry matches struct hws_basic_entry, registers the
+ * debug log, the measurement-alert interrupt handler, the PMU and the CPU
+ * hotplug notifier.
+ *
+ * Returns 0 on success, -ENODEV if the facility is not available or cannot
+ * be queried, -EINVAL for an unexpected entry size, or the error of the
+ * failed registration.
+ */
+static int __init init_cpum_sampling_pmu(void)
+{
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+ return -ENODEV;
+ }
+
+ if (si.bsdes != sizeof(struct hws_basic_entry)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+ return -EINVAL;
+ }
+
+ if (si.ad)
+ sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
+ /* A failed debug log registration is reported but not fatal */
+ sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+ if (!sfdbg)
+ pr_err("Registering for s390dbf failed\n");
+ debug_register_view(sfdbg, &debug_sprintf_view);
+
+ /* NOTE(review): the debug log stays registered on the error paths
+ * below - confirm that this is intended.
+ */
+ err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ goto out;
+ }
+
+ err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ goto out;
+ }
+ perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+ return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
new file mode 100644
index 00000000000..61595c1f0a0
--- /dev/null
+++ b/arch/s390/kernel/perf_event.c
@@ -0,0 +1,324 @@
+/*
+ * Performance event support for s390x
+ *
+ * Copyright IBM Corp. 2012, 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "perf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sysinfo.h>
+
+/*
+ * Return the human-readable PMU name: the CPU-MF name when either the
+ * counter or the sampling facility is available, the generic "pmu"
+ * otherwise.
+ */
+const char *perf_pmu_name(void)
+{
+	const char *name = "pmu";
+
+	if (cpum_cf_avail() || cpum_sf_avail())
+		name = "CPU-Measurement Facilities (CPU-MF)";
+	return name;
+}
+EXPORT_SYMBOL(perf_pmu_name);
+
+/*
+ * Return the total number of perf counters: the counter-facility maximum
+ * plus the sampling-facility maximum, each counted only when the
+ * respective facility is available.
+ */
+int perf_num_counters(void)
+{
+	int cf_ctrs = cpum_cf_avail() ? PERF_CPUM_CF_MAX_CTR : 0;
+	int sf_ctrs = cpum_sf_avail() ? PERF_CPUM_SF_MAX_CTR : 0;
+
+	return cf_ctrs + sf_ctrs;
+}
+EXPORT_SYMBOL(perf_num_counters);
+
+/*
+ * Fetch the SIE control block pointer from the first "empty1" slot of the
+ * stack frame addressed by r15.  Returns NULL when r15 is zero.
+ */
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+	struct stack_frame *frame;
+
+	frame = (struct stack_frame *) regs->gprs[15];
+	return frame ? (struct kvm_s390_sie_block *) frame->empty1[0] : NULL;
+}
+
+/*
+ * Tell whether the sample was taken while a guest was running: only
+ * possible with KVM enabled, in kernel mode, with the instruction pointer
+ * sitting exactly on sie_exit.
+ */
+static bool is_in_guest(struct pt_regs *regs)
+{
+#if IS_ENABLED(CONFIG_KVM)
+	if (!user_mode(regs))
+		return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#endif
+	return false;
+}
+
+/*
+ * Return non-zero when the guest PSW has the problem-state bit set.
+ * NOTE(review): sie_block() can return NULL and is dereferenced unchecked
+ * here; the callers in this file only get here after is_in_guest().
+ */
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+/*
+ * Return the guest instruction address taken from the guest PSW of the
+ * SIE control block, masked with PSW_ADDR_INSN.
+ * NOTE(review): sie_block() can return NULL and is dereferenced unchecked
+ * here; the caller in this file only gets here after is_in_guest().
+ */
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+}
+
+/*
+ * Instruction address to report for a sample: the guest address when the
+ * sample hit a running guest, the host address otherwise.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (is_in_guest(regs))
+		return instruction_pointer_guest(regs);
+	return instruction_pointer(regs);
+}
+
+/* Map the guest's problem-state bit to the perf guest user/kernel flag. */
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+	if (guest_is_user_mode(regs))
+		return PERF_RECORD_MISC_GUEST_USER;
+	return PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+/*
+ * Derive the perf misc flags from a pt_regs whose int_parm_long field is
+ * read as a struct perf_sf_sde_regs: its in_guest bit selects the guest or
+ * host flavour, user_mode() selects user or kernel.
+ */
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+	struct perf_sf_sde_regs *sde;
+
+	sde = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+	if (sde->in_guest)
+		return user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+				       : PERF_RECORD_MISC_GUEST_KERNEL;
+	return user_mode(regs) ? PERF_RECORD_MISC_USER
+			       : PERF_RECORD_MISC_KERNEL;
+}
+
+/*
+ * Compute the PERF_RECORD_MISC_* flags for a sample.
+ *
+ * A pt_regs built by the cpum_sf PMU is recognized by its interruption
+ * code/parameter combined with a zero r15; in that case the flags are
+ * taken straight from the embedded sampling data.  Any other pt_regs goes
+ * through the regular guest and user-mode checks.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA &&
+	    !regs->gprs[15])
+		return perf_misc_flags_sf(regs);
+
+	if (is_in_guest(regs))
+		return perf_misc_guest_flags(regs);
+
+	if (user_mode(regs))
+		return PERF_RECORD_MISC_USER;
+	return PERF_RECORD_MISC_KERNEL;
+}
+
+/*
+ * Log the counter-facility version and the authorization, enable and
+ * activation controls of the current CPU.  Prints nothing when the
+ * counter information cannot be queried.
+ */
+static void print_debug_cf(void)
+{
+	struct cpumf_ctr_info info;
+	int this_cpu = smp_processor_id();
+
+	memset(&info, 0, sizeof(info));
+	if (qctri(&info))
+		return;
+
+	pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+		this_cpu, info.cfvn, info.csvn,
+		info.auth_ctl, info.enable_ctl, info.act_ctl);
+}
+
+/*
+ * Log the sampling-facility state of the current CPU: a summary line,
+ * followed by one line each for basic and diagnostic sampling when the
+ * respective authorization bit (si.as / si.ad) is set.  Prints nothing
+ * when the sampling information cannot be queried.
+ */
+static void print_debug_sf(void)
+{
+ struct hws_qsi_info_block si;
+ int cpu = smp_processor_id();
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+ cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+
+ if (si.as)
+ pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+ " bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+ if (si.ad)
+ pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+ " dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+/*
+ * Print debug information for every available measurement facility of the
+ * current CPU.  Interrupts are disabled around the queries and restored
+ * afterwards.
+ */
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (cpum_cf_avail())
+ print_debug_cf();
+ if (cpum_sf_avail())
+ print_debug_sf();
+ local_irq_restore(flags);
+}
+
+/* Service level infrastructure */
+/*
+ * Emit the counter-facility service level line (version and authorization
+ * controls).  Emits nothing when the counter information cannot be queried.
+ */
+static void sl_print_counter(struct seq_file *m)
+{
+	struct cpumf_ctr_info info;
+
+	memset(&info, 0, sizeof(info));
+	if (!qctri(&info))
+		seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+			   "authorization=%04x\n",
+			   info.cfvn, info.csvn, info.auth_ctl);
+}
+
+/*
+ * Emit the sampling-facility service level lines: the sampling rates and
+ * CPU speed, then one line per authorized sampling mode with its sample
+ * size.  Emits nothing when the query fails or no mode is authorized.
+ */
+static void sl_print_sampling(struct seq_file *m)
+{
+	struct hws_qsi_info_block info;
+
+	memset(&info, 0, sizeof(info));
+	if (qsi(&info) || !(info.as || info.ad))
+		return;
+
+	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+		   " cpu_speed=%u\n", info.min_sampl_rate,
+		   info.max_sampl_rate, info.cpu_speed);
+	if (info.as)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+			   " sample_size=%u\n", info.bsdes);
+	if (info.ad)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+			   " sample_size=%u\n", info.dsdes);
+}
+
+/*
+ * seq_print callback of the perf service level: prints the counter and
+ * sampling facility lines for whichever facilities are available.  The
+ * service_level argument is not used.
+ */
+static void service_level_perf_print(struct seq_file *m,
+ struct service_level *sl)
+{
+ if (cpum_cf_avail())
+ sl_print_counter(m);
+ if (cpum_sf_avail())
+ sl_print_sampling(m);
+}
+
+/* Service level descriptor whose output is produced by the callback above. */
+static struct service_level service_level_perf = {
+ .seq_print = service_level_perf_print,
+};
+
+/* Register the perf service level at arch_initcall time. */
+static int __init service_level_perf_register(void)
+{
+ return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
+/*
+ * Walk the stack frames between @low and @high, starting at @sp, and store
+ * the return address (gprs[8]) of every frame in @entry.
+ *
+ * A zero back chain ends one chain of frames.  If a pt_regs fits directly
+ * behind the last frame, it is treated as an interrupt frame: the address
+ * of the interrupted PSW is recorded and the walk continues at the
+ * interrupted stack pointer (r15).
+ *
+ * Returns the stack pointer at which the walk left the [low, high] range,
+ * so the caller can continue on another stack.
+ *
+ * See also arch/s390/kernel/traps.c
+ */
+static unsigned long __store_trace(struct perf_callchain_entry *entry,
+				   unsigned long sp,
+				   unsigned long low, unsigned long high)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+
+	while (1) {
+		sp = sp & PSW_ADDR_INSN;
+		if (sp < low || sp > high - sizeof(*sf))
+			return sp;
+		sf = (struct stack_frame *) sp;
+		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		/* Follow the backchain. */
+		while (1) {
+			/* Frames must move strictly upwards on the stack. */
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;
+			sf = (struct stack_frame *) sp;
+			perf_callchain_store(entry,
+					     sf->gprs[8] & PSW_ADDR_INSN);
+		}
+		/* Zero backchain detected, check for interrupt frame. */
+		sp = (unsigned long) (sf + 1);
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *) sp;
+		/*
+		 * Record where the interrupt hit.  sf->gprs[8] was already
+		 * stored above; storing it again would only duplicate the
+		 * last entry and drop the interrupted address.
+		 */
+		perf_callchain_store(entry, regs->psw.addr & PSW_ADDR_INSN);
+		low = sp;
+		sp = regs->gprs[15];
+	}
+}
+
+/*
+ * Record the kernel call chain for @regs in @entry.
+ *
+ * Nothing is recorded for user-mode registers, a zero stack pointer or a
+ * zero back chain in the first frame.  The walk starts at the back chain
+ * of the frame addressed by r15, first within the async stack and then,
+ * from wherever that walk stopped, within the stack that starts at
+ * S390_lowcore.thread_info.
+ */
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ unsigned long head;
+ struct stack_frame *head_sf;
+
+ if (user_mode(regs))
+ return;
+
+ head = regs->gprs[15];
+ head_sf = (struct stack_frame *) head;
+
+ if (!head_sf || !head_sf->back_chain)
+ return;
+
+ /* Skip the first frame and continue with its caller. */
+ head = head_sf->back_chain;
+ head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
+ S390_lowcore.async_stack);
+
+ __store_trace(entry, head, S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE);
+}
+
+/* Perf definitions for PMU event attributes in sysfs */
+/*
+ * sysfs show routine for a PMU event attribute: writes the line
+ * "event=0x<id>,name=<attribute name>" to @page and returns the value
+ * returned by sprintf().  @attr must be embedded in a struct
+ * perf_pmu_events_attr; @dev is not used.
+ */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%04llx,name=%s\n",
+ pmu_attr->id, attr->attr.name);
+}
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+/*
+ * Reserve the sampling facility for the caller.
+ *
+ * The caller's return address is remembered as the owner.  Returns 0 on
+ * success or -EBUSY (after logging the current owner) when the facility
+ * is already reserved.
+ */
+int perf_reserve_sampling(void)
+{
+	int rc = 0;
+
+	spin_lock(&perf_hw_owner_lock);
+	if (!perf_sampling_owner) {
+		perf_sampling_owner = __builtin_return_address(0);
+	} else {
+		pr_warn("The sampling facility is already reserved by %p\n",
+			perf_sampling_owner);
+		rc = -EBUSY;
+	}
+	spin_unlock(&perf_hw_owner_lock);
+	return rc;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+/*
+ * Release the sampling facility reservation.  Warns when no reservation
+ * exists; the owner is cleared in either case.
+ */
+void perf_release_sampling(void)
+{
+ spin_lock(&perf_hw_owner_lock);
+ WARN_ON(!perf_sampling_owner);
+ perf_sampling_owner = NULL;
+ spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
new file mode 100644
index 00000000000..813ec726087
--- /dev/null
+++ b/arch/s390/kernel/pgm_check.S
@@ -0,0 +1,152 @@
+/*
+ * Program check table.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * PGM_CHECK_64BIT: table entry for a handler that is only used on 64-bit
+ * kernels.  With CONFIG_32BIT the handler argument is ignored and the
+ * entry points to default_trap_handler instead.
+ */
+#ifdef CONFIG_32BIT
+#define PGM_CHECK_64BIT(handler) .long default_trap_handler
+#else
+#define PGM_CHECK_64BIT(handler) .long handler
+#endif
+
+/* PGM_CHECK: one table entry, emitted as a .long holding the handler. */
+#define PGM_CHECK(handler) .long handler
+/* PGM_CHECK_DEFAULT: entry for codes without a dedicated handler. */
+#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler)
+
+/*
+ * The program check table contains exactly 128 (0x00-0x7f) entries. Each
+ * line defines the 31 and/or 64 bit function to be called corresponding
+ * to the program check interruption code.
+ */
+.section .rodata, "a"
+ENTRY(pgm_check_table)
+PGM_CHECK_DEFAULT /* 00 */
+PGM_CHECK(illegal_op) /* 01 */
+PGM_CHECK(privileged_op) /* 02 */
+PGM_CHECK(execute_exception) /* 03 */
+PGM_CHECK(do_protection_exception) /* 04 */
+PGM_CHECK(addressing_exception) /* 05 */
+PGM_CHECK(specification_exception) /* 06 */
+PGM_CHECK(data_exception) /* 07 */
+PGM_CHECK(overflow_exception) /* 08 */
+PGM_CHECK(divide_exception) /* 09 */
+PGM_CHECK(overflow_exception) /* 0a */
+PGM_CHECK(divide_exception) /* 0b */
+PGM_CHECK(hfp_overflow_exception) /* 0c */
+PGM_CHECK(hfp_underflow_exception) /* 0d */
+PGM_CHECK(hfp_significance_exception) /* 0e */
+PGM_CHECK(hfp_divide_exception) /* 0f */
+PGM_CHECK(do_dat_exception) /* 10 */
+PGM_CHECK(do_dat_exception) /* 11 */
+PGM_CHECK(translation_exception) /* 12 */
+PGM_CHECK(special_op_exception) /* 13 */
+PGM_CHECK_DEFAULT /* 14 */
+PGM_CHECK(operand_exception) /* 15 */
+PGM_CHECK_DEFAULT /* 16 */
+PGM_CHECK_DEFAULT /* 17 */
+PGM_CHECK_64BIT(transaction_exception) /* 18 */
+PGM_CHECK_DEFAULT /* 19 */
+PGM_CHECK_DEFAULT /* 1a */
+PGM_CHECK_DEFAULT /* 1b */
+PGM_CHECK(space_switch_exception) /* 1c */
+PGM_CHECK(hfp_sqrt_exception) /* 1d */
+PGM_CHECK_DEFAULT /* 1e */
+PGM_CHECK_DEFAULT /* 1f */
+PGM_CHECK_DEFAULT /* 20 */
+PGM_CHECK_DEFAULT /* 21 */
+PGM_CHECK_DEFAULT /* 22 */
+PGM_CHECK_DEFAULT /* 23 */
+PGM_CHECK_DEFAULT /* 24 */
+PGM_CHECK_DEFAULT /* 25 */
+PGM_CHECK_DEFAULT /* 26 */
+PGM_CHECK_DEFAULT /* 27 */
+PGM_CHECK_DEFAULT /* 28 */
+PGM_CHECK_DEFAULT /* 29 */
+PGM_CHECK_DEFAULT /* 2a */
+PGM_CHECK_DEFAULT /* 2b */
+PGM_CHECK_DEFAULT /* 2c */
+PGM_CHECK_DEFAULT /* 2d */
+PGM_CHECK_DEFAULT /* 2e */
+PGM_CHECK_DEFAULT /* 2f */
+PGM_CHECK_DEFAULT /* 30 */
+PGM_CHECK_DEFAULT /* 31 */
+PGM_CHECK_DEFAULT /* 32 */
+PGM_CHECK_DEFAULT /* 33 */
+PGM_CHECK_DEFAULT /* 34 */
+PGM_CHECK_DEFAULT /* 35 */
+PGM_CHECK_DEFAULT /* 36 */
+PGM_CHECK_DEFAULT /* 37 */
+PGM_CHECK_64BIT(do_dat_exception) /* 38 */
+PGM_CHECK_64BIT(do_dat_exception) /* 39 */
+PGM_CHECK_64BIT(do_dat_exception) /* 3a */
+PGM_CHECK_64BIT(do_dat_exception) /* 3b */
+PGM_CHECK_DEFAULT /* 3c */
+PGM_CHECK_DEFAULT /* 3d */
+PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK_DEFAULT /* 3f */
+PGM_CHECK_DEFAULT /* 40 */
+PGM_CHECK_DEFAULT /* 41 */
+PGM_CHECK_DEFAULT /* 42 */
+PGM_CHECK_DEFAULT /* 43 */
+PGM_CHECK_DEFAULT /* 44 */
+PGM_CHECK_DEFAULT /* 45 */
+PGM_CHECK_DEFAULT /* 46 */
+PGM_CHECK_DEFAULT /* 47 */
+PGM_CHECK_DEFAULT /* 48 */
+PGM_CHECK_DEFAULT /* 49 */
+PGM_CHECK_DEFAULT /* 4a */
+PGM_CHECK_DEFAULT /* 4b */
+PGM_CHECK_DEFAULT /* 4c */
+PGM_CHECK_DEFAULT /* 4d */
+PGM_CHECK_DEFAULT /* 4e */
+PGM_CHECK_DEFAULT /* 4f */
+PGM_CHECK_DEFAULT /* 50 */
+PGM_CHECK_DEFAULT /* 51 */
+PGM_CHECK_DEFAULT /* 52 */
+PGM_CHECK_DEFAULT /* 53 */
+PGM_CHECK_DEFAULT /* 54 */
+PGM_CHECK_DEFAULT /* 55 */
+PGM_CHECK_DEFAULT /* 56 */
+PGM_CHECK_DEFAULT /* 57 */
+PGM_CHECK_DEFAULT /* 58 */
+PGM_CHECK_DEFAULT /* 59 */
+PGM_CHECK_DEFAULT /* 5a */
+PGM_CHECK_DEFAULT /* 5b */
+PGM_CHECK_DEFAULT /* 5c */
+PGM_CHECK_DEFAULT /* 5d */
+PGM_CHECK_DEFAULT /* 5e */
+PGM_CHECK_DEFAULT /* 5f */
+PGM_CHECK_DEFAULT /* 60 */
+PGM_CHECK_DEFAULT /* 61 */
+PGM_CHECK_DEFAULT /* 62 */
+PGM_CHECK_DEFAULT /* 63 */
+PGM_CHECK_DEFAULT /* 64 */
+PGM_CHECK_DEFAULT /* 65 */
+PGM_CHECK_DEFAULT /* 66 */
+PGM_CHECK_DEFAULT /* 67 */
+PGM_CHECK_DEFAULT /* 68 */
+PGM_CHECK_DEFAULT /* 69 */
+PGM_CHECK_DEFAULT /* 6a */
+PGM_CHECK_DEFAULT /* 6b */
+PGM_CHECK_DEFAULT /* 6c */
+PGM_CHECK_DEFAULT /* 6d */
+PGM_CHECK_DEFAULT /* 6e */
+PGM_CHECK_DEFAULT /* 6f */
+PGM_CHECK_DEFAULT /* 70 */
+PGM_CHECK_DEFAULT /* 71 */
+PGM_CHECK_DEFAULT /* 72 */
+PGM_CHECK_DEFAULT /* 73 */
+PGM_CHECK_DEFAULT /* 74 */
+PGM_CHECK_DEFAULT /* 75 */
+PGM_CHECK_DEFAULT /* 76 */
+PGM_CHECK_DEFAULT /* 77 */
+PGM_CHECK_DEFAULT /* 78 */
+PGM_CHECK_DEFAULT /* 79 */
+PGM_CHECK_DEFAULT /* 7a */
+PGM_CHECK_DEFAULT /* 7b */
+PGM_CHECK_DEFAULT /* 7c */
+PGM_CHECK_DEFAULT /* 7d */
+PGM_CHECK_DEFAULT /* 7e */
+PGM_CHECK_DEFAULT /* 7f */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7920861109d..93b9ca42e5c 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -1,51 +1,37 @@
/*
- * arch/s390/kernel/process.c
+ * This file handles the architecture dependent parts of process handling.
*
- * S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Hartmut Penner (hp@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- *
- * Derived from "arch/i386/kernel/process.c"
- * Copyright (C) 1995, Linus Torvalds
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
+ * Copyright IBM Corp. 1999, 2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Hartmut Penner <hp@de.ibm.com>,
+ * Denis Joseph Barrow,
*/
#include <linux/compiler.h>
#include <linux/cpu.h>
-#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/fs.h>
+#include <linux/elfcore.h>
#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/utsname.h>
#include <linux/tick.h>
-#include <linux/elfcore.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
+#include <linux/personality.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+#include <linux/module.h>
#include <asm/io.h>
#include <asm/processor.h>
+#include <asm/vtimer.h>
+#include <asm/exec.h>
#include <asm/irq.h>
-#include <asm/timer.h>
-#include <asm/cpu.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -75,310 +61,163 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
-/*
- * Need to know about CPUs going idle?
- */
-static ATOMIC_NOTIFIER_HEAD(idle_chain);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
-
-int register_idle_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&idle_chain, nb);
-}
-EXPORT_SYMBOL(register_idle_notifier);
-
-int unregister_idle_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&idle_chain, nb);
-}
-EXPORT_SYMBOL(unregister_idle_notifier);
-
-static int s390_idle_enter(void)
-{
- struct s390_idle_data *idle;
- int nr_calls = 0;
- void *hcpu;
- int rc;
-
- hcpu = (void *)(long)smp_processor_id();
- rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
- &nr_calls);
- if (rc == NOTIFY_BAD) {
- nr_calls--;
- __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
- hcpu, nr_calls, NULL);
- return rc;
- }
- idle = &__get_cpu_var(s390_idle);
- spin_lock(&idle->lock);
- idle->idle_count++;
- idle->in_idle = 1;
- idle->idle_enter = get_clock();
- spin_unlock(&idle->lock);
- return NOTIFY_OK;
-}
-
-void s390_idle_leave(void)
-{
- struct s390_idle_data *idle;
-
- idle = &__get_cpu_var(s390_idle);
- spin_lock(&idle->lock);
- idle->idle_time += get_clock() - idle->idle_enter;
- idle->in_idle = 0;
- spin_unlock(&idle->lock);
- atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
- (void *)(long) smp_processor_id());
-}
-
-extern void s390_handle_mcck(void);
-/*
- * The idle loop on a S390...
- */
-static void default_idle(void)
+void arch_cpu_idle(void)
{
- /* CPU is going idle. */
- local_irq_disable();
- if (need_resched()) {
- local_irq_enable();
- return;
- }
- if (s390_idle_enter() == NOTIFY_BAD) {
- local_irq_enable();
- return;
- }
-#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_is_offline(smp_processor_id())) {
- preempt_enable_no_resched();
- cpu_die();
- }
-#endif
local_mcck_disable();
- if (test_thread_flag(TIF_MCCK_PENDING)) {
+ if (test_cpu_flag(CIF_MCCK_PENDING)) {
local_mcck_enable();
- s390_idle_leave();
local_irq_enable();
- s390_handle_mcck();
return;
}
- trace_hardirqs_on();
- /* Wait for external, I/O or machine check interrupt. */
- __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
- PSW_MASK_IO | PSW_MASK_EXT);
+ /* Halt the cpu and keep track of cpu time accounting. */
+ vtime_stop_cpu();
+ local_irq_enable();
}
-void cpu_idle(void)
+void arch_cpu_idle_exit(void)
{
- for (;;) {
- tick_nohz_stop_sched_tick();
- while (!need_resched())
- default_idle();
- tick_nohz_restart_sched_tick();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- }
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
}
-extern void kernel_thread_starter(void);
-
-asm(
- ".align 4\n"
- "kernel_thread_starter:\n"
- " la 2,0(10)\n"
- " basr 14,9\n"
- " la 2,0\n"
- " br 11\n");
-
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+void arch_cpu_idle_dead(void)
{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
- regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
- regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
- regs.gprs[9] = (unsigned long) fn;
- regs.gprs[10] = (unsigned long) arg;
- regs.gprs[11] = (unsigned long) do_exit;
- regs.orig_gpr2 = -1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
- 0, &regs, 0, NULL, NULL);
+ cpu_die();
}
+extern void __kprobes kernel_thread_starter(void);
+
/*
* Free current thread data structures etc..
*/
void exit_thread(void)
{
+ exit_thread_runtime_instr();
}
void flush_thread(void)
{
- clear_used_math();
- clear_tsk_thread_flag(current, TIF_USEDFPU);
}
void release_thread(struct task_struct *dead_task)
{
}
-int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
- unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
+int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+ unsigned long arg, struct task_struct *p)
{
- struct fake_frame
- {
- struct stack_frame sf;
- struct pt_regs childregs;
- } *frame;
+ struct thread_info *ti;
+ struct fake_frame
+ {
+ struct stack_frame sf;
+ struct pt_regs childregs;
+ } *frame;
+
+ frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
+ p->thread.ksp = (unsigned long) frame;
+ /* Save access registers to new thread structure. */
+ save_access_regs(&p->thread.acrs[0]);
+ /* start new process with ar4 pointing to the correct address space */
+ p->thread.mm_segment = get_fs();
+ /* Don't copy debug registers */
+ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+ memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+ clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+ /* Initialize per thread user and system timer values */
+ ti = task_thread_info(p);
+ ti->user_timer = 0;
+ ti->system_timer = 0;
+
+ frame->sf.back_chain = 0;
+ /* new return point is ret_from_fork */
+ frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+ /* fake return stack for resume(), don't go back to schedule */
+ frame->sf.gprs[9] = (unsigned long) frame;
- frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
- p->thread.ksp = (unsigned long) frame;
/* Store access registers to kernel stack of new process. */
- frame->childregs = *regs;
- frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
- frame->childregs.gprs[15] = new_stackp;
- frame->sf.back_chain = 0;
-
- /* new return point is ret_from_fork */
- frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ /* kernel thread */
+ memset(&frame->childregs, 0, sizeof(struct pt_regs));
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.addr = PSW_ADDR_AMODE |
+ (unsigned long) kernel_thread_starter;
+ frame->childregs.gprs[9] = new_stackp; /* function */
+ frame->childregs.gprs[10] = arg;
+ frame->childregs.gprs[11] = (unsigned long) do_exit;
+ frame->childregs.orig_gpr2 = -1;
- /* fake return stack for resume(), don't go back to schedule */
- frame->sf.gprs[9] = (unsigned long) frame;
+ return 0;
+ }
+ frame->childregs = *current_pt_regs();
+ frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.flags = 0;
+ if (new_stackp)
+ frame->childregs.gprs[15] = new_stackp;
- /* Save access registers to new thread structure. */
- save_access_regs(&p->thread.acrs[0]);
+ /* Don't copy runtime instrumentation info */
+ p->thread.ri_cb = NULL;
+ p->thread.ri_signum = 0;
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
#ifndef CONFIG_64BIT
- /*
+ /*
* save fprs to current->thread.fp_regs to merge them with
* the emulated registers and then copy the result to the child.
*/
- save_fp_regs(&current->thread.fp_regs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
memcpy(&p->thread.fp_regs, &current->thread.fp_regs,
sizeof(s390_fp_regs));
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS)
- p->thread.acrs[0] = regs->gprs[6];
+ p->thread.acrs[0] = frame->childregs.gprs[6];
#else /* CONFIG_64BIT */
/* Save the fpu registers to new thread structure. */
- save_fp_regs(&p->thread.fp_regs);
+ save_fp_ctl(&p->thread.fp_regs.fpc);
+ save_fp_regs(p->thread.fp_regs.fprs);
+ p->thread.fp_regs.pad = 0;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
- if (test_thread_flag(TIF_31BIT)) {
- p->thread.acrs[0] = (unsigned int) regs->gprs[6];
+ unsigned long tls = frame->childregs.gprs[6];
+ if (is_compat_task()) {
+ p->thread.acrs[0] = (unsigned int)tls;
} else {
- p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32);
- p->thread.acrs[1] = (unsigned int) regs->gprs[6];
+ p->thread.acrs[0] = (unsigned int)(tls >> 32);
+ p->thread.acrs[1] = (unsigned int)tls;
}
}
#endif /* CONFIG_64BIT */
- /* start new process with ar4 pointing to the correct address space */
- p->thread.mm_segment = get_fs();
- /* Don't copy debug registers */
- memset(&p->thread.per_info,0,sizeof(p->thread.per_info));
-
- return 0;
-}
-
-asmlinkage long sys_fork(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL);
-}
-
-asmlinkage long sys_clone(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- unsigned long clone_flags;
- unsigned long newsp;
- int __user *parent_tidptr, *child_tidptr;
-
- clone_flags = regs->gprs[3];
- newsp = regs->orig_gpr2;
- parent_tidptr = (int __user *) regs->gprs[4];
- child_tidptr = (int __user *) regs->gprs[5];
- if (!newsp)
- newsp = regs->gprs[15];
- return do_fork(clone_flags, newsp, regs, 0,
- parent_tidptr, child_tidptr);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage long sys_vfork(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
- regs->gprs[15], regs, 0, NULL, NULL);
+ return 0;
}
asmlinkage void execve_tail(void)
{
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
current->thread.fp_regs.fpc = 0;
if (MACHINE_HAS_IEEE)
asm volatile("sfpc %0,%0" : : "d" (0));
}
/*
- * sys_execve() executes a new program.
- */
-asmlinkage long sys_execve(void)
-{
- struct pt_regs *regs = task_pt_regs(current);
- char *filename;
- unsigned long result;
- int rc;
-
- filename = getname((char __user *) regs->orig_gpr2);
- if (IS_ERR(filename)) {
- result = PTR_ERR(filename);
- goto out;
- }
- rc = do_execve(filename, (char __user * __user *) regs->gprs[3],
- (char __user * __user *) regs->gprs[4], regs);
- if (rc) {
- result = rc;
- goto out_putname;
- }
- execve_tail();
- result = regs->gprs[2];
-out_putname:
- putname(filename);
-out:
- return result;
-}
-
-/*
* fill in the FPU structure for a core dump.
*/
int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
{
#ifndef CONFIG_64BIT
- /*
+ /*
* save fprs to current->thread.fp_regs to merge them with
* the emulated registers and then copy the result to the dump.
*/
- save_fp_regs(&current->thread.fp_regs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs));
#else /* CONFIG_64BIT */
- save_fp_regs(fpregs);
+ save_fp_ctl(&fpregs->fpc);
+ save_fp_regs(fpregs->fprs);
#endif /* CONFIG_64BIT */
return 1;
}
+EXPORT_SYMBOL(dump_fpu);
unsigned long get_wchan(struct task_struct *p)
{
@@ -404,3 +243,36 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
+/*
+ * Return the initial stack pointer for a new program: lowered by a random
+ * sub-page offset unless randomization is disabled (ADDR_NO_RANDOMIZE
+ * personality or randomize_va_space == 0), then aligned down to 16 bytes.
+ */
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (randomize_va_space &&
+	    !(current->personality & ADDR_NO_RANDOMIZE))
+		sp -= get_random_int() & ~PAGE_MASK;
+
+	sp &= ~0xf;
+	return sp;
+}
+
+/*
+ * Return a random, page-granular offset: up to 0x7ff pages for 32-bit
+ * tasks and up to 0x3ffff pages otherwise (8MB for 32bit, 1GB for 64bit).
+ */
+static inline unsigned long brk_rnd(void)
+{
+	unsigned long rnd_pages_mask;
+
+	rnd_pages_mask = is_32bit_task() ? 0x7ffUL : 0x3ffffUL;
+	return (get_random_int() & rnd_pages_mask) << PAGE_SHIFT;
+}
+
+/*
+ * Return a randomized, page-aligned brk above mm->brk.  Falls back to the
+ * unmodified mm->brk when the randomized value is not larger than it
+ * (for example after an address wrap-around).
+ */
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long rnd_brk = PAGE_ALIGN(mm->brk + brk_rnd());
+
+	if (rnd_brk > mm->brk)
+		return rnd_brk;
+	return mm->brk;
+}
+
+/*
+ * Return a randomized, page-aligned address above @base for tasks that
+ * have PF_RANDOMIZE set.  @base is returned unchanged when the flag is
+ * clear or when the randomized value is not larger than @base.
+ */
+unsigned long randomize_et_dyn(unsigned long base)
+{
+	unsigned long rnd_base;
+
+	if (current->flags & PF_RANDOMIZE) {
+		rnd_base = PAGE_ALIGN(base + brk_rnd());
+		if (rnd_base > base)
+			return rnd_base;
+	}
+	return base;
+}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
new file mode 100644
index 00000000000..24612029f45
--- /dev/null
+++ b/arch/s390/kernel/processor.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/delay.h>
+#include <linux/cpu.h>
+#include <asm/elf.h>
+#include <asm/lowcore.h>
+#include <asm/param.h>
+
+static DEFINE_PER_CPU(struct cpuid, cpu_id);
+
+/*
+ * cpu_init - initializes state that is per-CPU.
+ *
+ * Stores the CPU id in the per-CPU cpu_id variable, installs init_mm as
+ * the active mm of the current task (which must not have an mm of its
+ * own) and clears the per-CPU idle data.
+ */
+void cpu_init(void)
+{
+ struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+ struct cpuid *id = &__get_cpu_var(cpu_id);
+
+ get_cpu_id(id);
+ /* Take a reference on init_mm before using it as active_mm. */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ BUG_ON(current->mm);
+ enter_lazy_tlb(&init_mm, current);
+ memset(idle, 0, sizeof(*idle));
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ *
+ * @v is the iterator cookie produced by c_start(): the CPU number plus
+ * one.  For CPU 0 a general header (vendor, number of processors,
+ * bogomips, feature list, cache information) is printed first.  The
+ * per-CPU line is printed only for CPUs that are online.  Always
+ * returns 0.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ /* Feature names, indexed by their bit number in elf_hwcap. */
+ static const char *hwcap_str[] = {
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+ "edat", "etf3eh", "highgprs", "te"
+ };
+ unsigned long n = (unsigned long) v - 1;
+ int i;
+
+ if (!n) {
+ s390_adjust_jiffies();
+ seq_printf(m, "vendor_id : IBM/S390\n"
+ "# processors : %i\n"
+ "bogomips per cpu: %lu.%02lu\n",
+ num_online_cpus(), loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ))%100);
+ seq_puts(m, "features\t: ");
+ for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+ if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", hwcap_str[i]);
+ seq_puts(m, "\n");
+ show_cacheinfo(m);
+ }
+ /* NOTE(review): presumably holds off CPU hotplug during the lookup. */
+ get_online_cpus();
+ if (cpu_online(n)) {
+ struct cpuid *id = &per_cpu(cpu_id, n);
+ seq_printf(m, "processor %li: "
+ "version = %02X, "
+ "identification = %06X, "
+ "machine = %04X\n",
+ n, id->version, id->ident, id->machine);
+ }
+ put_online_cpus();
+ return 0;
+}
+
+/*
+ * seq_file iterator over the CPU numbers 0 .. nr_cpu_ids - 1.  The cookie
+ * handed to show_cpuinfo() is the position plus one, so that position 0
+ * does not turn into a NULL (end of sequence) pointer.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+/* Advance to the next position and return its cookie, or NULL at the end. */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+/* Nothing to release at the end of an iteration. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+/* Sequence operations that tie the iterator to show_cpuinfo(). */
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 35827b9bd4d..5dc7ad9e2fb 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1,114 +1,143 @@
/*
- * arch/s390/kernel/ptrace.c
+ * Ptrace user space interface.
*
- * S390 version
- * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Copyright IBM Corp. 1999, 2010
+ * Author(s): Denis Joseph Barrow
* Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * Based on PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Derived from "arch/m68k/kernel/ptrace.c"
- * Copyright (C) 1994 by Hamish Macdonald
- * Taken from linux/kernel/ptrace.c and modified for M680x0.
- * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * Modified by Cort Dougan (cort@cs.nmt.edu)
- *
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/signal.h>
-
+#include <linux/elf.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/seccomp.h>
+#include <linux/compat.h>
+#include <trace/syscall.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
+#include <asm/switch_to.h>
#include "entry.h"
#ifdef CONFIG_COMPAT
#include "compat_ptrace.h"
#endif
-static void
-FixPerRegisters(struct task_struct *task)
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+enum s390_regset {
+ REGSET_GENERAL,
+ REGSET_FP,
+ REGSET_LAST_BREAK,
+ REGSET_TDB,
+ REGSET_SYSTEM_CALL,
+ REGSET_GENERAL_EXTENDED,
+};
+
+void update_cr_regs(struct task_struct *task)
{
- struct pt_regs *regs;
- per_struct *per_info;
+ struct pt_regs *regs = task_pt_regs(task);
+ struct thread_struct *thread = &task->thread;
+ struct per_regs old, new;
- regs = task_pt_regs(task);
- per_info = (per_struct *) &task->thread.per_info;
- per_info->control_regs.bits.em_instruction_fetch =
- per_info->single_step | per_info->instruction_fetch;
-
- if (per_info->single_step) {
- per_info->control_regs.bits.starting_addr = 0;
-#ifdef CONFIG_COMPAT
- if (test_thread_flag(TIF_31BIT))
- per_info->control_regs.bits.ending_addr = 0x7fffffffUL;
+#ifdef CONFIG_64BIT
+ /* Take care of the enable/disable of transactional execution. */
+ if (MACHINE_HAS_TE) {
+ unsigned long cr, cr_new;
+
+ __ctl_store(cr, 0, 0);
+ /* Set or clear transaction execution TXC bit 8. */
+ cr_new = cr | (1UL << 55);
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr_new &= ~(1UL << 55);
+ if (cr_new != cr)
+ __ctl_load(cr_new, 0, 0);
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new |= 1UL;
+ else
+ cr_new |= 2UL;
+ }
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
+ }
+#endif
+ /* Copy user specified PER registers */
+ new.control = thread->per_user.control;
+ new.start = thread->per_user.start;
+ new.end = thread->per_user.end;
+
+ /* merge TIF_SINGLE_STEP into user specified PER registers. */
+ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) {
+ if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+ new.control |= PER_EVENT_BRANCH;
else
+ new.control |= PER_EVENT_IFETCH;
+#ifdef CONFIG_64BIT
+ new.control |= PER_CONTROL_SUSPENSION;
+ new.control |= PER_EVENT_TRANSACTION_END;
#endif
- per_info->control_regs.bits.ending_addr = PSW_ADDR_INSN;
- } else {
- per_info->control_regs.bits.starting_addr =
- per_info->starting_addr;
- per_info->control_regs.bits.ending_addr =
- per_info->ending_addr;
+ new.start = 0;
+ new.end = PSW_ADDR_INSN;
}
- /*
- * if any of the control reg tracing bits are on
- * we switch on per in the psw
- */
- if (per_info->control_regs.words.cr[0] & PER_EM_MASK)
- regs->psw.mask |= PSW_MASK_PER;
- else
- regs->psw.mask &= ~PSW_MASK_PER;
- if (per_info->control_regs.bits.em_storage_alteration)
- per_info->control_regs.bits.storage_alt_space_ctl = 1;
- else
- per_info->control_regs.bits.storage_alt_space_ctl = 0;
+ /* Take care of the PER enablement bit in the PSW. */
+ if (!(new.control & PER_EVENT_MASK)) {
+ regs->psw.mask &= ~PSW_MASK_PER;
+ return;
+ }
+ regs->psw.mask |= PSW_MASK_PER;
+ __ctl_store(old, 9, 11);
+ if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
+ __ctl_load(new, 9, 11);
}
void user_enable_single_step(struct task_struct *task)
{
- task->thread.per_info.single_step = 1;
- FixPerRegisters(task);
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
}
void user_disable_single_step(struct task_struct *task)
{
- task->thread.per_info.single_step = 0;
- FixPerRegisters(task);
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+ clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ set_tsk_thread_flag(task, TIF_BLOCK_STEP);
}
/*
* Called by kernel/ptrace.c when detaching..
*
- * Make sure single step bits etc are not set.
+ * Clear all debugging related fields.
*/
-void
-ptrace_disable(struct task_struct *child)
+void ptrace_disable(struct task_struct *task)
{
- /* make sure the single step bit is not set. */
- user_disable_single_step(child);
+ memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
+ memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
+ clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
+ task->thread.per_flags = 0;
}
#ifndef CONFIG_64BIT
@@ -117,6 +146,47 @@ ptrace_disable(struct task_struct *child)
# define __ADDR_MASK 7
#endif
+static inline unsigned long __peek_user_per(struct task_struct *child,
+ addr_t addr)
+{
+ struct per_struct_kernel *dummy = NULL;
+
+ if (addr == (addr_t) &dummy->cr9)
+ /* Control bits of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PER_EVENT_IFETCH : child->thread.per_user.control;
+ else if (addr == (addr_t) &dummy->cr10)
+ /* Start address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ 0 : child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy->cr11)
+ /* End address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PSW_ADDR_INSN : child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy->bits)
+ /* Single-step bit. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ (1UL << (BITS_PER_LONG - 1)) : 0;
+ else if (addr == (addr_t) &dummy->starting_addr)
+ /* Start address of the user specified per set. */
+ return child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy->ending_addr)
+ /* End address of the user specified per set. */
+ return child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy->perc_atmid)
+ /* PER code, ATMID and AI of the last PER trap */
+ return (unsigned long)
+ child->thread.per_event.cause << (BITS_PER_LONG - 16);
+ else if (addr == (addr_t) &dummy->address)
+ /* Address of the last PER trap */
+ return child->thread.per_event.address;
+ else if (addr == (addr_t) &dummy->access_id)
+ /* Access id of the last PER trap */
+ return (unsigned long)
+ child->thread.per_event.paid << (BITS_PER_LONG - 8);
+ return 0;
+}
+
/*
* Read the word at offset addr from the user area of a process. The
* trouble here is that the information is littered over different
@@ -126,33 +196,21 @@ ptrace_disable(struct task_struct *child)
* struct user contain pad bytes that should be read as zeroes.
* Lovely...
*/
-static int
-peek_user(struct task_struct *child, addr_t addr, addr_t data)
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
{
struct user *dummy = NULL;
- addr_t offset, tmp, mask;
-
- /*
- * Stupid gdb peeks/pokes the access registers in 64 bit with
- * an alignment of 4. Programmers from hell...
- */
- mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
- if (addr >= (addr_t) &dummy->regs.acrs &&
- addr < (addr_t) &dummy->regs.orig_gpr2)
- mask = 3;
-#endif
- if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
- return -EIO;
+ addr_t offset, tmp;
if (addr < (addr_t) &dummy->regs.acrs) {
/*
* psw and gprs are stored on the stack
*/
tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
- if (addr == (addr_t) &dummy->regs.psw.mask)
- /* Remove per bit from user psw. */
- tmp &= ~PSW_MASK_PER;
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ /* Return a clean psw mask. */
+ tmp &= PSW_MASK_USER | PSW_MASK_RI;
+ tmp |= PSW_USER_BITS;
+ }
} else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
/*
@@ -177,6 +235,13 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
*/
tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
+ } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ /*
+ * prevent reads of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ tmp = 0;
+
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
@@ -184,64 +249,101 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
offset = addr - (addr_t) &dummy->regs.fp_regs;
tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset);
if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
- tmp &= (unsigned long) FPC_VALID_MASK
- << (BITS_PER_LONG - 32);
+ tmp <<= BITS_PER_LONG - 32;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
- * per_info is found in the thread structure
+ * Handle access to the per_info structure.
*/
- offset = addr - (addr_t) &dummy->regs.per_info;
- tmp = *(addr_t *)((addr_t) &child->thread.per_info + offset);
+ addr -= (addr_t) &dummy->regs.per_info;
+ tmp = __peek_user_per(child, addr);
} else
tmp = 0;
- return put_user(tmp, (addr_t __user *) data);
+ return tmp;
}
-/*
- * Write a word to the user area of a process at location addr. This
- * operation does have an additional problem compared to peek_user.
- * Stores to the program status word and on the floating point
- * control register needs to get checked for validity.
- */
static int
-poke_user(struct task_struct *child, addr_t addr, addr_t data)
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
{
- struct user *dummy = NULL;
- addr_t offset, mask;
+ addr_t tmp, mask;
/*
* Stupid gdb peeks/pokes the access registers in 64 bit with
- * an alignment of 4. Programmers from hell indeed...
+ * an alignment of 4. Programmers from hell...
*/
mask = __ADDR_MASK;
#ifdef CONFIG_64BIT
- if (addr >= (addr_t) &dummy->regs.acrs &&
- addr < (addr_t) &dummy->regs.orig_gpr2)
+ if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
+ addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
mask = 3;
#endif
if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
+ tmp = __peek_user(child, addr);
+ return put_user(tmp, (addr_t __user *) data);
+}
+
+static inline void __poke_user_per(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ struct per_struct_kernel *dummy = NULL;
+
+ /*
+ * There are only three fields in the per_info struct that the
+ * debugger user can write to.
+ * 1) cr9: the debugger wants to set a new PER event mask
+ * 2) starting_addr: the debugger wants to set a new starting
+ * address to use with the PER event mask.
+ * 3) ending_addr: the debugger wants to set a new ending
+ * address to use with the PER event mask.
+ * The user specified PER event mask and the start and end
+ * addresses are used only if single stepping is not in effect.
+ * Writes to any other field in per_info are ignored.
+ */
+ if (addr == (addr_t) &dummy->cr9)
+ /* PER event mask of the user specified per set. */
+ child->thread.per_user.control =
+ data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+ else if (addr == (addr_t) &dummy->starting_addr)
+ /* Starting address of the user specified per set. */
+ child->thread.per_user.start = data;
+ else if (addr == (addr_t) &dummy->ending_addr)
+ /* Ending address of the user specified per set. */
+ child->thread.per_user.end = data;
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation does have an additional problem compared to peek_user.
+ * Stores to the program status word and to the floating point
+ * control register need to be checked for validity.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ struct user *dummy = NULL;
+ addr_t offset;
+
if (addr < (addr_t) &dummy->regs.acrs) {
/*
* psw and gprs are stored on the stack
*/
- if (addr == (addr_t) &dummy->regs.psw.mask &&
-#ifdef CONFIG_COMPAT
- data != PSW_MASK_MERGE(psw_user32_bits, data) &&
-#endif
- data != PSW_MASK_MERGE(psw_user_bits, data))
- /* Invalid psw mask. */
- return -EINVAL;
-#ifndef CONFIG_64BIT
- if (addr == (addr_t) &dummy->regs.psw.addr)
- /* I'd like to reject addresses without the
- high order bit but older gdb's rely on it */
- data |= PSW_ADDR_AMODE;
-#endif
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ unsigned long mask = PSW_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+ if ((data ^ PSW_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+ /* Invalid addressing mode bits */
+ return -EINVAL;
+ }
*(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
@@ -268,54 +370,67 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
*/
task_pt_regs(child)->orig_gpr2 = data;
+ } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ /*
+ * prevent writes of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ return 0;
+
} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
*/
- if (addr == (addr_t) &dummy->regs.fp_regs.fpc &&
- (data & ~((unsigned long) FPC_VALID_MASK
- << (BITS_PER_LONG - 32))) != 0)
- return -EINVAL;
+ if (addr == (addr_t) &dummy->regs.fp_regs.fpc)
+ if ((unsigned int) data != 0 ||
+ test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ return -EINVAL;
offset = addr - (addr_t) &dummy->regs.fp_regs;
*(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data;
} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
/*
- * per_info is found in the thread structure
+ * Handle access to the per_info structure.
*/
- offset = addr - (addr_t) &dummy->regs.per_info;
- *(addr_t *)((addr_t) &child->thread.per_info + offset) = data;
+ addr -= (addr_t) &dummy->regs.per_info;
+ __poke_user_per(child, addr, data);
}
- FixPerRegisters(child);
return 0;
}
-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ addr_t mask;
+
+ /*
+ * Stupid gdb peeks/pokes the access registers in 64 bit with
+ * an alignment of 4. Programmers from hell indeed...
+ */
+ mask = __ADDR_MASK;
+#ifdef CONFIG_64BIT
+ if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
+ addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ mask = 3;
+#endif
+ if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+ return -EIO;
+
+ return __poke_user(child, addr, data);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
{
ptrace_area parea;
int copied, ret;
switch (request) {
- case PTRACE_PEEKTEXT:
- case PTRACE_PEEKDATA:
- /* Remove high order bit from address (only for 31 bit). */
- addr &= PSW_ADDR_INSN;
- /* read word at location addr. */
- return generic_ptrace_peekdata(child, addr, data);
-
case PTRACE_PEEKUSR:
/* read the word at location addr in the USER area. */
return peek_user(child, addr, data);
- case PTRACE_POKETEXT:
- case PTRACE_POKEDATA:
- /* Remove high order bit from address (only for 31 bit). */
- addr &= PSW_ADDR_INSN;
- /* write the word at location addr. */
- return generic_ptrace_pokedata(child, addr, data);
-
case PTRACE_POKEUSR:
/* write the word at location addr in the USER area */
return poke_user(child, addr, data);
@@ -345,8 +460,45 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
copied += sizeof(unsigned long);
}
return 0;
+ case PTRACE_GET_LAST_BREAK:
+ put_user(task_thread_info(child)->last_break,
+ (unsigned long __user *) data);
+ return 0;
+ case PTRACE_ENABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags &= ~PER_FLAG_NO_TE;
+ return 0;
+ case PTRACE_DISABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ return 0;
+ case PTRACE_TE_ABORT_RAND:
+ if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ /* Removing high order bit from addr (only for 31 bit). */
+ addr &= PSW_ADDR_INSN;
+ return ptrace_request(child, request, addr, data);
}
- return ptrace_request(child, request, addr, data);
}
#ifdef CONFIG_COMPAT
@@ -365,36 +517,73 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
*/
/*
+ * Same as peek_user_per but for a 31 bit program.
+ */
+static inline __u32 __peek_user_per_compat(struct task_struct *child,
+ addr_t addr)
+{
+ struct compat_per_struct_kernel *dummy32 = NULL;
+
+ if (addr == (addr_t) &dummy32->cr9)
+ /* Control bits of the active per set. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ PER_EVENT_IFETCH : child->thread.per_user.control;
+ else if (addr == (addr_t) &dummy32->cr10)
+ /* Start address of the active per set. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ 0 : child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy32->cr11)
+ /* End address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PSW32_ADDR_INSN : child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy32->bits)
+ /* Single-step bit. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ 0x80000000 : 0;
+ else if (addr == (addr_t) &dummy32->starting_addr)
+ /* Start address of the user specified per set. */
+ return (__u32) child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy32->ending_addr)
+ /* End address of the user specified per set. */
+ return (__u32) child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy32->perc_atmid)
+ /* PER code, ATMID and AI of the last PER trap */
+ return (__u32) child->thread.per_event.cause << 16;
+ else if (addr == (addr_t) &dummy32->address)
+ /* Address of the last PER trap */
+ return (__u32) child->thread.per_event.address;
+ else if (addr == (addr_t) &dummy32->access_id)
+ /* Access id of the last PER trap */
+ return (__u32) child->thread.per_event.paid << 24;
+ return 0;
+}
+
+/*
* Same as peek_user but for a 31 bit program.
*/
-static int
-peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
{
- struct user32 *dummy32 = NULL;
- per_struct32 *dummy_per32 = NULL;
+ struct compat_user *dummy32 = NULL;
addr_t offset;
__u32 tmp;
- if (!test_thread_flag(TIF_31BIT) ||
- (addr & 3) || addr > sizeof(struct user) - 3)
- return -EIO;
-
if (addr < (addr_t) &dummy32->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
/*
* psw and gprs are stored on the stack
*/
if (addr == (addr_t) &dummy32->regs.psw.mask) {
/* Fake a 31 bit psw mask. */
- tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32);
- tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp);
+ tmp = (__u32)(regs->psw.mask >> 32);
+ tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
+ tmp |= PSW32_USER_BITS;
} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
/* Fake a 31 bit psw address. */
- tmp = (__u32) task_pt_regs(child)->psw.addr |
- PSW32_ADDR_AMODE31;
+ tmp = (__u32) regs->psw.addr |
+ (__u32)(regs->psw.mask & PSW_MASK_BA);
} else {
/* gpr 0-15 */
- tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw +
- addr*2 + 4);
+ tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
}
} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
@@ -409,6 +598,13 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
*/
tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
+ } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ /*
+ * prevent reads of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ tmp = 0;
+
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
@@ -418,62 +614,87 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
- * per_info is found in the thread structure
+ * Handle access to the per_info structure.
*/
- offset = addr - (addr_t) &dummy32->regs.per_info;
- /* This is magic. See per_struct and per_struct32. */
- if ((offset >= (addr_t) &dummy_per32->control_regs &&
- offset < (addr_t) (&dummy_per32->control_regs + 1)) ||
- (offset >= (addr_t) &dummy_per32->starting_addr &&
- offset <= (addr_t) &dummy_per32->ending_addr) ||
- offset == (addr_t) &dummy_per32->lowcore.words.address)
- offset = offset*2 + 4;
- else
- offset = offset*2;
- tmp = *(__u32 *)((addr_t) &child->thread.per_info + offset);
+ addr -= (addr_t) &dummy32->regs.per_info;
+ tmp = __peek_user_per_compat(child, addr);
} else
tmp = 0;
+ return tmp;
+}
+
+static int peek_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ __u32 tmp;
+
+ if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ tmp = __peek_user_compat(child, addr);
return put_user(tmp, (__u32 __user *) data);
}
/*
- * Same as poke_user but for a 31 bit program.
+ * Same as poke_user_per but for a 31 bit program.
*/
-static int
-poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+static inline void __poke_user_per_compat(struct task_struct *child,
+ addr_t addr, __u32 data)
{
- struct user32 *dummy32 = NULL;
- per_struct32 *dummy_per32 = NULL;
- addr_t offset;
- __u32 tmp;
+ struct compat_per_struct_kernel *dummy32 = NULL;
- if (!test_thread_flag(TIF_31BIT) ||
- (addr & 3) || addr > sizeof(struct user32) - 3)
- return -EIO;
+ if (addr == (addr_t) &dummy32->cr9)
+ /* PER event mask of the user specified per set. */
+ child->thread.per_user.control =
+ data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+ else if (addr == (addr_t) &dummy32->starting_addr)
+ /* Starting address of the user specified per set. */
+ child->thread.per_user.start = data;
+ else if (addr == (addr_t) &dummy32->ending_addr)
+ /* Ending address of the user specified per set. */
+ child->thread.per_user.end = data;
+}
- tmp = (__u32) data;
+/*
+ * Same as poke_user but for a 31 bit program.
+ */
+static int __poke_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ struct compat_user *dummy32 = NULL;
+ __u32 tmp = (__u32) data;
+ addr_t offset;
if (addr < (addr_t) &dummy32->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
/*
* psw, gprs, acrs and orig_gpr2 are stored on the stack
*/
if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ __u32 mask = PSW32_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
/* Build a 64 bit psw mask from 31 bit mask. */
- if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp))
+ if ((tmp ^ PSW32_USER_BITS) & ~mask)
/* Invalid psw mask. */
return -EINVAL;
- task_pt_regs(child)->psw.mask =
- PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32);
+ if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+ (regs->psw.mask & PSW_MASK_BA) |
+ (__u64)(tmp & mask) << 32;
} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
/* Build a 64 bit psw address from 31 bit address. */
- task_pt_regs(child)->psw.addr =
- (__u64) tmp & PSW32_ADDR_INSN;
+ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
+ /* Transfer 31 bit amode bit to psw mask. */
+ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
+ (__u64)(tmp & PSW32_ADDR_AMODE);
} else {
/* gpr 0-15 */
- *(__u32*)((addr_t) &task_pt_regs(child)->psw
- + addr*2 + 4) = tmp;
+ *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
}
} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
/*
@@ -488,62 +709,60 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
*/
*(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
+ } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ /*
+ * prevent writes of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ return 0;
+
} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
/*
* floating point regs. are stored in the thread structure
*/
if (addr == (addr_t) &dummy32->regs.fp_regs.fpc &&
- (tmp & ~FPC_VALID_MASK) != 0)
- /* Invalid floating point control. */
+ test_fp_ctl(tmp))
return -EINVAL;
offset = addr - (addr_t) &dummy32->regs.fp_regs;
*(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp;
} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
/*
- * per_info is found in the thread structure.
+ * Handle access to the per_info structure.
*/
- offset = addr - (addr_t) &dummy32->regs.per_info;
- /*
- * This is magic. See per_struct and per_struct32.
- * By incident the offsets in per_struct are exactly
- * twice the offsets in per_struct32 for all fields.
- * The 8 byte fields need special handling though,
- * because the second half (bytes 4-7) is needed and
- * not the first half.
- */
- if ((offset >= (addr_t) &dummy_per32->control_regs &&
- offset < (addr_t) (&dummy_per32->control_regs + 1)) ||
- (offset >= (addr_t) &dummy_per32->starting_addr &&
- offset <= (addr_t) &dummy_per32->ending_addr) ||
- offset == (addr_t) &dummy_per32->lowcore.words.address)
- offset = offset*2 + 4;
- else
- offset = offset*2;
- *(__u32 *)((addr_t) &child->thread.per_info + offset) = tmp;
-
+ addr -= (addr_t) &dummy32->regs.per_info;
+ __poke_user_per_compat(child, addr, data);
}
- FixPerRegisters(child);
return 0;
}
+static int poke_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ if (!is_compat_task() || (addr & 3) ||
+ addr > sizeof(struct compat_user) - 3)
+ return -EIO;
+
+ return __poke_user_compat(child, addr, data);
+}
+
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
unsigned long addr = caddr;
unsigned long data = cdata;
- ptrace_area_emu31 parea;
+ compat_ptrace_area parea;
int copied, ret;
switch (request) {
case PTRACE_PEEKUSR:
/* read the word at location addr in the USER area. */
- return peek_user_emu31(child, addr, data);
+ return peek_user_compat(child, addr, data);
case PTRACE_POKEUSR:
/* write the word at location addr in the USER area */
- return poke_user_emu31(child, addr, data);
+ return poke_user_compat(child, addr, data);
case PTRACE_PEEKUSR_AREA:
case PTRACE_POKEUSR_AREA:
@@ -555,13 +774,13 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
copied = 0;
while (copied < parea.len) {
if (request == PTRACE_PEEKUSR_AREA)
- ret = peek_user_emu31(child, addr, data);
+ ret = peek_user_compat(child, addr, data);
else {
__u32 utmp;
if (get_user(utmp,
(__u32 __force __user *) data))
return -EFAULT;
- ret = poke_user_emu31(child, addr, utmp);
+ ret = poke_user_compat(child, addr, utmp);
}
if (ret)
return ret;
@@ -570,43 +789,582 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
copied += sizeof(unsigned int);
}
return 0;
+ case PTRACE_GET_LAST_BREAK:
+ put_user(task_thread_info(child)->last_break,
+ (unsigned int __user *) data);
+ return 0;
}
return compat_ptrace_request(child, request, addr, data);
}
#endif
-asmlinkage void
-syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
{
- if (unlikely(current->audit_context) && entryexit)
- audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]);
+ long ret = 0;
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- goto out;
- if (!(current->ptrace & PT_PTRACED))
+ /* Do the secure computing check first. */
+ if (secure_computing(regs->gprs[2])) {
+ /* seccomp failures shouldn't expose any additional code. */
+ ret = -1;
goto out;
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
+ }
/*
- * If the debuffer has set an invalid system call number,
- * we prepare to skip the system call restart handling.
+ * The sysc_tracesys code in entry.S stored the system
+ * call number to gprs[2].
*/
- if (!entryexit && regs->gprs[2] >= NR_syscalls)
- regs->trap = -1;
+ if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+ (tracehook_report_syscall_entry(regs) ||
+ regs->gprs[2] >= NR_syscalls)) {
+ /*
+ * Tracing decided this syscall should not happen or the
+ * debugger stored an invalid system call number. Skip
+ * the system call and the system call restart handling.
+ */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ ret = -1;
+ }
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gprs[2]);
+
+ audit_syscall_entry(is_compat_task() ?
+ AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
+ regs->gprs[2], regs->orig_gpr2,
+ regs->gprs[3], regs->gprs[4],
+ regs->gprs[5]);
+out:
+ return ret ?: regs->gprs[2];
+}
+
+/*
+ * Syscall exit hook. Always calls audit_syscall_exit(); additionally fires
+ * the sys_exit tracepoint (passing gprs[2]) when TIF_SYSCALL_TRACEPOINT is
+ * set and reports the exit via tracehook when TIF_SYSCALL_TRACE is set.
+ */
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
+{
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->gprs[2]);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, 0);
+}
+
+/*
+ * user_regset definitions.
+ */
+
+/*
+ * Regset "get" handler for the general register set. Copies count bytes,
+ * one unsigned long at a time, starting at offset pos; each word is read
+ * with __peek_user(). The words go to kbuf when it is non-NULL, otherwise
+ * to the user buffer ubuf. Returns 0, or -EFAULT if a user store fails.
+ */
+static int s390_regs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ /* For the running task, update thread.acrs via save_access_regs() first */
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ while (count > 0) {
+ *k++ = __peek_user(target, pos);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ unsigned long __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(__peek_user(target, pos), u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
}
- out:
- if (unlikely(current->audit_context) && !entryexit)
- audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X,
- regs->gprs[2], regs->orig_gpr2, regs->gprs[3],
- regs->gprs[4], regs->gprs[5]);
+ return 0;
+}
+
+/*
+ * Regset "set" handler for the general register set. Takes count bytes,
+ * one unsigned long at a time, from kbuf (when non-NULL) or from the user
+ * buffer ubuf and stores each word at offset pos with __poke_user().
+ * Stops at the first error and returns it; returns 0 on success.
+ */
+static int s390_regs_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	/* For the running task, update thread.acrs before modifying it */
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const unsigned long *src = kbuf;
+
+		for (; count > 0 && !rc;
+		     count -= sizeof(*src), pos += sizeof(*src))
+			rc = __poke_user(target, pos, *src++);
+	} else {
+		const unsigned long __user *usrc = ubuf;
+
+		while (count > 0 && !rc) {
+			unsigned long val;
+
+			rc = __get_user(val, usrc++);
+			if (rc)
+				break;
+			rc = __poke_user(target, pos, val);
+			count -= sizeof(*usrc);
+			pos += sizeof(*usrc);
+		}
+	}
+
+	/* On success, reload the access registers of the running task */
+	if (target == current && rc == 0)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+/*
+ * Regset "get" handler for the floating point registers: copies
+ * target->thread.fp_regs out through user_regset_copyout(). For the
+ * current task the FP control word and FP registers are first saved into
+ * thread.fp_regs with save_fp_ctl()/save_fp_regs().
+ */
+static int s390_fpregs_get(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, void *kbuf, void __user *ubuf)
+{
+ if (target == current) {
+ save_fp_ctl(&target->thread.fp_regs.fpc);
+ save_fp_regs(target->thread.fp_regs.fprs);
+ }
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_regs, 0, -1);
+}
+
+/*
+ * Regset "set" handler for the floating point registers. The part of the
+ * request that lies before offsetof(s390_fp_regs, fprs) is copied into a
+ * temporary and validated before it is stored; the remainder is copied
+ * straight into thread.fp_regs.fprs. Returns 0, the copy-in error, or
+ * -EINVAL when validation fails.
+ */
+static int s390_fpregs_set(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int rc = 0;
+
+ /* For the running task, save the FP state into thread.fp_regs first */
+ if (target == current) {
+ save_fp_ctl(&target->thread.fp_regs.fpc);
+ save_fp_regs(target->thread.fp_regs.fprs);
+ }
+
+ /* If setting FPC, must validate it first. */
+ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+ /* ufpc[0] starts as the current fpc, ufpc[1] as zero */
+ u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
+ 0, offsetof(s390_fp_regs, fprs));
+ if (rc)
+ return rc;
+ /* Reject a non-zero second word or an fpc that test_fp_ctl() flags */
+ if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+ return -EINVAL;
+ target->thread.fp_regs.fpc = ufpc[0];
+ }
+
+ if (rc == 0 && count > 0)
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.fp_regs.fprs,
+ offsetof(s390_fp_regs, fprs), -1);
+
+ /* On success, load the updated FP state for the running task */
+ if (rc == 0 && target == current) {
+ restore_fp_ctl(&target->thread.fp_regs.fpc);
+ restore_fp_regs(target->thread.fp_regs.fprs);
+ }
+
+ return rc;
+}
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Regset "get" handler for the last-break value: when count is non-zero,
+ * stores task_thread_info(target)->last_break as one unsigned long into
+ * kbuf (if non-NULL) or into the user buffer ubuf. Returns 0, or -EFAULT
+ * if the user store fails.
+ */
+static int s390_last_break_get(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       void *kbuf, void __user *ubuf)
+{
+	unsigned long __user *udst = ubuf;
+	unsigned long *kdst = kbuf;
+
+	if (count == 0)
+		return 0;
+
+	if (kdst) {
+		*kdst = task_thread_info(target)->last_break;
+		return 0;
+	}
+
+	if (__put_user(task_thread_info(target)->last_break, udst))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Regset "set" handler for the last-break value: accepts the request and
+ * returns 0 without reading the buffers or modifying the target.
+ */
+static int s390_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+/*
+ * Regset "get" handler for NT_S390_TDB: copies out the 256-byte
+ * target->thread.trap_tdb buffer. Returns -ENODATA when bit 0x200 is not
+ * set in the int_code of the target's pt_regs.
+ */
+static int s390_tdb_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	if (task_pt_regs(target)->int_code & 0x200)
+		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					   target->thread.trap_tdb, 0, 256);
+
+	return -ENODATA;
+}
+
+/*
+ * Regset "set" handler for NT_S390_TDB: returns 0 without reading the
+ * buffers or modifying the target.
+ */
+static int s390_tdb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+#endif
+
+/*
+ * Regset "get" handler for NT_S390_SYSTEM_CALL: copies out the unsigned int
+ * system_call field of the target's thread_info.
+ */
+static int s390_system_call_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &task_thread_info(target)->system_call,
+				   0, sizeof(unsigned int));
+}
+
+/*
+ * Regset "set" handler for NT_S390_SYSTEM_CALL: copies the supplied value
+ * into the unsigned int system_call field of the target's thread_info.
+ */
+static int s390_system_call_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &task_thread_info(target)->system_call,
+				  0, sizeof(unsigned int));
+}
+
+/*
+ * Regset table of the native view, indexed by the REGSET_* constants. Each
+ * entry gives the core note type, the element count/size/alignment and the
+ * get/set handlers. REGSET_LAST_BREAK and REGSET_TDB are only present when
+ * CONFIG_64BIT is defined.
+ */
+static const struct user_regset s390_regsets[] = {
+ [REGSET_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(s390_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_regs_get,
+ .set = s390_regs_set,
+ },
+ [REGSET_FP] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(s390_fp_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_fpregs_get,
+ .set = s390_fpregs_set,
+ },
+#ifdef CONFIG_64BIT
+ [REGSET_LAST_BREAK] = {
+ .core_note_type = NT_S390_LAST_BREAK,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_last_break_get,
+ .set = s390_last_break_set,
+ },
+ /* A single 256-byte element, byte aligned */
+ [REGSET_TDB] = {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
+#endif
+ [REGSET_SYSTEM_CALL] = {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(unsigned int),
+ .align = sizeof(unsigned int),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+};
+
+/* Native regset view: named UTS_MACHINE and backed by s390_regsets[] */
+static const struct user_regset_view user_s390_view = {
+ .name = UTS_MACHINE,
+ .e_machine = EM_S390,
+ .regsets = s390_regsets,
+ .n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat regset "get" handler for the general register set. Copies count
+ * bytes, one compat_ulong_t at a time, starting at offset pos; each word is
+ * read with __peek_user_compat(). The words go to kbuf when it is non-NULL,
+ * otherwise to the user buffer ubuf. Returns 0, or -EFAULT if a user store
+ * fails.
+ */
+static int s390_compat_regs_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	/* For the running task, update thread.acrs via save_access_regs() */
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		compat_ulong_t *dst = kbuf;
+
+		for (; count > 0;
+		     count -= sizeof(*dst), pos += sizeof(*dst))
+			*dst++ = __peek_user_compat(target, pos);
+		return 0;
+	}
+
+	{
+		compat_ulong_t __user *udst = ubuf;
+
+		for (; count > 0;
+		     count -= sizeof(*udst), pos += sizeof(*udst)) {
+			if (__put_user(__peek_user_compat(target, pos),
+				       udst++))
+				return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Compat regset "set" handler for the general register set. Takes count
+ * bytes, one compat_ulong_t at a time, from kbuf (when non-NULL) or from
+ * the user buffer ubuf and stores each word at offset pos with
+ * __poke_user_compat(). Stops at the first error and returns it; returns 0
+ * on success.
+ */
+static int s390_compat_regs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc = 0;
+
+ /* For the running task, update thread.acrs before modifying it */
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ const compat_ulong_t *k = kbuf;
+ while (count > 0 && !rc) {
+ rc = __poke_user_compat(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const compat_ulong_t __user *u = ubuf;
+ while (count > 0 && !rc) {
+ compat_ulong_t word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ rc = __poke_user_compat(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ /* On success, reload the access registers of the running task */
+ if (rc == 0 && target == current)
+ restore_access_regs(target->thread.acrs);
+
+ return rc;
+}
+
+/*
+ * Compat regset "get" handler for NT_S390_HIGH_GPRS. Reads every second
+ * 32-bit word of the target's pt_regs gprs array, starting at the entry
+ * selected by pos, and copies count bytes of them to kbuf (when non-NULL)
+ * or to the user buffer ubuf. Returns 0, or -EFAULT if a user store fails.
+ */
+static int s390_compat_regs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ compat_ulong_t *gprs_high;
+
+ /* pos counts 32-bit regset elements; element i maps to gprs[i] */
+ gprs_high = (compat_ulong_t *)
+ &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+ if (kbuf) {
+ compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *k++ = *gprs_high;
+ /* step over two 32-bit words to reach the next gprs[] entry */
+ gprs_high += 2;
+ count -= sizeof(*k);
+ }
+ } else {
+ compat_ulong_t __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(*gprs_high, u++))
+ return -EFAULT;
+ gprs_high += 2;
+ count -= sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+/*
+ * Compat regset "set" handler for NT_S390_HIGH_GPRS. Takes count bytes of
+ * 32-bit words from kbuf (when non-NULL) or from the user buffer ubuf and
+ * stores them into every second 32-bit word of the target's pt_regs gprs
+ * array, starting at the entry selected by pos. This mirrors the layout
+ * walked by s390_compat_regs_high_get(). Returns 0, or the __get_user()
+ * error if a user load fails.
+ */
+static int s390_compat_regs_high_set(struct task_struct *target,
+				     const struct user_regset *regset,
+				     unsigned int pos, unsigned int count,
+				     const void *kbuf, const void __user *ubuf)
+{
+	compat_ulong_t *gprs_high;
+	int rc = 0;
+
+	/* pos counts 32-bit regset elements; element i maps to gprs[i] */
+	gprs_high = (compat_ulong_t *)
+		&task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+	if (kbuf) {
+		const compat_ulong_t *k = kbuf;
+
+		while (count > 0) {
+			*gprs_high = *k++;
+			/*
+			 * Advance the pointer to the next gprs[] entry. The
+			 * previous "*gprs_high += 2" added 2 to the value
+			 * just stored and never moved to the next register.
+			 */
+			gprs_high += 2;
+			count -= sizeof(*k);
+		}
+	} else {
+		const compat_ulong_t __user *u = ubuf;
+
+		while (count > 0 && !rc) {
+			compat_ulong_t word;
+
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			*gprs_high = word;
+			/* advance the pointer, not the stored value */
+			gprs_high += 2;
+			count -= sizeof(*u);
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * Compat regset "get" handler for the last-break value: when count is
+ * non-zero, narrows task_thread_info(target)->last_break to compat_ulong_t
+ * and stores it through an unsigned long pointer into kbuf (if non-NULL)
+ * or the user buffer ubuf. Returns 0, or -EFAULT if the user store fails.
+ */
+static int s390_compat_last_break_get(struct task_struct *target,
+				      const struct user_regset *regset,
+				      unsigned int pos, unsigned int count,
+				      void *kbuf, void __user *ubuf)
+{
+	compat_ulong_t brk;
+
+	if (count == 0)
+		return 0;
+
+	brk = task_thread_info(target)->last_break;
+	if (kbuf) {
+		unsigned long *kdst = kbuf;
+
+		*kdst = brk;
+		return 0;
+	}
+
+	if (__put_user(brk, (unsigned long __user *) ubuf))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Compat regset "set" handler for the last-break value: returns 0 without
+ * reading the buffers or modifying the target.
+ */
+static int s390_compat_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+/*
+ * Regset table of the compat view, indexed by the REGSET_* constants. The
+ * general, FP and high-GPR sets are described in compat_long_t units; the
+ * FP, TDB and system-call sets reuse the native handlers.
+ */
+static const struct user_regset s390_compat_regsets[] = {
+ [REGSET_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_compat_regs_get,
+ .set = s390_compat_regs_set,
+ },
+ [REGSET_FP] = {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_fpregs_get,
+ .set = s390_fpregs_set,
+ },
+ /*
+ * NOTE(review): sized with sizeof(long), unlike the compat_long_t
+ * entries; s390_compat_last_break_get() stores through an unsigned
+ * long pointer, which matches this size.
+ */
+ [REGSET_LAST_BREAK] = {
+ .core_note_type = NT_S390_LAST_BREAK,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_compat_last_break_get,
+ .set = s390_compat_last_break_set,
+ },
+ [REGSET_TDB] = {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
+ [REGSET_SYSTEM_CALL] = {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(compat_uint_t),
+ .align = sizeof(compat_uint_t),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ [REGSET_GENERAL_EXTENDED] = {
+ .core_note_type = NT_S390_HIGH_GPRS,
+ .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_compat_regs_high_get,
+ .set = s390_compat_regs_high_set,
+ },
+};
+
+/* Compat regset view: named "s390" and backed by s390_compat_regsets[] */
+static const struct user_regset_view user_s390_compat_view = {
+ .name = "s390",
+ .e_machine = EM_S390,
+ .regsets = s390_compat_regsets,
+ .n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+/*
+ * Select the regset view for a task: the compat view when CONFIG_COMPAT is
+ * enabled and the task has TIF_31BIT set, the native view otherwise.
+ */
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ return &user_s390_compat_view;
+#endif
+ return &user_s390_view;
+}
+
+/* Names "r0".."r15", indexed by general purpose register number */
+static const char *gpr_names[NUM_GPRS] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+/*
+ * Return general purpose register number @offset from @regs, or 0 when
+ * @offset is not below NUM_GPRS.
+ */
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+	return offset < NUM_GPRS ? regs->gprs[offset] : 0;
+}
+
+/*
+ * Translate a register name of the form "r<decimal>" into its register
+ * number. Returns -EINVAL for a NULL name, a name not starting with 'r',
+ * a suffix kstrtoul() rejects, or a number not below NUM_GPRS.
+ */
+int regs_query_register_offset(const char *name)
+{
+	unsigned long idx;
+
+	if (!name || name[0] != 'r' ||
+	    kstrtoul(name + 1, 10, &idx) ||
+	    idx >= NUM_GPRS)
+		return -EINVAL;
+
+	return idx;
+}
+
+/*
+ * Return the name of general purpose register number @offset, or NULL when
+ * @offset is not below NUM_GPRS.
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	return offset < NUM_GPRS ? gpr_names[offset] : NULL;
+}
+
+/*
+ * Return non-zero when @addr and the kernel stack pointer of @regs fall
+ * into the same THREAD_SIZE-aligned block.
+ */
+static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	const unsigned long block_mask = ~(THREAD_SIZE - 1);
+
+	return (kernel_stack_pointer(regs) & block_mask) ==
+	       (addr & block_mask);
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long *slot;
+
+	/* Entry n lies n longs above the kernel stack pointer */
+	slot = (unsigned long *)(kernel_stack_pointer(regs) +
+				 n * sizeof(long));
+	if (regs_within_kernel_stack(regs, (unsigned long) slot))
+		return *slot;
+	return 0;
}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 2f481cc3d1c..dd8016b0477 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -1,19 +1,25 @@
/*
- * arch/s390/kernel/reipl.S
- *
* S390 version
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 2000
* Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com)
*/
-#include <asm/lowcore.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+#
+# store_status: Empty implementation until kdump is supported on 31 bit
+#
+ENTRY(store_status)
+ br %r14
#
# do_reipl_asm
# Parameter: r2 = schid of reipl device
#
- .globl do_reipl_asm
-do_reipl_asm: basr %r13,0
+ENTRY(do_reipl_asm)
+ basr %r13,0
.Lpg0: lpsw .Lnewpsw-.Lpg0(%r13)
.Lpg1: # do store status of all registers
@@ -53,7 +59,7 @@ do_reipl_asm: basr %r13,0
bas %r14,.Ldisab-.Lpg0(%r13)
.L003: st %r1,__LC_SUBCHANNEL_ID
lpsw 0
- sigp 0,0,0(6)
+ sigp 0,0,SIGP_RESTART
.Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13)
lpsw .Ldispsw-.Lpg0(%r13)
.align 8
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
index c41930499a5..dc3b1273c4d 100644
--- a/arch/s390/kernel/reipl64.S
+++ b/arch/s390/kernel/reipl64.S
@@ -1,38 +1,83 @@
/*
- * arch/s390/kernel/reipl.S
- *
- * S390 version
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com)
- Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ * Copyright IBM Corp 2000, 2011
+ * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Denis Joseph Barrow,
*/
-#include <asm/lowcore.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+#
+# store_status
+#
+# Prerequisites to run this function:
+# - Prefix register is set to zero
+# - Original prefix register is stored in "dump_prefix_page"
+# - Lowcore protection is off
+#
+ENTRY(store_status)
+ /*
+ * Stores the CPU state into the lowcore save areas addressed relative
+ * to SAVE_AREA_BASE. %r1, %r2 and %r3 are overwritten below and not
+ * restored before returning through %r14.
+ */
+ /* Save register one and load save area base */
+ stg %r1,__LC_SAVE_AREA_RESTART
+ lghi %r1,SAVE_AREA_BASE
+ /* General purpose registers */
+ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* stmg stored the base value for %r1; replace slot 1 with the saved %r1 */
+ lg %r2,__LC_SAVE_AREA_RESTART
+ stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
+ /* Control registers */
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Access registers */
+ stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Floating point registers */
+ std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Floating point control register */
+ stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* CPU timer */
+ stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Saved prefix register */
+ larl %r2,dump_prefix_page
+ mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
+ /* Clock comparator - seven bytes */
+ /* stckc goes to the .Lclkcmp scratch quad; bytes 1-7 are then copied */
+ larl %r2,.Lclkcmp
+ stckc 0(%r2)
+ mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
+ /* Program status word */
+ epsw %r2,%r3
+ st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
+ st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
+ /* The address half of the saved PSW is the address of store_status */
+ larl %r2,store_status
+ stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
+ br %r14
+
+ .section .bss
+ .align 8
+.Lclkcmp: .quad 0x0000000000000000
+ .previous
#
# do_reipl_asm
# Parameter: r2 = schid of reipl device
#
- .globl do_reipl_asm
-do_reipl_asm: basr %r13,0
+ENTRY(do_reipl_asm)
+ basr %r13,0
.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13)
-.Lpg1: # do store status of all registers
-
- stg %r1,.Lregsave-.Lpg0(%r13)
- lghi %r1,0x1000
- stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1)
- lg %r0,.Lregsave-.Lpg0(%r13)
- stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1)
- stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1)
- stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1)
- lg %r10,.Ldump_pfx-.Lpg0(%r13)
- mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10)
- stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1)
- stckc .Lclkcmp-.Lpg0(%r13)
- mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13)
- stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1)
- stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1)
+.Lpg1: brasl %r14,store_status
lctlg %c6,%c6,.Lall-.Lpg0(%r13)
lgr %r1,%r2
@@ -62,17 +107,14 @@ do_reipl_asm: basr %r13,0
.L003: st %r1,__LC_SUBCHANNEL_ID
lhi %r1,0 # mode 0 = esa
slr %r0,%r0 # set cpuid to zero
- sigp %r1,%r0,0x12 # switch to esa mode
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
lpsw 0
.Ldisab: sll %r14,1
srl %r14,1 # need to kill hi bit to avoid specification exceptions.
st %r14,.Ldispsw+12-.Lpg0(%r13)
lpswe .Ldispsw-.Lpg0(%r13)
.align 8
-.Lclkcmp: .quad 0x0000000000000000
.Lall: .quad 0x00000000ff000000
-.Ldump_pfx: .quad dump_prefix_page
-.Lregsave: .quad 0x0000000000000000
.align 16
/*
* These addresses have to be 31 bit otherwise
@@ -81,7 +123,7 @@ do_reipl_asm: basr %r13,0
* in the ESA psw.
* Bit 31 of the addresses has to be 0 for the
* 31bit lpswe instruction a fact they appear to have
- * ommited from the pop.
+ * omitted from the pop.
*/
.Lnewpsw: .quad 0x0000000080000000
.quad .Lpg1
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index 3b456b80bce..f4e6f20e117 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -1,13 +1,14 @@
/*
- * arch/s390/kernel/relocate_kernel.S
- *
- * (C) Copyright IBM Corp. 2005
+ * Copyright IBM Corp. 2005
*
* Author(s): Rolf Adelsberger,
* Heiko Carstens <heiko.carstens@de.ibm.com>
*
*/
+#include <linux/linkage.h>
+#include <asm/sigp.h>
+
/*
* moves the new kernel to its destination...
* %r2 = pointer to first kimage_entry_t
@@ -22,8 +23,7 @@
*/
.text
- .globl relocate_kernel
- relocate_kernel:
+ENTRY(relocate_kernel)
basr %r13,0 # base address
.base:
stnsm sys_msk-.base(%r13),0xfb # disable DAT
@@ -92,7 +92,7 @@
.no_diag308:
sr %r1,%r1 # clear %r1
sr %r2,%r2 # clear %r2
- sigp %r1,%r2,0x12 # set cpuid to zero
+ sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
lpsw 0 # hopefully start new kernel...
.align 8
@@ -112,6 +112,7 @@
.byte 0
.align 8
relocate_kernel_end:
+ .align 8
.globl relocate_kernel_len
relocate_kernel_len:
.quad relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S
index 1f9ea2067b5..cfac28330b0 100644
--- a/arch/s390/kernel/relocate_kernel64.S
+++ b/arch/s390/kernel/relocate_kernel64.S
@@ -1,13 +1,14 @@
/*
- * arch/s390/kernel/relocate_kernel64.S
- *
- * (C) Copyright IBM Corp. 2005
+ * Copyright IBM Corp. 2005
*
* Author(s): Rolf Adelsberger,
* Heiko Carstens <heiko.carstens@de.ibm.com>
*
*/
+#include <linux/linkage.h>
+#include <asm/sigp.h>
+
/*
* moves the new kernel to its destination...
* %r2 = pointer to first kimage_entry_t
@@ -23,8 +24,7 @@
*/
.text
- .globl relocate_kernel
- relocate_kernel:
+ENTRY(relocate_kernel)
basr %r13,0 # base address
.base:
stnsm sys_msk-.base(%r13),0xfb # disable DAT
@@ -44,7 +44,7 @@
diag %r0,%r0,0x308
.back:
lhi %r1,1 # mode 1 = esame
- sigp %r1,%r0,0x12 # switch to esame mode
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
sam64 # switch to 64 bit addressing mode
basr %r13,0
.back_base:
@@ -95,7 +95,7 @@
sam31 # 31 bit mode
sr %r1,%r1 # erase register r1
sr %r2,%r2 # erase register r2
- sigp %r1,%r2,0x12 # set cpuid to zero
+ sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
lpsw 0 # hopefully start new kernel...
.align 8
@@ -115,6 +115,7 @@
.byte 0
.align 8
relocate_kernel_end:
+ .align 8
.globl relocate_kernel_len
relocate_kernel_len:
.quad relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 00000000000..26b4ae96fdd
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+/* empty control block to disable RI by loading it */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+/*
+ * Return non-zero when facility bit 64 is reported by test_facility()
+ * (presumably the runtime instrumentation facility - confirm against the
+ * facility list).
+ */
+static int runtime_instr_avail(void)
+{
+ return test_facility(64);
+}
+
+/*
+ * Switch runtime instrumentation off for the current task by loading the
+ * empty control block and clearing the RI bit in the saved user PSW.
+ */
+static void disable_runtime_instr(void)
+{
+	struct pt_regs *user_regs;
+
+	load_runtime_instr_cb(&runtime_instr_empty_cb);
+
+	/*
+	 * The RI bit must not survive in the PSW: if the user left RI
+	 * switched on before entering the system call, the process would
+	 * otherwise take a specification exception.
+	 */
+	user_regs = task_pt_regs(current);
+	user_regs->psw.mask &= ~PSW_MASK_RI;
+}
+
+/*
+ * Fill in the fixed fields of a runtime instrumentation control block.
+ * Only the fields listed here are written; the caller in this file passes
+ * a block that was zeroed beforehand (kzalloc() or memset()).
+ */
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ cb->buf_limit = 0xfff;
+ cb->int_requested = 1;
+ cb->pstate = 1;
+ cb->pstate_set_buf = 1;
+ cb->pstate_sample = 1;
+ cb->pstate_collect = 1;
+ cb->key = PAGE_DEFAULT_KEY;
+ cb->valid = 1;
+}
+
+/*
+ * Tear down runtime instrumentation for the current task: nothing happens
+ * when no control block is attached; otherwise RI is disabled, the control
+ * block is freed and ri_signum/ri_cb are reset.
+ */
+void exit_thread_runtime_instr(void)
+{
+	struct task_struct *tsk = current;
+
+	if (tsk->thread.ri_cb) {
+		disable_runtime_instr();
+		kfree(tsk->thread.ri_cb);
+		tsk->thread.ri_signum = 0;
+		tsk->thread.ri_cb = NULL;
+	}
+}
+
+/*
+ * External interrupt handler registered for EXT_IRQ_MEASURE_ALERT by
+ * runtime_instr_init(). When param32 carries a runtime instrumentation bit
+ * and the current task has a control block, the task is sent its
+ * configured signal with si_int set to ENOBUFS (buffer full) or ECANCELED
+ * (halted).
+ */
+static void runtime_instr_int_handler(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct siginfo info;
+
+ /* Not a runtime instrumentation alert: nothing to do here */
+ if (!(param32 & CPU_MF_INT_RI_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMR);
+
+ if (!current->thread.ri_cb)
+ return;
+ /* The syscall only stores signal numbers in SIGRTMIN..SIGRTMAX */
+ if (current->thread.ri_signum < SIGRTMIN ||
+ current->thread.ri_signum > SIGRTMAX) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = current->thread.ri_signum;
+ info.si_code = SI_QUEUE;
+ if (param32 & CPU_MF_INT_RI_BUF_FULL)
+ info.si_int = ENOBUFS;
+ else if (param32 & CPU_MF_INT_RI_HALTED)
+ info.si_int = ECANCELED;
+ else
+ return; /* unknown reason */
+
+ send_sig_info(current->thread.ri_signum, &info, current);
+}
+
+/*
+ * s390_runtime_instr system call.
+ *
+ * command: S390_RUNTIME_INSTR_START or S390_RUNTIME_INSTR_STOP.
+ * signum: signal to deliver on alerts; must lie in SIGRTMIN..SIGRTMAX for
+ * START and is not checked for STOP.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the facility is unavailable,
+ * -EINVAL for an unknown command or out-of-range signum, and -ENOMEM when
+ * the control block cannot be allocated.
+ */
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+ struct runtime_instr_cb *cb;
+
+ if (!runtime_instr_avail())
+ return -EOPNOTSUPP;
+
+ if (command == S390_RUNTIME_INSTR_STOP) {
+ preempt_disable();
+ exit_thread_runtime_instr();
+ preempt_enable();
+ return 0;
+ }
+
+ if (command != S390_RUNTIME_INSTR_START ||
+ (signum < SIGRTMIN || signum > SIGRTMAX))
+ return -EINVAL;
+
+ /* Allocate a zeroed control block, or clear and reuse the existing one */
+ if (!current->thread.ri_cb) {
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+ } else {
+ cb = current->thread.ri_cb;
+ memset(cb, 0, sizeof(*cb));
+ }
+
+ init_runtime_instr_cb(cb);
+ current->thread.ri_signum = signum;
+
+ /* now load the control block to make it available */
+ preempt_disable();
+ current->thread.ri_cb = cb;
+ load_runtime_instr_cb(cb);
+ preempt_enable();
+ return 0;
+}
+
+/*
+ * Boot-time setup: when the facility is available, register the
+ * measurement-alert IRQ subclass and the external interrupt handler. If
+ * the handler cannot be registered the subclass registration is undone
+ * and the error is returned.
+ */
+static int __init runtime_instr_init(void)
+{
+	int err;
+
+	if (!runtime_instr_avail())
+		return 0;
+
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				    runtime_instr_int_handler);
+	if (err) {
+		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+		return err;
+	}
+
+	pr_info("Runtime instrumentation facility initialized\n");
+	return err;
+}
+device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
deleted file mode 100644
index e019b419efc..00000000000
--- a/arch/s390/kernel/s390_ext.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * arch/s390/kernel/s390_ext.c
- *
- * S390 version
- * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com),
- * Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <asm/cpu.h>
-#include <asm/lowcore.h>
-#include <asm/s390_ext.h>
-#include <asm/irq_regs.h>
-#include <asm/irq.h>
-#include "entry.h"
-
-/*
- * ext_int_hash[index] is the start of the list for all external interrupts
- * that hash to this index. With the current set of external interrupts
- * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000
- * iucv and 0x2603 pfault) this is always the first element.
- */
-ext_int_info_t *ext_int_hash[256] = { NULL, };
-
-static inline int ext_hash(__u16 code)
-{
- return (code + (code >> 9)) & 0xff;
-}
-
-int register_external_interrupt(__u16 code, ext_int_handler_t handler)
-{
- ext_int_info_t *p;
- int index;
-
- p = kmalloc(sizeof(ext_int_info_t), GFP_ATOMIC);
- if (p == NULL)
- return -ENOMEM;
- p->code = code;
- p->handler = handler;
- index = ext_hash(code);
- p->next = ext_int_hash[index];
- ext_int_hash[index] = p;
- return 0;
-}
-
-int register_early_external_interrupt(__u16 code, ext_int_handler_t handler,
- ext_int_info_t *p)
-{
- int index;
-
- if (p == NULL)
- return -EINVAL;
- p->code = code;
- p->handler = handler;
- index = ext_hash(code);
- p->next = ext_int_hash[index];
- ext_int_hash[index] = p;
- return 0;
-}
-
-int unregister_external_interrupt(__u16 code, ext_int_handler_t handler)
-{
- ext_int_info_t *p, *q;
- int index;
-
- index = ext_hash(code);
- q = NULL;
- p = ext_int_hash[index];
- while (p != NULL) {
- if (p->code == code && p->handler == handler)
- break;
- q = p;
- p = p->next;
- }
- if (p == NULL)
- return -ENOENT;
- if (q != NULL)
- q->next = p->next;
- else
- ext_int_hash[index] = p->next;
- kfree(p);
- return 0;
-}
-
-int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler,
- ext_int_info_t *p)
-{
- ext_int_info_t *q;
- int index;
-
- if (p == NULL || p->code != code || p->handler != handler)
- return -EINVAL;
- index = ext_hash(code);
- q = ext_int_hash[index];
- if (p != q) {
- while (q != NULL) {
- if (q->next == p)
- break;
- q = q->next;
- }
- if (q == NULL)
- return -ENOENT;
- q->next = p->next;
- } else
- ext_int_hash[index] = p->next;
- return 0;
-}
-
-void do_extint(struct pt_regs *regs, unsigned short code)
-{
- ext_int_info_t *p;
- int index;
- struct pt_regs *old_regs;
-
- old_regs = set_irq_regs(regs);
- irq_enter();
- s390_idle_check();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
- /* Serve timer interrupts first. */
- clock_comparator_work();
- kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
- index = ext_hash(code);
- for (p = ext_int_hash[index]; p; p = p->next) {
- if (likely(p->code == code))
- p->handler(code);
- }
- irq_exit();
- set_irq_regs(old_regs);
-}
-
-EXPORT_SYMBOL(register_external_interrupt);
-EXPORT_SYMBOL(unregister_external_interrupt);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 48238a114ce..9f60467938d 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,45 +1,13 @@
-/*
- * arch/s390/kernel/s390_ksyms.c
- *
- * S390 version
- */
-#include <linux/highuid.h>
#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/syscalls.h>
-#include <linux/interrupt.h>
-#include <asm/checksum.h>
-#include <asm/cpcmd.h>
-#include <asm/delay.h>
-#include <asm/pgalloc.h>
-#include <asm/setup.h>
-#ifdef CONFIG_IP_MULTICAST
-#include <net/arp.h>
-#endif
-
-/*
- * memory management
- */
-EXPORT_SYMBOL(_oi_bitmap);
-EXPORT_SYMBOL(_ni_bitmap);
-EXPORT_SYMBOL(_zb_findmap);
-EXPORT_SYMBOL(_sb_findmap);
-
-/*
- * binfmt_elf loader
- */
-extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs);
-EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(empty_zero_page);
+#include <linux/kvm_host.h>
+#include <asm/ftrace.h>
-/*
- * misc.
- */
-EXPORT_SYMBOL(machine_flags);
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(csum_fold);
-EXPORT_SYMBOL(console_mode);
-EXPORT_SYMBOL(console_devno);
-EXPORT_SYMBOL(console_irq);
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
+#endif
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
new file mode 100644
index 00000000000..a41f2c99dcc
--- /dev/null
+++ b/arch/s390/kernel/sclp.S
@@ -0,0 +1,360 @@
+/*
+ * Mini SCLP driver.
+ *
+ * Copyright IBM Corp. 2004, 2009
+ *
+ * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/irq.h>
+
+LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
+LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
+LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter
+LC_EXT_INT_CODE = 0x86 # addr of ext int code
+LC_AR_MODE_ID = 0xa3
+
+#
+# Subroutine which waits synchronously until either an external interruption
+# or a timeout occurs.
+#
+# Parameters:
+# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds
+#
+# Returns:
+# R2 = 0 on service-signal interrupt, 2 on timeout
+# R3 = external interruption parameter if R2=0
+#
+# Notes:
+# R0 and R1 are used as scratch; R6-R15 are saved on entry and restored
+# before returning. The external new PSW and the CR0 interrupt mask bits
+# are replaced for the duration of the wait and put back on both exits.
+# External interruptions other than clock comparator and service signal
+# are ignored and the wait is resumed.
+#
+
+_sclp_wait_int:
+ stm %r6,%r15,24(%r15) # save registers
+ basr %r13,0 # get base register
+.LbaseS1:
+ ahi %r15,-96 # create stack frame
+ la %r8,LC_EXT_NEW_PSW # register int handler
+ la %r9,.LextpswS1-.LbaseS1(%r13) # addr of replacement ext new PSW
+#ifdef CONFIG_64BIT
+ tm LC_AR_MODE_ID,1 # test low bit of the mode id byte
+ jno .Lesa1 # bit not set: keep LC_EXT_NEW_PSW
+ la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit
+ la %r9,.LextpswS1_64-.LbaseS1(%r13)
+.Lesa1:
+#endif
+ mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8) # save old ext new PSW (16 bytes)
+ mvc 0(16,%r8),0(%r9) # install temporary ext new PSW
+#ifdef CONFIG_64BIT
+ epsw %r6,%r7 # set current addressing mode
+ nill %r6,0x1 # in new psw (31 or 64 bit mode)
+ nilh %r7,0x8000
+ stm %r6,%r7,0(%r8)
+#endif
+ lhi %r6,0x0200 # cr mask for ext int (cr0.54)
+ ltr %r2,%r2 # timeout requested?
+ jz .LsetctS1 # no: skip clock comparator setup
+ ahi %r6,0x0800 # cr mask for clock int (cr0.52)
+ stck .LtimeS1-.LbaseS1(%r13) # initiate timeout
+ al %r2,.LtimeS1-.LbaseS1(%r13) # add R2 to first word of stored clock
+ st %r2,.LtimeS1-.LbaseS1(%r13) # write the sum back
+ sckc .LtimeS1-.LbaseS1(%r13) # load it into the clock comparator
+
+.LsetctS1:
+ stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts
+ l %r0,.LctlS1-.LbaseS1(%r13) # r0 = cr0 value as stored above
+ lhi %r1,~(0x200 | 0x800) # clear old values
+ nr %r1,%r0
+ or %r1,%r6 # set new value
+ st %r1,.LctlS1-.LbaseS1(%r13)
+ lctl %c0,%c0,.LctlS1-.LbaseS1(%r13)
+ st %r0,.LctlS1-.LbaseS1(%r13) # keep original cr0 value for the restore
+ lhi %r2,2 # return code for timeout
+.LloopS1:
+ lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
+.LwaitS1:
+ lh %r7,LC_EXT_INT_CODE # fetch external interruption code
+ chi %r7,EXT_IRQ_CLK_COMP # timeout?
+ je .LtimeoutS1
+ chi %r7,EXT_IRQ_SERVICE_SIG # service int?
+ jne .LloopS1 # anything else: wait again
+ sr %r2,%r2 # return code 0: interrupt received
+ l %r3,LC_EXT_INT_PARAM # return ext int parameter
+.LtimeoutS1:
+ lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting
+ # restore old handler
+ mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
+ lm %r6,%r15,120(%r15) # restore registers (24 + 96 byte frame)
+ br %r14 # return to caller
+
+ .align 8
+.LoldpswS1:
+ .long 0, 0, 0, 0 # old ext int PSW
+.LextpswS1:
+ .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int
+#ifdef CONFIG_64BIT
+.LextpswS1_64:
+ .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit
+#endif
+.LwaitpswS1:
+ .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int
+.LtimeS1:
+ .quad 0 # current time
+.LctlS1:
+ .long 0 # CT0 contents
+
+#
+# Subroutine to synchronously issue a service call.
+#
+# Parameters:
+# R2 = command word
+# R3 = sccb address
+#
+# Returns:
+# R2 = 0 on success, 1 on failure
+# R3 = sccb response code if R2 = 0
+#
+
+_sclp_servc:
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ lr %r6,%r2 # save command word
+ lr %r7,%r3 # save sccb address
+.LretryS2:
+ lhi %r2,1 # error return code
+ .insn rre,0xb2200000,%r6,%r7 # servc
+ brc 1,.LendS2 # exit if not operational
+ brc 8,.LnotbusyS2 # go on if not busy
+ sr %r2,%r2 # wait until no longer busy
+ bras %r14,_sclp_wait_int # R2 = 0: wait without timeout
+ j .LretryS2 # retry
+.LnotbusyS2:
+ sr %r2,%r2 # wait until result
+ bras %r14,_sclp_wait_int # R2 = 0: wait without timeout
+ sr %r2,%r2 # success return code
+ lh %r3,6(%r7) # halfword at sccb offset 6 -> R3
+.LendS2:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14 # return to caller
+
+#
+# Subroutine to set up the SCLP interface.
+#
+# Parameters:
+# R2 = 0 to activate, non-zero to deactivate
+#
+# Returns:
+# R2 = 0 on success, non-zero on failure
+#
+# The init mask sccb template is copied to the work area; for deactivation
+# the receive and send masks in the copy are cleared first. After the write
+# mask service call both requested masks are compared against the masks
+# found in the sccb at offsets 12 and 8 behind .LinitmaskS3, and a mismatch
+# on either one yields the error return code.
+#
+
+_sclp_setup:
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ basr %r13,0 # get base register
+.LbaseS3:
+ l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb
+ mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
+ ltr %r2,%r2 # initialization?
+ jz .LdoinitS3 # go ahead
+ # clear masks
+ xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
+.LdoinitS3:
+ l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
+ lr %r3,%r6 # get sccb address
+ bras %r14,_sclp_servc # issue service call
+ ltr %r2,%r2 # servc successful?
+ jnz .LerrorS3
+ chi %r3,0x20 # write mask successful?
+ jne .LerrorS3
+ # check masks
+ la %r2,.LinitmaskS3-.LinitsccbS3(%r6)
+ l %r1,0(%r2) # receive mask ok?
+ n %r1,12(%r2)
+ cl %r1,0(%r2)
+ jne .LerrorS3
+ l %r1,4(%r2) # send mask ok?
+ n %r1,8(%r2)
+ cl %r1,4(%r2)
+ jne .LerrorS3 # branch first: sr below sets the cc
+ sr %r2,%r2 # success return code
+ j .LendS3
+.LerrorS3:
+ lhi %r2,1 # error return code
+.LendS3:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14 # return to caller
+.LwritemaskS3:
+ .long 0x00780005 # SCLP command for write mask
+.LinitsccbS3:
+ .word .LinitendS3-.LinitsccbS3
+ .byte 0,0,0,0
+ .word 0
+ .word 0
+ .word 4
+.LinitmaskS3:
+ .long 0x80000000
+ .long 0x40000000
+ .long 0
+ .long 0
+.LinitendS3:
+
+#
+# Subroutine which prints a given text to the SCLP console.
+#
+# Parameters:
+# R2 = address of nil-terminated ASCII text
+#
+# Returns:
+# R2 = 0 on success, 1 on failure
+#
+
+_sclp_print:
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ basr %r13,0 # get base register
+.LbaseS4:
+ l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb
+ mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
+ la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr
+ sr %r0,%r0 # clear r0; ic below only fills the low byte
+ l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table
+ # NOTE(review): no check against the size of the work area is visible
+ # in this loop - confirm that callers only pass short strings
+.LinitmtoS4:
+ # initialize mto
+ mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
+ lhi %r6,.LmtoendS4-.LmtoS4 # current mto length
+.LloopS4:
+ ic %r0,0(%r2) # get character
+ ahi %r2,1 # advance string pointer
+ ltr %r0,%r0 # end of string?
+ jz .LfinalizemtoS4
+ chi %r0,0x0a # end of line (NL)?
+ jz .LfinalizemtoS4
+ stc %r0,0(%r6,%r7) # copy to mto
+ la %r11,0(%r6,%r7) # address of the byte just stored
+ tr 0(1,%r11),0(%r10) # translate to EBCDIC
+ ahi %r6,1 # one more byte in this mto
+ j .LloopS4
+.LfinalizemtoS4:
+ sth %r6,0(%r7) # update mto length
+ lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length
+ ar %r9,%r6
+ sth %r9,.LmdbS4-.LwritesccbS4(%r8)
+ lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
+ ar %r9,%r6
+ sth %r9,.LevbufS4-.LwritesccbS4(%r8)
+ lh %r9,0(%r8) # update sccb length
+ ar %r9,%r6
+ sth %r9,0(%r8)
+ ar %r7,%r6 # update current mto address
+ ltr %r0,%r0 # more characters?
+ jnz .LinitmtoS4 # last char was NL: start next mto
+ l %r2,.LwritedataS4-.LbaseS4(%r13)# write data
+ lr %r3,%r8 # sccb address
+ bras %r14,_sclp_servc # issue service call
+ ltr %r2,%r2 # servc successful?
+ jnz .LendS4
+ chi %r3,0x20 # write data successful?
+ je .LendS4 # yes: R2 is still 0 from servc
+ lhi %r2,1 # error return code
+.LendS4:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14 # return to caller
+
+#
+# Function which prints a given text to the SCLP console.
+#
+# Parameters:
+# R2 = address of nil-terminated ASCII text
+#
+# Returns:
+# R2 = 0 on success, 1 on failure
+#
+
+ENTRY(_sclp_print_early)
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+#ifdef CONFIG_64BIT
+ tm LC_AR_MODE_ID,1 # test low bit of the mode id byte
+ jno .Lesa2 # bit not set: no upper halves to save
+ ahi %r15,-80 # extra room for the upper halves
+ stmh %r6,%r15,96(%r15) # store upper register halves
+.Lesa2:
+#endif
+ lr %r10,%r2 # save string pointer
+ lhi %r2,0 # R2 = 0: activate
+ bras %r14,_sclp_setup # enable console
+ ltr %r2,%r2
+ jnz .LendS5 # setup failed: return its code
+ lr %r2,%r10 # string pointer back into R2
+ bras %r14,_sclp_print # print string
+ ltr %r2,%r2
+ jnz .LendS5 # print failed: deactivation is skipped
+ lhi %r2,1 # R2 != 0: deactivate
+ bras %r14,_sclp_setup # disable console
+.LendS5:
+#ifdef CONFIG_64BIT
+ tm LC_AR_MODE_ID,1
+ jno .Lesa3
+ lmh %r6,%r15,96(%r15) # restore upper register halves
+ ahi %r15,80 # drop the extra room again
+.Lesa3:
+#endif
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14 # return to caller
+
+.LwritedataS4:
+ .long 0x00760005 # SCLP command for write data
+.LwritesccbS4:
+ # sccb
+ .word .LmtoS4-.LwritesccbS4
+ .byte 0
+ .byte 0,0,0
+ .word 0
+
+ # evbuf
+.LevbufS4:
+ .word .LmtoS4-.LevbufS4
+ .byte 0x02
+ .byte 0
+ .word 0
+
+.LmdbS4:
+ # mdb
+ .word .LmtoS4-.LmdbS4
+ .word 1
+ .long 0xd4c4c240
+ .long 1
+
+ # go
+.LgoS4:
+ .word .LmtoS4-.LgoS4
+ .word 1
+ .long 0
+ .byte 0,0,0,0,0,0,0,0
+ .byte 0,0,0
+ .byte 0
+ .byte 0,0,0,0,0,0,0
+ .byte 0
+ .word 0
+ .byte 0,0,0,0,0,0,0,0,0,0
+ .byte 0,0,0,0,0,0,0,0
+ .byte 0,0,0,0,0,0,0,0
+
+.LmtoS4:
+ .word .LmtoendS4-.LmtoS4
+ .word 4
+ .word 0x1000
+ .byte 0
+ .byte 0,0,0
+.LmtoendS4:
+
+ # Global constants
+.LsccbS0:
+ .long _sclp_work_area
+.Lascebc:
+ .long _ascebc
+
+.section .data,"aw",@progbits
+ .balign 4096
+_sclp_work_area:
+ .fill 4096
+.previous
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2bc70b6e876..1e2264b46e4 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1,8 +1,6 @@
/*
- * arch/s390/kernel/setup.c
- *
* S390 version
- * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999, 2012
* Author(s): Hartmut Penner (hp@de.ibm.com),
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*
@@ -14,15 +12,18 @@
* This file handles the architecture-dependent parts of initialization
*/
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/errno.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
-#include <linux/slab.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
@@ -32,7 +33,6 @@
#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
-#include <linux/seq_file.h>
#include <linux/kernel_stat.h>
#include <linux/device.h>
#include <linux/notifier.h>
@@ -40,10 +40,14 @@
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
+#include <linux/ftrace.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/compat.h>
#include <asm/ipl.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
+#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
@@ -53,77 +57,56 @@
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
-#include <asm/compat.h>
-
-long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
- PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
-long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
- PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
-
-/*
- * User copy operations.
- */
-struct uaccess_ops uaccess;
-EXPORT_SYMBOL(uaccess);
+#include <asm/kvm_virtio.h>
+#include <asm/diag.h>
+#include <asm/os_info.h>
+#include <asm/sclp.h>
+#include "entry.h"
/*
* Machine setup..
*/
unsigned int console_mode = 0;
+EXPORT_SYMBOL(console_mode);
+
unsigned int console_devno = -1;
+EXPORT_SYMBOL(console_devno);
+
unsigned int console_irq = -1;
-unsigned long machine_flags;
+EXPORT_SYMBOL(console_irq);
+
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];
-struct mem_chunk __meminitdata memory_chunk[MEMORY_CHUNKS];
-volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
-static unsigned long __initdata memory_end;
+int __initdata memory_end_set;
+unsigned long __initdata memory_end;
+unsigned long __initdata max_physmem_end;
-/*
- * This is set up by the setup-routine at boot-time
- * for S390 need to find out, what we have to setup
- * using address 0x10400 ...
- */
+unsigned long VMALLOC_START;
+EXPORT_SYMBOL(VMALLOC_START);
-#include <asm/setup.h>
+unsigned long VMALLOC_END;
+EXPORT_SYMBOL(VMALLOC_END);
-static struct resource code_resource = {
- .name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
-};
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
-static struct resource data_resource = {
- .name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
-};
+#ifdef CONFIG_64BIT
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+#endif
+
+/* An array with a pointer to the lowcore of every CPU. */
+struct _lowcore *lowcore_ptr[NR_CPUS];
+EXPORT_SYMBOL(lowcore_ptr);
/*
- * cpu_init() initializes state that is per-CPU.
+ * This is set up by the setup-routine at boot-time
+ * for S390 need to find out, what we have to setup
+ * using address 0x10400 ...
*/
-void __cpuinit cpu_init(void)
-{
- int addr = hard_smp_processor_id();
-
- /*
- * Store processor id in lowcore (used e.g. in timer_interrupt)
- */
- get_cpu_id(&S390_lowcore.cpu_data.cpu_id);
- S390_lowcore.cpu_data.cpu_addr = addr;
- /*
- * Force FPU initialization:
- */
- clear_thread_flag(TIF_USEDFPU);
- clear_used_math();
-
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
- if (current->mm)
- BUG();
- enter_lazy_tlb(&init_mm, current);
-}
+#include <asm/setup.h>
/*
* condev= and conmode= setup parameter.
@@ -143,6 +126,21 @@ static int __init condev_setup(char *str)
__setup("condev=", condev_setup);
+/*
+ * Register the preferred console for the current console setup.
+ *
+ * On KVM the choice follows sclp_has_vt220() / sclp_has_linemode():
+ * "ttyS" index 1, "ttyS" index 0, or "hvc" index 0 when neither is
+ * reported. Otherwise "ttyS" index 0 is used for 3215 and SCLP consoles
+ * and "tty3270" index 0 for 3270. Nothing is registered when no case
+ * applies.
+ */
+static void __init set_preferred_console(void)
+{
+	char *name = NULL;
+	int index = 0;
+
+	if (MACHINE_IS_KVM) {
+		if (sclp_has_vt220()) {
+			name = "ttyS";
+			index = 1;
+		} else if (sclp_has_linemode()) {
+			name = "ttyS";
+		} else {
+			name = "hvc";
+		}
+	} else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) {
+		name = "ttyS";
+	} else if (CONSOLE_IS_3270) {
+		name = "tty3270";
+	}
+
+	if (name)
+		add_preferred_console(name, index, NULL);
+}
+
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
@@ -157,6 +155,7 @@ static int __init conmode_setup(char *str)
if (strncmp(str, "3270", 5) == 0)
SET_CONSOLE_3270;
#endif
+ set_preferred_console();
return 1;
}
@@ -205,12 +204,6 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
- } else if (MACHINE_IS_P390) {
-#if defined(CONFIG_TN3215_CONSOLE)
- SET_CONSOLE_3215;
-#elif defined(CONFIG_TN3270_CONSOLE)
- SET_CONSOLE_3270;
-#endif
} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
SET_CONSOLE_SCLP;
@@ -218,26 +211,19 @@ static void __init conmode_default(void)
}
}
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
-static void __init setup_zfcpdump(unsigned int console_devno)
+#ifdef CONFIG_CRASH_DUMP
+static void __init setup_zfcpdump(void)
{
- static char str[64];
-
if (ipl_info.type != IPL_TYPE_FCP_DUMP)
return;
- if (console_devno != -1)
- sprintf(str, "cio_ignore=all,!0.0.%04x,!0.0.%04x",
- ipl_info.data.fcp.dev_id.devno, console_devno);
- else
- sprintf(str, "cio_ignore=all,!0.0.%04x",
- ipl_info.data.fcp.dev_id.devno);
- strcat(COMMAND_LINE, " ");
- strcat(COMMAND_LINE, str);
+ if (OLDMEM_BASE)
+ return;
+ strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
console_loglevel = 2;
}
#else
-static inline void setup_zfcpdump(unsigned int console_devno) {}
-#endif /* CONFIG_ZFCPDUMP */
+static inline void setup_zfcpdump(void) {}
+#endif /* CONFIG_CRASH_DUMP */
/*
* Reboot, halt and power_off stubs. They just call _machine_restart,
@@ -281,423 +267,430 @@ void machine_power_off(void)
* Dummy power off function.
*/
void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL_GPL(pm_power_off);
static int __init early_parse_mem(char *p)
{
memory_end = memparse(p, &p);
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
return 0;
}
early_param("mem", early_parse_mem);
-/*
- * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
- */
-static int __init early_parse_ipldelay(char *p)
-{
- unsigned long delay = 0;
-
- delay = simple_strtoul(p, &p, 0);
-
- switch (*p) {
- case 's':
- case 'S':
- delay *= 1000000;
- break;
- case 'm':
- case 'M':
- delay *= 60 * 1000000;
- }
-
- /* now wait for the requested amount of time */
- udelay(delay);
-
- return 0;
-}
-early_param("ipldelay", early_parse_ipldelay);
-
-#ifdef CONFIG_S390_SWITCH_AMODE
-#ifdef CONFIG_PGSTE
-unsigned int switch_amode = 1;
-#else
-unsigned int switch_amode = 0;
-#endif
-EXPORT_SYMBOL_GPL(switch_amode);
-
-static void set_amode_and_uaccess(unsigned long user_amode,
- unsigned long user32_amode)
-{
- psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
- PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
-#ifdef CONFIG_COMPAT
- psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
- PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
- psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode |
- PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
- PSW32_MASK_PSTATE;
-#endif
- psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
- PSW_MASK_MCHECK | PSW_DEFAULT_KEY;
-
- if (MACHINE_HAS_MVCOS) {
- printk("mvcos available.\n");
- memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess));
- } else {
- printk("mvcos not available.\n");
- memcpy(&uaccess, &uaccess_pt, sizeof(uaccess));
- }
-}
-
-/*
- * Switch kernel/user addressing modes?
- */
-static int __init early_parse_switch_amode(char *p)
-{
- switch_amode = 1;
- return 0;
-}
-early_param("switch_amode", early_parse_switch_amode);
-
-#else /* CONFIG_S390_SWITCH_AMODE */
-static inline void set_amode_and_uaccess(unsigned long user_amode,
- unsigned long user32_amode)
-{
-}
-#endif /* CONFIG_S390_SWITCH_AMODE */
-
-#ifdef CONFIG_S390_EXEC_PROTECT
-unsigned int s390_noexec = 0;
-EXPORT_SYMBOL_GPL(s390_noexec);
-
-/*
- * Enable execute protection?
- */
-static int __init early_parse_noexec(char *p)
+static int __init parse_vmalloc(char *arg)
{
- if (!strncmp(p, "off", 3))
- return 0;
- switch_amode = 1;
- s390_noexec = 1;
+ if (!arg)
+ return -EINVAL;
+ VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
return 0;
}
-early_param("noexec", early_parse_noexec);
-#endif /* CONFIG_S390_EXEC_PROTECT */
+early_param("vmalloc", parse_vmalloc);
-static void setup_addressing_mode(void)
-{
- if (s390_noexec) {
- printk("S390 execute protection active, ");
- set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY);
- } else if (switch_amode) {
- printk("S390 address spaces switched, ");
- set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY);
- }
-#ifdef CONFIG_TRACE_IRQFLAGS
- sysc_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
- io_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
-#endif
-}
+void *restart_stack __attribute__((__section__(".data")));
-static void __init
-setup_lowcore(void)
+static void __init setup_lowcore(void)
{
struct _lowcore *lc;
- int lc_pages;
/*
* Setup lowcore for boot cpu
*/
- lc_pages = sizeof(void *) == 8 ? 2 : 1;
- lc = (struct _lowcore *)
- __alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0);
- memset(lc, 0, lc_pages * PAGE_SIZE);
- lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
+ lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
+ lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr =
PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
- if (switch_amode)
- lc->restart_psw.mask |= PSW_ASC_HOME;
- lc->external_new_psw.mask = psw_kernel_bits;
+ lc->external_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->external_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
- lc->program_new_psw.mask = psw_kernel_bits;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->program_new_psw.addr =
- PSW_ADDR_AMODE | (unsigned long)pgm_check_handler;
- lc->mcck_new_psw.mask =
- psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT;
+ PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = psw_kernel_bits;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
- lc->ipl_device = S390_lowcore.ipl_device;
lc->clock_comparator = -1ULL;
- lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
+ lc->kernel_stack = ((unsigned long) &init_thread_union)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->async_stack = (unsigned long)
- __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
+ __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0)
+ + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->panic_stack = (unsigned long)
- __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
+ __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0)
+ + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ MAX_FACILITY_BIT/8);
#ifndef CONFIG_64BIT
if (MACHINE_HAS_IEEE) {
lc->extended_save_area_addr = (__u32)
- __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
+ __alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
/* enable extended save area */
__ctl_set_bit(14, 29);
}
+#else
+ lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
+ lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
+ lc->async_enter_timer = S390_lowcore.async_enter_timer;
+ lc->exit_timer = S390_lowcore.exit_timer;
+ lc->user_timer = S390_lowcore.user_timer;
+ lc->system_timer = S390_lowcore.system_timer;
+ lc->steal_timer = S390_lowcore.steal_timer;
+ lc->last_update_timer = S390_lowcore.last_update_timer;
+ lc->last_update_clock = S390_lowcore.last_update_clock;
+ lc->ftrace_func = S390_lowcore.ftrace_func;
+
+ restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
+ restart_stack += ASYNC_SIZE;
+
+ /*
+ * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
+ * restart data to the absolute zero lowcore. This is necessary if
+ * PSW restart is done on an offline CPU that has lowcore zero.
+ */
+ lc->restart_stack = (unsigned long) restart_stack;
+ lc->restart_fn = (unsigned long) do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1UL;
+
+ /* Setup absolute zero lowcore */
+ mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
+ mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
+ mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
+ mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
+ mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
+
+#ifdef CONFIG_SMP
+ lc->spinlock_lockval = arch_spin_lockval(0);
+#endif
+
set_prefix((u32)(unsigned long) lc);
+ lowcore_ptr[0] = lc;
}
-static void __init
-setup_resources(void)
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource bss_resource = {
+ .name = "Kernel bss",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource __initdata *standard_resources[] = {
+ &code_resource,
+ &data_resource,
+ &bss_resource,
+};
+
+static void __init setup_resources(void)
{
- struct resource *res, *sub_res;
- int i;
+ struct resource *res, *std_res, *sub_res;
+ struct memblock_region *reg;
+ int j;
code_resource.start = (unsigned long) &_text;
code_resource.end = (unsigned long) &_etext - 1;
data_resource.start = (unsigned long) &_etext;
data_resource.end = (unsigned long) &_edata - 1;
+ bss_resource.start = (unsigned long) &__bss_start;
+ bss_resource.end = (unsigned long) &__bss_stop - 1;
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- if (!memory_chunk[i].size)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
+ for_each_memblock(memory, reg) {
+ res = alloc_bootmem_low(sizeof(*res));
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
- switch (memory_chunk[i].type) {
- case CHUNK_READ_WRITE:
- res->name = "System RAM";
- break;
- case CHUNK_READ_ONLY:
- res->name = "System ROM";
- res->flags |= IORESOURCE_READONLY;
- break;
- default:
- res->name = "reserved";
- }
- res->start = memory_chunk[i].addr;
- res->end = memory_chunk[i].addr + memory_chunk[i].size - 1;
- request_resource(&iomem_resource, res);
- if (code_resource.start >= res->start &&
- code_resource.start <= res->end &&
- code_resource.end > res->end) {
- sub_res = alloc_bootmem_low(sizeof(struct resource));
- memcpy(sub_res, &code_resource,
- sizeof(struct resource));
- sub_res->end = res->end;
- code_resource.start = res->end + 1;
- request_resource(res, sub_res);
- }
+ res->name = "System RAM";
+ res->start = reg->base;
+ res->end = reg->base + reg->size - 1;
+ request_resource(&iomem_resource, res);
- if (code_resource.start >= res->start &&
- code_resource.start <= res->end &&
- code_resource.end <= res->end)
- request_resource(res, &code_resource);
-
- if (data_resource.start >= res->start &&
- data_resource.start <= res->end &&
- data_resource.end > res->end) {
- sub_res = alloc_bootmem_low(sizeof(struct resource));
- memcpy(sub_res, &data_resource,
- sizeof(struct resource));
- sub_res->end = res->end;
- data_resource.start = res->end + 1;
- request_resource(res, sub_res);
+ for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+ std_res = standard_resources[j];
+ if (std_res->start < res->start ||
+ std_res->start > res->end)
+ continue;
+ if (std_res->end > res->end) {
+ sub_res = alloc_bootmem_low(sizeof(*sub_res));
+ *sub_res = *std_res;
+ sub_res->end = res->end;
+ std_res->start = res->end + 1;
+ request_resource(res, sub_res);
+ } else {
+ request_resource(res, std_res);
+ }
}
-
- if (data_resource.start >= res->start &&
- data_resource.start <= res->end &&
- data_resource.end <= res->end)
- request_resource(res, &data_resource);
}
}
-unsigned long real_memory_size;
-EXPORT_SYMBOL_GPL(real_memory_size);
-
static void __init setup_memory_end(void)
{
- unsigned long memory_size;
- unsigned long max_mem;
- int i;
-
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
- if (ipl_info.type == IPL_TYPE_FCP_DUMP)
- memory_end = ZFCPDUMP_HSA_SIZE;
+ unsigned long vmax, vmalloc_size, tmp;
+
+ /* Choose kernel address space layout: 2, 3, or 4 levels. */
+#ifdef CONFIG_64BIT
+ vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
+ if (tmp <= (1UL << 42))
+ vmax = 1UL << 42; /* 3-level kernel page table */
+ else
+ vmax = 1UL << 53; /* 4-level kernel page table */
+ /* module area is at the end of the kernel address space. */
+ MODULES_END = vmax;
+ MODULES_VADDR = MODULES_END - MODULES_LEN;
+ VMALLOC_END = MODULES_VADDR;
+#else
+ vmalloc_size = VMALLOC_END ?: 96UL << 20;
+ vmax = 1UL << 31; /* 2-level kernel page table */
+ /* vmalloc area is at the end of the kernel address space. */
+ VMALLOC_END = vmax;
#endif
- memory_size = 0;
- memory_end &= PAGE_MASK;
+ VMALLOC_START = vmax - vmalloc_size;
+
+ /* Split remaining virtual space between 1:1 mapping & vmemmap array */
+ tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+ tmp = SECTION_ALIGN_UP(tmp);
+ tmp = VMALLOC_START - tmp * sizeof(struct page);
+ tmp &= ~((vmax >> 11) - 1); /* align to page table level */
+ tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
+ vmemmap = (struct page *) tmp;
+
+ /* Take care that memory_end is set and <= vmemmap */
+ memory_end = min(memory_end ?: max_physmem_end, tmp);
+ max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+ memblock_remove(memory_end, ULONG_MAX);
+
+ pr_notice("Max memory size: %luMB\n", memory_end >> 20);
+}
- max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS;
- memory_end = min(max_mem, memory_end);
+/*
+ * Store the value returned by paddr_vmcoreinfo_note() in the vmcore_info
+ * field of the lowcore, using mem_assign_absolute() for the store.
+ */
+static void __init setup_vmcoreinfo(void)
+{
+ mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+}
- /*
- * Make sure all chunks are MAX_ORDER aligned so we don't need the
- * extra checks that HOLES_IN_ZONE would require.
- */
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- unsigned long start, end;
- struct mem_chunk *chunk;
- unsigned long align;
-
- chunk = &memory_chunk[i];
- align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
- start = (chunk->addr + align - 1) & ~(align - 1);
- end = (chunk->addr + chunk->size) & ~(align - 1);
- if (start >= end)
- memset(chunk, 0, sizeof(*chunk));
- else {
- chunk->addr = start;
- chunk->size = end - start;
- }
- }
+#ifdef CONFIG_CRASH_DUMP
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- struct mem_chunk *chunk = &memory_chunk[i];
+/*
+ * Memory hotplug notifier for kdump.
+ *
+ * When kdump is enabled, we have to ensure that no memory from
+ * the area [0 - crashkernel memory size] and
+ * [crashk_res.start - crashk_res.end] is set offline.
+ *
+ * Only MEM_GOING_OFFLINE requests are vetoed; every other notification
+ * is answered with NOTIFY_OK so that onlining memory is never rejected.
+ * Returns NOTIFY_BAD when the block to be offlined starts inside the
+ * first area or overlaps the second one, NOTIFY_OK otherwise.
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+			      unsigned long action, void *data)
+{
+	struct memory_notify *arg = data;
+
+	/* The restriction is about offlining only. */
+	if (action != MEM_GOING_OFFLINE)
+		return NOTIFY_OK;
+	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
+		return NOTIFY_BAD;
+	if (arg->start_pfn > PFN_DOWN(crashk_res.end))
+		return NOTIFY_OK;
+	if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
+		return NOTIFY_OK;
+	return NOTIFY_BAD;
+}
- real_memory_size = max(real_memory_size,
- chunk->addr + chunk->size);
- if (chunk->addr >= max_mem) {
- memset(chunk, 0, sizeof(*chunk));
- continue;
- }
- if (chunk->addr + chunk->size > max_mem)
- chunk->size = max_mem - chunk->addr;
- memory_size = max(memory_size, chunk->addr + chunk->size);
+/* Notifier block whose callback is kdump_mem_notifier() */
+static struct notifier_block kdump_mem_nb = {
+ .notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Make sure that the area behind memory_end is protected
+ */
+static void reserve_memory_end(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
+ !OLDMEM_BASE && sclp_get_hsa_size()) {
+ memory_end = sclp_get_hsa_size();
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
}
- if (!memory_end)
- memory_end = memory_size;
+#endif
+ if (!memory_end_set)
+ return;
+ memblock_reserve(memory_end, ULONG_MAX);
}
-static void __init
-setup_memory(void)
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void reserve_oldmem(void)
{
- unsigned long bootmap_size;
- unsigned long start_pfn, end_pfn;
- int i;
-
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- start_pfn = PFN_UP(__pa(&_end));
- end_pfn = max_pfn = PFN_DOWN(memory_end);
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
-#ifdef CONFIG_BLK_DEV_INITRD
- /*
- * Move the initrd in case the bitmap of the bootmem allocater
- * would overwrite it.
- */
+/*
+ * When OLDMEM_BASE is set, remove everything from OLDMEM_SIZE upwards from
+ * memblock. Unlike reserve_oldmem(), which reserves that range, this drops
+ * it from memblock altogether. Does nothing without CONFIG_CRASH_DUMP.
+ */
+static void remove_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
- if (INITRD_START && INITRD_SIZE) {
- unsigned long bmap_size;
- unsigned long start;
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ unsigned long long crash_base, crash_size;
+ phys_addr_t low, high;
+ int rc;
- bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
- bmap_size = PFN_PHYS(bmap_size);
+ rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
+ &crash_base);
- if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
- start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
+ crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
+ crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
+ if (rc || crash_size == 0)
+ return;
- if (start + INITRD_SIZE > memory_end) {
- printk("initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\n"
- "disabling initrd\n",
- start + INITRD_SIZE, memory_end);
- INITRD_START = INITRD_SIZE = 0;
- } else {
- printk("Moving initrd (0x%08lx -> 0x%08lx, "
- "size: %ld)\n",
- INITRD_START, start, INITRD_SIZE);
- memmove((void *) start, (void *) INITRD_START,
- INITRD_SIZE);
- INITRD_START = start;
- }
- }
+ if (memblock.memory.regions[0].size < crash_size) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "first memory chunk must be at least crashkernel size");
+ return;
}
-#endif
- /*
- * Initialize the boot-time allocator
- */
- bootmap_size = init_bootmem(start_pfn, end_pfn);
+ low = crash_base ?: OLDMEM_BASE;
+ high = low + crash_size;
+ if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+ /* The crashkernel fits into OLDMEM, reuse OLDMEM */
+ crash_base = low;
+ } else {
+ /* Find suitable area in free memory */
+ low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+ high = crash_base ? crash_base + crash_size : ULONG_MAX;
- /*
- * Register RAM areas with the bootmem allocator.
- */
+ if (crash_base && crash_base < low) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "crash_base too low");
+ return;
+ }
+ low = crash_base ?: low;
+ crash_base = memblock_find_in_range(low, high, crash_size,
+ KEXEC_CRASH_MEM_ALIGN);
+ }
- for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- unsigned long start_chunk, end_chunk, pfn;
-
- if (memory_chunk[i].type != CHUNK_READ_WRITE)
- continue;
- start_chunk = PFN_DOWN(memory_chunk[i].addr);
- end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1;
- end_chunk = min(end_chunk, end_pfn);
- if (start_chunk >= end_chunk)
- continue;
- add_active_range(0, start_chunk, end_chunk);
- pfn = max(start_chunk, start_pfn);
- for (; pfn <= end_chunk; pfn++)
- page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY);
+ if (!crash_base) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "no suitable area found");
+ return;
}
- psw_set_key(PAGE_DEFAULT_KEY);
+ if (register_memory_notifier(&kdump_mem_nb))
+ return;
- free_bootmem_with_active_regions(0, max_pfn);
+ if (!OLDMEM_BASE && MACHINE_IS_VM)
+ diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ insert_resource(&iomem_resource, &crashk_res);
+ memblock_remove(crash_base, crash_size);
+ pr_info("Reserving %lluMB of memory at %lluMB "
+ "for crashkernel (System RAM: %luMB)\n",
+ crash_size >> 20, crash_base >> 20,
+ (unsigned long)memblock.memory.total_size >> 20);
+ os_info_crashkernel_add(crash_base, crash_size);
+#endif
+}
- /*
- * Reserve memory used for lowcore/command line/kernel image.
- */
- reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
- reserve_bootmem((unsigned long)_stext,
- PFN_PHYS(start_pfn) - (unsigned long)_stext,
- BOOTMEM_DEFAULT);
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
- BOOTMEM_DEFAULT);
+/*
+ * Record the initrd location in initrd_start/initrd_end and reserve the
+ * same range in memblock. Compiled out without CONFIG_BLK_DEV_INITRD.
+ */
+static void __init reserve_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	unsigned long start = INITRD_START;
+	unsigned long size = INITRD_SIZE;
+
+	memblock_reserve(start, size);
+	initrd_start = start;
+	initrd_end = start + size;
+#endif
+}
+/*
+ * Check for initrd being in usable memory
+ */
+static void __init check_initrd(void)
+{
#ifdef CONFIG_BLK_DEV_INITRD
- if (INITRD_START && INITRD_SIZE) {
- if (INITRD_START + INITRD_SIZE <= memory_end) {
- reserve_bootmem(INITRD_START, INITRD_SIZE,
- BOOTMEM_DEFAULT);
- initrd_start = INITRD_START;
- initrd_end = initrd_start + INITRD_SIZE;
- } else {
- printk("initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- initrd_start + INITRD_SIZE, memory_end);
- initrd_start = initrd_end = 0;
- }
+ if (INITRD_START && INITRD_SIZE &&
+ !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+ pr_err("initrd does not fit memory.\n");
+ memblock_free(INITRD_START, INITRD_SIZE);
+ initrd_start = initrd_end = 0;
}
#endif
}
-static int __init __stfle(unsigned long long *list, int doublewords)
+/*
+ * Reserve all kernel text
+ */
+static void __init reserve_kernel(void)
{
- typedef struct { unsigned long long _[doublewords]; } addrtype;
- register unsigned long __nr asm("0") = doublewords - 1;
+ unsigned long start_pfn;
+ start_pfn = PFN_UP(__pa(&_end));
- asm volatile(".insn s,0xb2b00000,%0" /* stfle */
- : "=m" (*(addrtype *) list), "+d" (__nr) : : "cc");
- return __nr + 1;
+ /*
+ * Reserve memory used for lowcore/command line/kernel image.
+ */
+ memblock_reserve(0, (unsigned long)_ehead);
+ memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
+ - (unsigned long)_stext);
}
-int __init stfle(unsigned long long *list, int doublewords)
+static void __init reserve_elfcorehdr(void)
{
- if (!(stfl() & (1UL << 24)))
- return -EOPNOTSUPP;
- return __stfle(list, doublewords);
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel())
+ memblock_reserve(elfcorehdr_addr - OLDMEM_BASE,
+ PAGE_ALIGN(elfcorehdr_size));
+#endif
+}
+
+static void __init setup_memory(void)
+{
+ struct memblock_region *reg;
+
+ /*
+ * Init storage key for present memory
+ */
+ for_each_memblock(memory, reg) {
+ storage_key_init_range(reg->base, reg->base + reg->size);
+ }
+ psw_set_key(PAGE_DEFAULT_KEY);
+
+ /* Only cosmetics */
+ memblock_enforce_memory_limit(memblock_end_of_DRAM());
}
/*
@@ -706,12 +699,9 @@ int __init stfle(unsigned long long *list, int doublewords)
static void __init setup_hwcaps(void)
{
static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
- struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data;
- unsigned long long facility_list_extended;
- unsigned int facility_list;
+ struct cpuid cpu_id;
int i;
- facility_list = stfl();
/*
* The store facility list bits numbers as found in the principles
* of operation are numbered with bit 1UL<<31 as number 0 to
@@ -722,37 +712,59 @@ static void __init setup_hwcaps(void)
* Bit 17: the message-security assist is installed
* Bit 19: the long-displacement facility is installed
* Bit 21: the extended-immediate facility is installed
+ * Bit 22: extended-translation facility 3 is installed
+ * Bit 30: extended-translation facility 3 enhancement facility
* These get translated to:
* HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
* HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
- * HWCAP_S390_LDISP bit 4, and HWCAP_S390_EIMM bit 5.
+ * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
+ * HWCAP_S390_ETF3EH bit 8 (22 && 30).
*/
for (i = 0; i < 6; i++)
- if (facility_list & (1UL << (31 - stfl_bits[i])))
+ if (test_facility(stfl_bits[i]))
elf_hwcap |= 1UL << i;
+ if (test_facility(22) && test_facility(30))
+ elf_hwcap |= HWCAP_S390_ETF3EH;
+
/*
* Check for additional facilities with store-facility-list-extended.
* stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
* and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
* as stored by stfl, bits 32-xxx contain additional facilities.
* How many facility words are stored depends on the number of
- * doublewords passed to the instruction. The additional facilites
+ * doublewords passed to the instruction. The additional facilities
* are:
- * Bit 43: decimal floating point facility is installed
+ * Bit 42: decimal floating point facility is installed
+ * Bit 44: perform floating point operation facility is installed
* translated to:
- * HWCAP_S390_DFP bit 6.
+ * HWCAP_S390_DFP bit 6 (42 && 44).
*/
- if ((elf_hwcap & (1UL << 2)) &&
- __stfle(&facility_list_extended, 1) > 0) {
- if (facility_list_extended & (1ULL << (64 - 43)))
- elf_hwcap |= 1UL << 6;
- }
+ if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
+ elf_hwcap |= HWCAP_S390_DFP;
+ /*
+ * Huge page support HWCAP_S390_HPAGE is bit 7.
+ */
if (MACHINE_HAS_HPAGE)
- elf_hwcap |= 1UL << 7;
+ elf_hwcap |= HWCAP_S390_HPAGE;
- switch (cpuinfo->cpu_id.machine) {
+#if defined(CONFIG_64BIT)
+ /*
+ * 64-bit register support for 31-bit processes
+ * HWCAP_S390_HIGH_GPRS is bit 9.
+ */
+ elf_hwcap |= HWCAP_S390_HIGH_GPRS;
+
+ /*
+ * Transactional execution support HWCAP_S390_TE is bit 10.
+ */
+ if (test_facility(50) && test_facility(73))
+ elf_hwcap |= HWCAP_S390_TE;
+#endif
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
case 0x9672:
#if !defined(CONFIG_64BIT)
default: /* Use "g5" as default for 31 bit kernels. */
@@ -771,8 +783,21 @@ static void __init setup_hwcaps(void)
strcpy(elf_platform, "z990");
break;
case 0x2094:
+ case 0x2096:
strcpy(elf_platform, "z9-109");
break;
+ case 0x2097:
+ case 0x2098:
+ strcpy(elf_platform, "z10");
+ break;
+ case 0x2817:
+ case 0x2818:
+ strcpy(elf_platform, "z196");
+ break;
+ case 0x2827:
+ case 0x2828:
+ strcpy(elf_platform, "zEC12");
+ break;
}
}
@@ -781,58 +806,81 @@ static void __init setup_hwcaps(void)
* was printed.
*/
-void __init
-setup_arch(char **cmdline_p)
+void __init setup_arch(char **cmdline_p)
{
/*
* print what head.S has found out about the machine
*/
#ifndef CONFIG_64BIT
- printk((MACHINE_IS_VM) ?
- "We are running under VM (31 bit mode)\n" :
- "We are running native (31 bit mode)\n");
- printk((MACHINE_HAS_IEEE) ?
- "This machine has an IEEE fpu\n" :
- "This machine has no IEEE fpu\n");
+ if (MACHINE_IS_VM)
+ pr_info("Linux is running as a z/VM "
+ "guest operating system in 31-bit mode\n");
+ else if (MACHINE_IS_LPAR)
+ pr_info("Linux is running natively in 31-bit mode\n");
+ if (MACHINE_HAS_IEEE)
+ pr_info("The hardware system has IEEE compatible "
+ "floating point units\n");
+ else
+ pr_info("The hardware system has no IEEE compatible "
+ "floating point units\n");
#else /* CONFIG_64BIT */
if (MACHINE_IS_VM)
- printk("We are running under VM (64 bit mode)\n");
- else if (MACHINE_IS_KVM) {
- printk("We are running under KVM (64 bit mode)\n");
- add_preferred_console("ttyS", 1, NULL);
- } else
- printk("We are running native (64 bit mode)\n");
+ pr_info("Linux is running as a z/VM "
+ "guest operating system in 64-bit mode\n");
+ else if (MACHINE_IS_KVM)
+ pr_info("Linux is running under KVM in 64-bit mode\n");
+ else if (MACHINE_IS_LPAR)
+ pr_info("Linux is running natively in 64-bit mode\n");
#endif /* CONFIG_64BIT */
- /* Save unparsed command line copy for /proc/cmdline */
- strlcpy(boot_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
-
- *cmdline_p = COMMAND_LINE;
- *(*cmdline_p + COMMAND_LINE_SIZE - 1) = '\0';
+ /* Have one command line that is parsed and saved in /proc/cmdline */
+ /* boot_command_line has been already set up in early.c */
+ *cmdline_p = boot_command_line;
ROOT_DEV = Root_RAM0;
+ /* Is init_mm really needed? */
init_mm.start_code = PAGE_OFFSET;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
- if (MACHINE_HAS_MVCOS)
- memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
- else
- memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
-
parse_early_param();
-
+ os_info_init();
setup_ipl();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+ reserve_memory_end();
+ reserve_oldmem();
+ reserve_kernel();
+ reserve_initrd();
+ reserve_elfcorehdr();
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
+ detect_memory_memblock();
+
+ remove_oldmem();
+
+ /*
+ * Make sure all chunks are MAX_ORDER aligned so we don't need the
+ * extra checks that HOLES_IN_ZONE would require.
+ *
+ * Is this still required?
+ */
+ memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+
setup_memory_end();
- setup_addressing_mode();
setup_memory();
+
+ check_initrd();
+ reserve_crashkernel();
+
setup_resources();
+ setup_vmcoreinfo();
setup_lowcore();
-
+ smp_fill_possible_mask();
cpu_init();
- __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
s390_init_cpu_topology();
/*
@@ -847,94 +895,40 @@ setup_arch(char **cmdline_p)
/* Setup default console */
conmode_default();
+ set_preferred_console();
/* Setup zfcpdump support */
- setup_zfcpdump(console_devno);
+ setup_zfcpdump();
}
-void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
-{
- printk(KERN_INFO "cpu %d "
-#ifdef CONFIG_SMP
- "phys_idx=%d "
-#endif
- "vers=%02X ident=%06X machine=%04X unused=%04X\n",
- cpuinfo->cpu_nr,
-#ifdef CONFIG_SMP
- cpuinfo->cpu_addr,
-#endif
- cpuinfo->cpu_id.version,
- cpuinfo->cpu_id.ident,
- cpuinfo->cpu_id.machine,
- cpuinfo->cpu_id.unused);
-}
-
-/*
- * show_cpuinfo - Get information on one CPU for use by procfs.
- */
+#ifdef CONFIG_32BIT
+static int no_removal_warning __initdata;
-static int show_cpuinfo(struct seq_file *m, void *v)
+static int __init parse_no_removal_warning(char *str)
{
- static const char *hwcap_str[8] = {
- "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat"
- };
- struct cpuinfo_S390 *cpuinfo;
- unsigned long n = (unsigned long) v - 1;
- int i;
-
- s390_adjust_jiffies();
- preempt_disable();
- if (!n) {
- seq_printf(m, "vendor_id : IBM/S390\n"
- "# processors : %i\n"
- "bogomips per cpu: %lu.%02lu\n",
- num_online_cpus(), loops_per_jiffy/(500000/HZ),
- (loops_per_jiffy/(5000/HZ))%100);
- seq_puts(m, "features\t: ");
- for (i = 0; i < 8; i++)
- if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
- seq_printf(m, "%s ", hwcap_str[i]);
- seq_puts(m, "\n");
- }
-
- if (cpu_online(n)) {
-#ifdef CONFIG_SMP
- if (smp_processor_id() == n)
- cpuinfo = &S390_lowcore.cpu_data;
- else
- cpuinfo = &lowcore_ptr[n]->cpu_data;
-#else
- cpuinfo = &S390_lowcore.cpu_data;
-#endif
- seq_printf(m, "processor %li: "
- "version = %02X, "
- "identification = %06X, "
- "machine = %04X\n",
- n, cpuinfo->cpu_id.version,
- cpuinfo->cpu_id.ident,
- cpuinfo->cpu_id.machine);
- }
- preempt_enable();
- return 0;
+ no_removal_warning = 1;
+ return 0;
}
+__setup("no_removal_warning", parse_no_removal_warning);
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL;
-}
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- ++*pos;
- return c_start(m, pos);
-}
-static void c_stop(struct seq_file *m, void *v)
+static int __init removal_warning(void)
{
+ if (no_removal_warning)
+ return 0;
+ printk(KERN_ALERT "\n\n");
+ printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
+ printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
+ printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
+ printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
+ printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
+ printk(KERN_CONT "please let us know. Please write to:\n");
+ printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
+ printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
+ printk(KERN_CONT "Thank you!\n\n");
+ printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
+ printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
+ schedule_timeout_uninterruptible(300 * HZ);
+ return 0;
}
-const struct seq_operations cpuinfo_op = {
- .start = c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
-
+early_initcall(removal_warning);
+#endif
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index b9768204021..42b49f9e19b 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -1,7 +1,5 @@
/*
- * arch/s390/kernel/signal.c
- *
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright IBM Corp. 1999, 2006
* Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
*
* Based on Intel version
@@ -24,14 +22,15 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <linux/tracehook.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/lowcore.h>
+#include <asm/switch_to.h>
#include "entry.h"
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-
typedef struct
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
@@ -49,67 +48,6 @@ typedef struct
struct ucontext uc;
} rt_sigframe;
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int history0, int history1, old_sigset_t mask)
-{
- mask &= _BLOCKABLE;
- spin_lock_irq(&current->sighand->siglock);
- current->saved_sigmask = current->blocked;
- siginitset(&current->blocked, mask);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- current->state = TASK_INTERRUPTIBLE;
- schedule();
- set_thread_flag(TIF_RESTORE_SIGMASK);
-
- return -ERESTARTNOHAND;
-}
-
-asmlinkage long
-sys_sigaction(int sig, const struct old_sigaction __user *act,
- struct old_sigaction __user *oact)
-{
- struct k_sigaction new_ka, old_ka;
- int ret;
-
- if (act) {
- old_sigset_t mask;
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
- __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
- __get_user(mask, &act->sa_mask))
- return -EFAULT;
- siginitset(&new_ka.sa.sa_mask, mask);
- }
-
- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
- __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
- return -EFAULT;
- }
-
- return ret;
-}
-
-asmlinkage long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
-{
- struct pt_regs *regs = task_pt_regs(current);
- return do_sigaltstack(uss, uoss, regs->gprs[15]);
-}
-
-
-
/* Returns non-zero on fault. */
static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
@@ -119,102 +57,97 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
/* Copy a 'clean' PSW mask to the user to avoid leaking
information about whether PER is currently on. */
- user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask);
+ user_sregs.regs.psw.mask = PSW_USER_BITS |
+ (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
user_sregs.regs.psw.addr = regs->psw.addr;
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
- sizeof(sregs->regs.acrs));
+ sizeof(user_sregs.regs.acrs));
/*
* We have to store the fp registers to current->thread.fp_regs
* to merge them with the emulated registers.
*/
- save_fp_regs(&current->thread.fp_regs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ save_fp_regs(current->thread.fp_regs.fprs);
memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
- sizeof(s390_fp_regs));
- return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs));
+ sizeof(user_sregs.fpregs));
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
+ return -EFAULT;
+ return 0;
}
-/* Returns positive number on error */
static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
- int err;
_sigregs user_sregs;
/* Always make any pending restarted system call return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
- err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs));
- if (err)
- return err;
- regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask,
- user_sregs.regs.psw.mask);
- regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr;
+ if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+ return -EINVAL;
+
+ /* Loading the floating-point-control word can fail. Do that first. */
+ if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ /* Check for invalid amode */
+ if (regs->psw.mask & PSW_MASK_EA)
+ regs->psw.mask |= PSW_MASK_BA;
+ regs->psw.addr = user_sregs.regs.psw.addr;
memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
- sizeof(sregs->regs.acrs));
+ sizeof(current->thread.acrs));
restore_access_regs(current->thread.acrs);
memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
- sizeof(s390_fp_regs));
- current->thread.fp_regs.fpc &= FPC_VALID_MASK;
+ sizeof(current->thread.fp_regs));
- restore_fp_regs(&current->thread.fp_regs);
- regs->trap = -1; /* disable syscall checks */
+ restore_fp_regs(current->thread.fp_regs.fprs);
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
}
-asmlinkage long sys_sigreturn(void)
+SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
sigframe __user *frame = (sigframe __user *)regs->gprs[15];
sigset_t set;
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
+ set_current_blocked(&set);
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
-
return regs->gprs[2];
-
badframe:
force_sig(SIGSEGV, current);
return 0;
}
-asmlinkage long sys_rt_sigreturn(void)
+SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15];
sigset_t set;
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
- goto badframe;
if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
+ set_current_blocked(&set);
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
-
- if (do_sigaltstack(&frame->uc.uc_stack, NULL,
- regs->gprs[15]) == -EFAULT)
+ if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
return regs->gprs[2];
-
badframe:
force_sig(SIGSEGV, current);
return 0;
@@ -246,13 +179,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
sp = current->sas_ss_sp + current->sas_ss_size;
}
- /* This is the legacy signal stack switching. */
- else if (!user_mode(regs) &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- sp = (unsigned long) ka->sa.sa_restorer;
- }
-
return (void __user *)((sp - frame_size) & -8ul);
}
@@ -272,8 +198,6 @@ static int setup_frame(int sig, struct k_sigaction *ka,
sigframe __user *frame;
frame = get_sigframe(ka, regs, sizeof(sigframe));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -305,6 +229,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* Set up registers for signal handler */
regs->gprs[15] = (unsigned long) frame;
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
regs->gprs[2] = map_signal(sig);
@@ -312,8 +240,13 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* We forgot to include these in the sigcontext.
To avoid breaking binary compatibility, they are passed as args. */
- regs->gprs[4] = current->thread.trap_no;
- regs->gprs[5] = current->thread.prot_addr;
+ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+ sig == SIGTRAP || sig == SIGFPE) {
+ /* set extra registers only for synchronous signals */
+ regs->gprs[4] = regs->int_code & 127;
+ regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = task_thread_info(current)->last_break;
+ }
/* Place signal number on stack to allow backtrace from handler. */
if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
@@ -332,8 +265,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
rt_sigframe __user *frame;
frame = get_sigframe(ka, regs, sizeof(rt_sigframe));
- if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe)))
- goto give_sigsegv;
if (frame == (void __user *) -1UL)
goto give_sigsegv;
@@ -344,10 +275,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(NULL, &frame->uc.uc_link);
- err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->gprs[15]),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
err |= save_sigregs(regs, &frame->uc.uc_mcontext);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
@@ -372,11 +300,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
/* Set up registers for signal handler */
regs->gprs[15] = (unsigned long) frame;
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
regs->gprs[2] = map_signal(sig);
regs->gprs[3] = (unsigned long) &frame->info;
regs->gprs[4] = (unsigned long) &frame->uc;
+ regs->gprs[5] = task_thread_info(current)->last_break;
return 0;
give_sigsegv:
@@ -384,13 +317,9 @@ give_sigsegv:
return -EFAULT;
}
-/*
- * OK, we're invoking a handler
- */
-
-static int
-handle_signal(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
+static void handle_signal(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset,
+ struct pt_regs *regs)
{
int ret;
@@ -399,17 +328,10 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
ret = setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = setup_frame(sig, ka, oldset, regs);
-
- if (ret == 0) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- }
-
- return ret;
+ if (ret)
+ return;
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLE_STEP));
}
/*
@@ -423,106 +345,83 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
*/
void do_signal(struct pt_regs *regs)
{
- unsigned long retval = 0, continue_addr = 0, restart_addr = 0;
siginfo_t info;
int signr;
struct k_sigaction ka;
- sigset_t *oldset;
+ sigset_t *oldset = sigmask_to_save();
/*
- * We want the common case to go fast, which
- * is why we may in certain cases get here from
- * kernel mode. Just return without doing anything
- * if so.
+ * Get signal to deliver. When running under ptrace, at this point
+ * the debugger may change all our registers, including the system
+ * call information.
*/
- if (!user_mode(regs))
- return;
+ current_thread_info()->system_call =
+ test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- oldset = &current->saved_sigmask;
- else
- oldset = &current->blocked;
+ if (signr > 0) {
+ /* Whee! Actually deliver the signal. */
+ if (current_thread_info()->system_call) {
+ regs->int_code = current_thread_info()->system_call;
+ /* Check for system call restarting. */
+ switch (regs->gprs[2]) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->gprs[2] = -EINTR;
+ break;
+ case -ERESTARTSYS:
+ if (!(ka.sa.sa_flags & SA_RESTART)) {
+ regs->gprs[2] = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->gprs[2] = regs->orig_gpr2;
+ regs->psw.addr =
+ __rewind_psw(regs->psw,
+ regs->int_code >> 16);
+ break;
+ }
+ }
+ /* No longer in a system call */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
- /* Are we from a system call? */
- if (regs->trap == __LC_SVC_OLD_PSW) {
- continue_addr = regs->psw.addr;
- restart_addr = continue_addr - regs->ilc;
- retval = regs->gprs[2];
+ if (is_compat_task())
+ handle_signal32(signr, &ka, &info, oldset, regs);
+ else
+ handle_signal(signr, &ka, &info, oldset, regs);
+ return;
+ }
- /* Prepare for system call restart. We do this here so that a
- debugger will see the already changed PSW. */
- switch (retval) {
+ /* No handlers present - check for system call restart */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ if (current_thread_info()->system_call) {
+ regs->int_code = current_thread_info()->system_call;
+ switch (regs->gprs[2]) {
+ case -ERESTART_RESTARTBLOCK:
+ /* Restart with sys_restart_syscall */
+ regs->int_code = __NR_restart_syscall;
+ /* fallthrough */
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
+ /* Restart system call with magic PIF bit. */
regs->gprs[2] = regs->orig_gpr2;
- regs->psw.addr = restart_addr;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ if (test_thread_flag(TIF_SINGLE_STEP))
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
break;
- case -ERESTART_RESTARTBLOCK:
- regs->gprs[2] = -EINTR;
- }
- regs->trap = -1; /* Don't deal with this again. */
- }
-
- /* Get signal to deliver. When running under ptrace, at this point
- the debugger may change all our registers ... */
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-
- /* Depending on the signal settings we may need to revert the
- decision to restart the system call. */
- if (signr > 0 && regs->psw.addr == restart_addr) {
- if (retval == -ERESTARTNOHAND
- || (retval == -ERESTARTSYS
- && !(current->sighand->action[signr-1].sa.sa_flags
- & SA_RESTART))) {
- regs->gprs[2] = -EINTR;
- regs->psw.addr = continue_addr;
- }
- }
-
- if (signr > 0) {
- /* Whee! Actually deliver the signal. */
- int ret;
-#ifdef CONFIG_COMPAT
- if (test_thread_flag(TIF_31BIT)) {
- ret = handle_signal32(signr, &ka, &info, oldset, regs);
- }
- else
-#endif
- ret = handle_signal(signr, &ka, &info, oldset, regs);
- if (!ret) {
- /*
- * A signal was successfully delivered; the saved
- * sigmask will have been stored in the signal frame,
- * and will be restored by sigreturn, so we can simply
- * clear the TIF_RESTORE_SIGMASK flag.
- */
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- clear_thread_flag(TIF_RESTORE_SIGMASK);
-
- /*
- * If we would have taken a single-step trap
- * for a normal instruction, act like we took
- * one for the handler setup.
- */
- if (current->thread.per_info.single_step)
- set_thread_flag(TIF_SINGLE_STEP);
}
- return;
}
/*
* If there's no signal to deliver, we just put the saved sigmask back.
*/
- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
- clear_thread_flag(TIF_RESTORE_SIGMASK);
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
+ restore_saved_sigmask();
+}
- /* Restart a different system call. */
- if (retval == -ERESTART_RESTARTBLOCK
- && regs->psw.addr == continue_addr) {
- regs->gprs[2] = __NR_restart_syscall;
- set_thread_flag(TIF_RESTART_SVC);
- }
+void do_notify_resume(struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 5d4fa4b1c74..243c7e51260 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1,25 +1,24 @@
/*
- * arch/s390/kernel/smp.c
+ * SMP related functions
*
- * Copyright IBM Corp. 1999,2007
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- * Martin Schwidefsky (schwidefsky@de.ibm.com)
- * Heiko Carstens (heiko.carstens@de.ibm.com)
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
*
* based on other smp stuff by
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
* (c) 1998 Ingo Molnar
*
- * We work with logical cpu numbering everywhere we can. The only
- * functions using the real cpu address (got from STAP) are the sigp
- * functions. For all other functions we use the identity mapping.
- * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is
- * used e.g. to find the idle task belonging to a logical cpu. Every array
- * in the kernel is sorted by the logical cpu number and not by the physical
- * one which is causing all the confusion with __cpu_logical_map and
- * cpu_number_map in other architectures.
+ * The code outside of smp.c uses logical cpu numbers, only smp.c does
+ * the translation of logical to physical cpu ids. All new code that
+ * operates on physical cpu numbers needs to go into smp.c.
*/
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
@@ -27,232 +26,380 @@
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
-#include <linux/cache.h>
#include <linux/interrupt.h>
+#include <linux/irqflags.h>
#include <linux/cpu.h>
-#include <linux/timex.h>
-#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/crash_dump.h>
+#include <asm/asm-offsets.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
#include <asm/ipl.h>
#include <asm/setup.h>
-#include <asm/sigp.h>
-#include <asm/pgalloc.h>
#include <asm/irq.h>
-#include <asm/s390_ext.h>
-#include <asm/cpcmd.h>
#include <asm/tlbflush.h>
-#include <asm/timer.h>
+#include <asm/vtimer.h>
#include <asm/lowcore.h>
#include <asm/sclp.h>
-#include <asm/cpu.h>
+#include <asm/vdso.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include <asm/sigp.h>
#include "entry.h"
-/*
- * An array with a pointer the lowcore of every CPU.
- */
-struct _lowcore *lowcore_ptr[NR_CPUS];
-EXPORT_SYMBOL(lowcore_ptr);
-
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-
-static struct task_struct *current_set[NR_CPUS];
-
-static u8 smp_cpu_type;
-static int smp_use_sigp_detection;
+enum {
+ ec_schedule = 0,
+ ec_call_function_single,
+ ec_stop_cpu,
+};
-enum s390_cpu_state {
+enum {
CPU_STATE_STANDBY,
CPU_STATE_CONFIGURED,
};
-DEFINE_MUTEX(smp_cpu_state_mutex);
-int smp_cpu_polarization[NR_CPUS];
-static int smp_cpu_state[NR_CPUS];
-static int cpu_management;
-
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
+struct pcpu {
+ struct cpu *cpu;
+ struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
+ unsigned long async_stack; /* async stack for the cpu */
+ unsigned long panic_stack; /* panic stack for the cpu */
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ int state; /* physical cpu state */
+ int polarization; /* physical polarization */
+ u16 address; /* physical cpu address */
+};
-static void smp_ext_bitcall(int, ec_bit_sig);
+static u8 boot_cpu_type;
+static u16 boot_cpu_address;
+static struct pcpu pcpu_devices[NR_CPUS];
/*
- * Structure and data for __smp_call_function_map(). This is designed to
- * minimise static memory requirements. It also looks cleaner.
+ * The smp_cpu_state_mutex must be held when changing the state or polarization
+ * member of a pcpu data structure within the pcpu_devices array.
*/
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- cpumask_t started;
- cpumask_t finished;
- int wait;
-};
-
-static struct call_data_struct *call_data;
+DEFINE_MUTEX(smp_cpu_state_mutex);
/*
- * 'Call function' interrupt callback
+ * Signal processor helper functions.
*/
-static void do_call_function(void)
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- cpu_set(smp_processor_id(), call_data->started);
- (*func)(info);
- if (wait)
- cpu_set(smp_processor_id(), call_data->finished);;
+ int cc;
+
+ while (1) {
+ cc = __pcpu_sigp(addr, order, parm, NULL);
+ if (cc != SIGP_CC_BUSY)
+ return cc;
+ cpu_relax();
+ }
}
-static void __smp_call_function_map(void (*func) (void *info), void *info,
- int nonatomic, int wait, cpumask_t map)
+static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
{
- struct call_data_struct data;
- int cpu, local = 0;
-
- /*
- * Can deadlock when interrupts are disabled or if in wrong context.
- */
- WARN_ON(irqs_disabled() || in_irq());
-
- /*
- * Check for local function call. We have to have the same call order
- * as in on_each_cpu() because of machine_restart_smp().
- */
- if (cpu_isset(smp_processor_id(), map)) {
- local = 1;
- cpu_clear(smp_processor_id(), map);
+ int cc, retry;
+
+ for (retry = 0; ; retry++) {
+ cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
+ if (cc != SIGP_CC_BUSY)
+ break;
+ if (retry >= 3)
+ udelay(10);
}
+ return cc;
+}
- cpus_and(map, map, cpu_online_map);
- if (cpus_empty(map))
- goto out;
+static inline int pcpu_stopped(struct pcpu *pcpu)
+{
+ u32 uninitialized_var(status);
- data.func = func;
- data.info = info;
- data.started = CPU_MASK_NONE;
- data.wait = wait;
- if (wait)
- data.finished = CPU_MASK_NONE;
+ if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
+ 0, &status) != SIGP_CC_STATUS_STORED)
+ return 0;
+ return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
+}
- call_data = &data;
+static inline int pcpu_running(struct pcpu *pcpu)
+{
+ if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
+ 0, NULL) != SIGP_CC_STATUS_STORED)
+ return 1;
+ /* Status stored condition code is equivalent to cpu not running. */
+ return 0;
+}
- for_each_cpu_mask(cpu, map)
- smp_ext_bitcall(cpu, ec_call_function);
+/*
+ * Find struct pcpu by cpu address.
+ */
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
+{
+ int cpu;
- /* Wait for response */
- while (!cpus_equal(map, data.started))
- cpu_relax();
- if (wait)
- while (!cpus_equal(map, data.finished))
- cpu_relax();
+ for_each_cpu(cpu, mask)
+ if (pcpu_devices[cpu].address == address)
+ return pcpu_devices + cpu;
+ return NULL;
+}
+
+static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
+{
+ int order;
+
+ if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+ return;
+ order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+ pcpu_sigp_retry(pcpu, order, 0);
+}
+
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+{
+ struct _lowcore *lc;
+
+ if (pcpu != &pcpu_devices[0]) {
+ pcpu->lowcore = (struct _lowcore *)
+ __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+ pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ pcpu->panic_stack = __get_free_page(GFP_KERNEL);
+ if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
+ goto out;
+ }
+ lc = pcpu->lowcore;
+ memcpy(lc, &S390_lowcore, 512);
+ memset((char *) lc + 512, 0, sizeof(*lc) - 512);
+ lc->async_stack = pcpu->async_stack + ASYNC_SIZE
+ - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
+ - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+#ifndef CONFIG_64BIT
+ if (MACHINE_HAS_IEEE) {
+ lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
+ if (!lc->extended_save_area_addr)
+ goto out;
+ }
+#else
+ if (vdso_alloc_per_cpu(lc))
+ goto out;
+#endif
+ lowcore_ptr[cpu] = lc;
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
+ return 0;
out:
- if (local) {
- local_irq_disable();
- func(info);
- local_irq_enable();
+ if (pcpu != &pcpu_devices[0]) {
+ free_page(pcpu->panic_stack);
+ free_pages(pcpu->async_stack, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
}
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void pcpu_free_lowcore(struct pcpu *pcpu)
+{
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
+ lowcore_ptr[pcpu - pcpu_devices] = NULL;
+#ifndef CONFIG_64BIT
+ if (MACHINE_HAS_IEEE) {
+ struct _lowcore *lc = pcpu->lowcore;
+
+ free_page((unsigned long) lc->extended_save_area_addr);
+ lc->extended_save_area_addr = 0;
+ }
+#else
+ vdso_free_per_cpu(pcpu->lowcore);
+#endif
+ if (pcpu != &pcpu_devices[0]) {
+ free_page(pcpu->panic_stack);
+ free_pages(pcpu->async_stack, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+ }
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ atomic_inc(&init_mm.context.attach_count);
+ lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->percpu_offset = __per_cpu_offset[cpu];
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->ftrace_func = S390_lowcore.ftrace_func;
+ lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+ __ctl_store(lc->cregs_save_area, 0, 15);
+ save_access_regs((unsigned int *) lc->access_regs_save_area);
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ MAX_FACILITY_BIT/8);
+}
+
+static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+ struct thread_info *ti = task_thread_info(tsk);
+
+ lc->kernel_stack = (unsigned long) task_stack_page(tsk)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->thread_info = (unsigned long) task_thread_info(tsk);
+ lc->current_task = (unsigned long) tsk;
+ lc->user_timer = ti->user_timer;
+ lc->system_timer = ti->system_timer;
+ lc->steal_timer = 0;
+}
+
+static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+
+ lc->restart_stack = lc->kernel_stack;
+ lc->restart_fn = (unsigned long) func;
+ lc->restart_data = (unsigned long) data;
+ lc->restart_source = -1UL;
+ pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
}
/*
- * smp_call_function:
- * @func: the function to run; this must be fast and non-blocking
- * @info: an arbitrary pointer to pass to the function
- * @nonatomic: unused
- * @wait: if true, wait (atomically) until function has completed on other CPUs
- *
- * Run a function on all other CPUs.
- *
- * You must not call this function with disabled interrupts, from a
- * hardware interrupt handler or from a bottom half.
+ * Call function via PSW restart on pcpu and stop the current cpu.
*/
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
+static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
+ void *data, unsigned long stack)
{
- cpumask_t map;
+ struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
+ unsigned long source_cpu = stap();
+
+ __load_psw_mask(PSW_KERNEL_BITS);
+ if (pcpu->address == source_cpu)
+ func(data); /* should not return */
+ /* Stop target cpu (if func returns this stops the current cpu). */
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ /* Restart func on the target cpu and stop the current cpu. */
+ mem_assign_absolute(lc->restart_stack, stack);
+ mem_assign_absolute(lc->restart_fn, (unsigned long) func);
+ mem_assign_absolute(lc->restart_data, (unsigned long) data);
+ mem_assign_absolute(lc->restart_source, source_cpu);
+ asm volatile(
+ "0: sigp 0,%0,%2 # sigp restart to target cpu\n"
+ " brc 2,0b # busy, try again\n"
+ "1: sigp 0,%1,%3 # sigp stop to current cpu\n"
+ " brc 2,1b # busy, try again\n"
+ : : "d" (pcpu->address), "d" (source_cpu),
+ "K" (SIGP_RESTART), "K" (SIGP_STOP)
+ : "0", "1", "cc");
+ for (;;) ;
+}
- spin_lock(&call_lock);
- map = cpu_online_map;
- cpu_clear(smp_processor_id(), map);
- __smp_call_function_map(func, info, nonatomic, wait, map);
- spin_unlock(&call_lock);
- return 0;
+/*
+ * Call function on an online CPU.
+ */
+void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+ struct pcpu *pcpu;
+
+ /* Use the current cpu if it is online. */
+ pcpu = pcpu_find_address(cpu_online_mask, stap());
+ if (!pcpu)
+ /* Use the first online cpu. */
+ pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
+ pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
}
-EXPORT_SYMBOL(smp_call_function);
/*
- * smp_call_function_single:
- * @cpu: the CPU where func should run
- * @func: the function to run; this must be fast and non-blocking
- * @info: an arbitrary pointer to pass to the function
- * @nonatomic: unused
- * @wait: if true, wait (atomically) until function has completed on other CPUs
- *
- * Run a function on one processor.
- *
- * You must not call this function with disabled interrupts, from a
- * hardware interrupt handler or from a bottom half.
+ * Call function on the ipl CPU.
*/
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
+void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
- spin_lock(&call_lock);
- __smp_call_function_map(func, info, nonatomic, wait,
- cpumask_of_cpu(cpu));
- spin_unlock(&call_lock);
- return 0;
+ pcpu_delegate(&pcpu_devices[0], func, data,
+ pcpu_devices->panic_stack + PAGE_SIZE);
}
-EXPORT_SYMBOL(smp_call_function_single);
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
+
+int smp_find_processor_id(u16 address)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (pcpu_devices[cpu].address == address)
+ return cpu;
+ return -1;
+}
+
+int smp_vcpu_scheduled(int cpu)
+{
+ return pcpu_running(pcpu_devices + cpu);
+}
+
+void smp_yield(void)
+{
+ if (MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,0x44");
+}
+
+void smp_yield_cpu(int cpu)
+{
+ if (MACHINE_HAS_DIAG9C)
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
+ else if (MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,0x44");
+}
+
+/*
+ * Send cpus emergency shutdown signal. This gives the cpus the
+ * opportunity to complete outstanding interrupts.
*/
-int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
- int wait)
+static void smp_emergency_stop(cpumask_t *cpumask)
{
- spin_lock(&call_lock);
- cpu_clear(smp_processor_id(), mask);
- __smp_call_function_map(func, info, 0, wait, mask);
- spin_unlock(&call_lock);
- return 0;
+ u64 end;
+ int cpu;
+
+ end = get_tod_clock() + (1000000UL << 12);
+ for_each_cpu(cpu, cpumask) {
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ set_bit(ec_stop_cpu, &pcpu->ec_mask);
+ while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
+ 0, NULL) == SIGP_CC_BUSY &&
+ get_tod_clock() < end)
+ cpu_relax();
+ }
+ while (get_tod_clock() < end) {
+ for_each_cpu(cpu, cpumask)
+ if (pcpu_stopped(pcpu_devices + cpu))
+ cpumask_clear_cpu(cpu, cpumask);
+ if (cpumask_empty(cpumask))
+ break;
+ cpu_relax();
+ }
}
-EXPORT_SYMBOL(smp_call_function_mask);
+/*
+ * Stop all cpus but the current one.
+ */
void smp_send_stop(void)
{
- int cpu, rc;
+ cpumask_t cpumask;
+ int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ trace_hardirqs_off();
- /* write magic number to zero page (absolute 0) */
- lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC;
+ debug_set_critical();
+ cpumask_copy(&cpumask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &cpumask);
- /* stop all processors */
- for_each_online_cpu(cpu) {
- if (cpu == smp_processor_id())
- continue;
- do {
- rc = signal_processor(cpu, sigp_stop);
- } while (rc == sigp_busy);
+ if (oops_in_progress)
+ smp_emergency_stop(&cpumask);
- while (!smp_cpu_not_running(cpu))
+ /* stop all processors */
+ for_each_cpu(cpu, &cpumask) {
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@@ -261,35 +408,38 @@ void smp_send_stop(void)
* This is the main routine where commands issued by other
* cpus are handled.
*/
-
-static void do_ext_call_interrupt(__u16 code)
+static void smp_handle_ext_call(void)
{
unsigned long bits;
- /*
- * handle bit signal external calls
- *
- * For the ec_schedule signal we have to do nothing. All the work
- * is done automatically when we return from the interrupt.
- */
- bits = xchg(&S390_lowcore.ext_call_fast, 0);
+ /* handle bit signal external calls */
+ bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ if (test_bit(ec_stop_cpu, &bits))
+ smp_stop_cpu();
+ if (test_bit(ec_schedule, &bits))
+ scheduler_ipi();
+ if (test_bit(ec_call_function_single, &bits))
+ generic_smp_call_function_single_interrupt();
+}
- if (test_bit(ec_call_function, &bits))
- do_call_function();
+static void do_ext_call_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+ smp_handle_ext_call();
}
-/*
- * Send an external call sigp to another cpu and return without waiting
- * for its completion.
- */
-static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
- /*
- * Set signaling bit in lowcore of target cpu and kick it
- */
- set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast);
- while (signal_processor(cpu, sigp_emergency_signal) == sigp_busy)
- udelay(10);
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
}
#ifndef CONFIG_64BIT
@@ -303,7 +453,7 @@ static void smp_ptlb_callback(void *info)
void smp_ptlb_all(void)
{
- on_each_cpu(smp_ptlb_callback, NULL, 0, 1);
+ on_each_cpu(smp_ptlb_callback, NULL, 1);
}
EXPORT_SYMBOL(smp_ptlb_all);
#endif /* ! CONFIG_64BIT */
@@ -315,15 +465,16 @@ EXPORT_SYMBOL(smp_ptlb_all);
*/
void smp_send_reschedule(int cpu)
{
- smp_ext_bitcall(cpu, ec_schedule);
+ pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
}
/*
* parameter area for the set/clear control bit callbacks
*/
struct ec_creg_mask_parms {
- unsigned long orvals[16];
- unsigned long andvals[16];
+ unsigned long orval;
+ unsigned long andval;
+ int cr;
};
/*
@@ -333,11 +484,9 @@ static void smp_ctl_bit_callback(void *info)
{
struct ec_creg_mask_parms *pp = info;
unsigned long cregs[16];
- int i;
__ctl_store(cregs, 0, 15);
- for (i = 0; i <= 15; i++)
- cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i];
+ cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
__ctl_load(cregs, 0, 15);
}
@@ -346,12 +495,9 @@ static void smp_ctl_bit_callback(void *info)
*/
void smp_ctl_set_bit(int cr, int bit)
{
- struct ec_creg_mask_parms parms;
+ struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
- memset(&parms.orvals, 0, sizeof(parms.orvals));
- memset(&parms.andvals, 0xff, sizeof(parms.andvals));
- parms.orvals[cr] = 1 << bit;
- on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_set_bit);
@@ -360,502 +506,303 @@ EXPORT_SYMBOL(smp_ctl_set_bit);
*/
void smp_ctl_clear_bit(int cr, int bit)
{
- struct ec_creg_mask_parms parms;
+ struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
- memset(&parms.orvals, 0, sizeof(parms.orvals));
- memset(&parms.andvals, 0xff, sizeof(parms.andvals));
- parms.andvals[cr] = ~(1L << bit);
- on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_clear_bit);
-/*
- * In early ipl state a temp. logically cpu number is needed, so the sigp
- * functions can be used to sense other cpus. Since NR_CPUS is >= 2 on
- * CONFIG_SMP and the ipl cpu is logical cpu 0, it must be 1.
- */
-#define CPU_INIT_NO 1
-
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
+#ifdef CONFIG_CRASH_DUMP
-/*
- * zfcpdump_prefix_array holds prefix registers for the following scenario:
- * 64 bit zfcpdump kernel and 31 bit kernel which is to be dumped. We have to
- * save its prefix registers, since they get lost, when switching from 31 bit
- * to 64 bit.
- */
-unsigned int zfcpdump_prefix_array[NR_CPUS + 1] \
- __attribute__((__section__(".data")));
-
-static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
+static void __init smp_get_save_area(int cpu, u16 address)
{
- if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ void *lc = pcpu_devices[0].lowcore;
+ struct save_area *save_area;
+
+ if (is_kdump_kernel())
+ return;
+ if (!OLDMEM_BASE && (address == boot_cpu_address ||
+ ipl_info.type != IPL_TYPE_FCP_DUMP))
return;
- if (cpu >= NR_CPUS) {
- printk(KERN_WARNING "Registers for cpu %i not saved since dump "
- "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS);
+ save_area = dump_save_area_create(cpu);
+ if (!save_area)
+ panic("could not allocate memory for save area\n");
+ if (address == boot_cpu_address) {
+ /* Copy the registers of the boot cpu. */
+ copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
+ SAVE_AREA_BASE - PAGE_SIZE, 0);
return;
}
- zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL);
- __cpu_logical_map[CPU_INIT_NO] = (__u16) phy_cpu;
- while (signal_processor(CPU_INIT_NO, sigp_stop_and_store_status) ==
- sigp_busy)
- cpu_relax();
- memcpy(zfcpdump_save_areas[cpu],
- (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE,
- SAVE_AREA_SIZE);
-#ifdef CONFIG_64BIT
- /* copy original prefix register */
- zfcpdump_save_areas[cpu]->s390x.pref_reg = zfcpdump_prefix_array[cpu];
-#endif
+ /* Get the registers of a non-boot cpu. */
+ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
+ memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
}
-union save_area *zfcpdump_save_areas[NR_CPUS + 1];
-EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
+int smp_store_status(int cpu)
+{
+ struct pcpu *pcpu;
-#else
+ pcpu = pcpu_devices + cpu;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+ 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ return 0;
+}
-static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { }
+#else /* CONFIG_CRASH_DUMP */
-#endif /* CONFIG_ZFCPDUMP || CONFIG_ZFCPDUMP_MODULE */
+static inline void smp_get_save_area(int cpu, u16 address) { }
-static int cpu_stopped(int cpu)
-{
- __u32 status;
+#endif /* CONFIG_CRASH_DUMP */
- /* Check for stopped state */
- if (signal_processor_ps(&status, 0, cpu, sigp_sense) ==
- sigp_status_stored) {
- if (status & 0x40)
- return 1;
- }
- return 0;
+void smp_cpu_set_polarization(int cpu, int val)
+{
+ pcpu_devices[cpu].polarization = val;
}
-static int cpu_known(int cpu_id)
+int smp_cpu_get_polarization(int cpu)
{
- int cpu;
-
- for_each_present_cpu(cpu) {
- if (__cpu_logical_map[cpu] == cpu_id)
- return 1;
- }
- return 0;
+ return pcpu_devices[cpu].polarization;
}
-static int smp_rescan_cpus_sigp(cpumask_t avail)
+static struct sclp_cpu_info *smp_get_cpu_info(void)
{
- int cpu_id, logical_cpu;
-
- logical_cpu = first_cpu(avail);
- if (logical_cpu == NR_CPUS)
- return 0;
- for (cpu_id = 0; cpu_id <= 65535; cpu_id++) {
- if (cpu_known(cpu_id))
- continue;
- __cpu_logical_map[logical_cpu] = cpu_id;
- smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
- if (!cpu_stopped(logical_cpu))
- continue;
- cpu_set(logical_cpu, cpu_present_map);
- smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
- logical_cpu = next_cpu(logical_cpu, avail);
- if (logical_cpu == NR_CPUS)
- break;
+ static int use_sigp_detection;
+ struct sclp_cpu_info *info;
+ int address;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ use_sigp_detection = 1;
+ for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
+ if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ info->cpu[info->configured].address = address;
+ info->configured++;
+ }
+ info->combined = info->configured;
}
- return 0;
+ return info;
}
-static int smp_rescan_cpus_sclp(cpumask_t avail)
+static int smp_add_present_cpu(int cpu);
+
+static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
{
- struct sclp_cpu_info *info;
- int cpu_id, logical_cpu, cpu;
- int rc;
+ struct pcpu *pcpu;
+ cpumask_t avail;
+ int cpu, nr, i;
- logical_cpu = first_cpu(avail);
- if (logical_cpu == NR_CPUS)
- return 0;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
- rc = sclp_get_cpu_info(info);
- if (rc)
- goto out;
- for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ cpu = cpumask_first(&avail);
+ for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+ if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
continue;
- cpu_id = info->cpu[cpu].address;
- if (cpu_known(cpu_id))
+ if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
continue;
- __cpu_logical_map[logical_cpu] = cpu_id;
- smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
- cpu_set(logical_cpu, cpu_present_map);
- if (cpu >= info->configured)
- smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = info->cpu[i].address;
+ pcpu->state = (i >= info->configured) ?
+ CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
else
- smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
- logical_cpu = next_cpu(logical_cpu, avail);
- if (logical_cpu == NR_CPUS)
- break;
+ nr++;
+ cpu = cpumask_next(cpu, &avail);
}
-out:
- kfree(info);
- return rc;
-}
-
-static int __smp_rescan_cpus(void)
-{
- cpumask_t avail;
-
- cpus_xor(avail, cpu_possible_map, cpu_present_map);
- if (smp_use_sigp_detection)
- return smp_rescan_cpus_sigp(avail);
- else
- return smp_rescan_cpus_sclp(avail);
+ return nr;
}
static void __init smp_detect_cpus(void)
{
unsigned int cpu, c_cpus, s_cpus;
struct sclp_cpu_info *info;
- u16 boot_cpu_addr, cpu_addr;
- c_cpus = 1;
- s_cpus = 0;
- boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = smp_get_cpu_info();
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
- /* Use sigp detection algorithm if sclp doesn't work. */
- if (sclp_get_cpu_info(info)) {
- smp_use_sigp_detection = 1;
- for (cpu = 0; cpu <= 65535; cpu++) {
- if (cpu == boot_cpu_addr)
- continue;
- __cpu_logical_map[CPU_INIT_NO] = cpu;
- if (!cpu_stopped(CPU_INIT_NO))
- continue;
- smp_get_save_area(c_cpus, cpu);
- c_cpus++;
- }
- goto out;
- }
-
if (info->has_cpu_type) {
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->cpu[cpu].address == boot_cpu_addr) {
- smp_cpu_type = info->cpu[cpu].type;
- break;
- }
+ if (info->cpu[cpu].address != boot_cpu_address)
+ continue;
+ /* The boot cpu dictates the cpu type. */
+ boot_cpu_type = info->cpu[cpu].type;
+ break;
}
}
-
+ c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
+ if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
continue;
- cpu_addr = info->cpu[cpu].address;
- if (cpu_addr == boot_cpu_addr)
- continue;
- __cpu_logical_map[CPU_INIT_NO] = cpu_addr;
- if (!cpu_stopped(CPU_INIT_NO)) {
+ if (cpu < info->configured) {
+ smp_get_save_area(c_cpus, info->cpu[cpu].address);
+ c_cpus++;
+ } else
s_cpus++;
- continue;
- }
- smp_get_save_area(c_cpus, cpu_addr);
- c_cpus++;
}
-out:
- kfree(info);
- printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
+ pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
get_online_cpus();
- __smp_rescan_cpus();
+ __smp_rescan_cpus(info, 0);
put_online_cpus();
+ kfree(info);
}
/*
* Activate a secondary processor.
*/
-int __cpuinit start_secondary(void *cpuvoid)
+static void smp_start_secondary(void *cpuvoid)
{
- /* Setup the cpu */
+ S390_lowcore.last_update_clock = get_tod_clock();
+ S390_lowcore.restart_stack = (unsigned long) restart_stack;
+ S390_lowcore.restart_fn = (unsigned long) do_restart;
+ S390_lowcore.restart_data = 0;
+ S390_lowcore.restart_source = -1UL;
+ restore_access_regs(S390_lowcore.access_regs_save_area);
+ __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
cpu_init();
preempt_disable();
- /* Enable TOD clock interrupts on the secondary cpu. */
init_cpu_timer();
-#ifdef CONFIG_VIRT_TIMER
- /* Enable cpu timer interrupts on the secondary cpu. */
init_cpu_vtimer();
-#endif
- /* Enable pfault pseudo page faults on this cpu. */
pfault_init();
-
- /* Mark this cpu as online */
- spin_lock(&call_lock);
- cpu_set(smp_processor_id(), cpu_online_map);
- spin_unlock(&call_lock);
- /* Switch on interrupts */
+ notify_cpu_starting(smp_processor_id());
+ set_cpu_online(smp_processor_id(), true);
+ inc_irq_stat(CPU_RST);
local_irq_enable();
- /* Print info about this processor */
- print_cpu_info(&S390_lowcore.cpu_data);
- /* cpu_idle will call schedule for us */
- cpu_idle();
- return 0;
-}
-
-static void __init smp_create_idle(unsigned int cpu)
-{
- struct task_struct *p;
-
- /*
- * don't care about the psw and regs settings since we'll never
- * reschedule the forked task.
- */
- p = fork_idle(cpu);
- if (IS_ERR(p))
- panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
- current_set[cpu] = p;
- spin_lock_init(&(&per_cpu(s390_idle, cpu))->lock);
-}
-
-static int __cpuinit smp_alloc_lowcore(int cpu)
-{
- unsigned long async_stack, panic_stack;
- struct _lowcore *lowcore;
- int lc_order;
-
- lc_order = sizeof(long) == 8 ? 1 : 0;
- lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
- if (!lowcore)
- return -ENOMEM;
- async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
- panic_stack = __get_free_page(GFP_KERNEL);
- if (!panic_stack || !async_stack)
- goto out;
- memcpy(lowcore, &S390_lowcore, 512);
- memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512);
- lowcore->async_stack = async_stack + ASYNC_SIZE;
- lowcore->panic_stack = panic_stack + PAGE_SIZE;
-
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE) {
- unsigned long save_area;
-
- save_area = get_zeroed_page(GFP_KERNEL);
- if (!save_area)
- goto out_save_area;
- lowcore->extended_save_area_addr = (u32) save_area;
- }
-#endif
- lowcore_ptr[cpu] = lowcore;
- return 0;
-
-#ifndef CONFIG_64BIT
-out_save_area:
- free_page(panic_stack);
-#endif
-out:
- free_pages(async_stack, ASYNC_ORDER);
- free_pages((unsigned long) lowcore, lc_order);
- return -ENOMEM;
+ cpu_startup_entry(CPUHP_ONLINE);
}
-#ifdef CONFIG_HOTPLUG_CPU
-static void smp_free_lowcore(int cpu)
-{
- struct _lowcore *lowcore;
- int lc_order;
-
- lc_order = sizeof(long) == 8 ? 1 : 0;
- lowcore = lowcore_ptr[cpu];
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE)
- free_page((unsigned long) lowcore->extended_save_area_addr);
-#endif
- free_page(lowcore->panic_stack - PAGE_SIZE);
- free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
- free_pages((unsigned long) lowcore, lc_order);
- lowcore_ptr[cpu] = NULL;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
/* Upping and downing of CPUs */
-int __cpuinit __cpu_up(unsigned int cpu)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct task_struct *idle;
- struct _lowcore *cpu_lowcore;
- struct stack_frame *sf;
- sigp_ccode ccode;
+ struct pcpu *pcpu;
+ int rc;
- if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
+ pcpu = pcpu_devices + cpu;
+ if (pcpu->state != CPU_STATE_CONFIGURED)
return -EIO;
- if (smp_alloc_lowcore(cpu))
- return -ENOMEM;
-
- ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
- cpu, sigp_set_prefix);
- if (ccode) {
- printk("sigp_set_prefix failed for cpu %d "
- "with condition code %d\n",
- (int) cpu, (int) ccode);
+ if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
+ SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- }
-
- idle = current_set[cpu];
- cpu_lowcore = lowcore_ptr[cpu];
- cpu_lowcore->kernel_stack = (unsigned long)
- task_stack_page(idle) + THREAD_SIZE;
- cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle);
- sf = (struct stack_frame *) (cpu_lowcore->kernel_stack
- - sizeof(struct pt_regs)
- - sizeof(struct stack_frame));
- memset(sf, 0, sizeof(struct stack_frame));
- sf->gprs[9] = (unsigned long) sf;
- cpu_lowcore->save_area[15] = (unsigned long) sf;
- __ctl_store(cpu_lowcore->cregs_save_area, 0, 15);
- asm volatile(
- " stam 0,15,0(%0)"
- : : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
- cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
- cpu_lowcore->current_task = (unsigned long) idle;
- cpu_lowcore->cpu_data.cpu_nr = cpu;
- cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce;
- cpu_lowcore->ipl_device = S390_lowcore.ipl_device;
- eieio();
-
- while (signal_processor(cpu, sigp_restart) == sigp_busy)
- udelay(10);
+ rc = pcpu_alloc_lowcore(pcpu, cpu);
+ if (rc)
+ return rc;
+ pcpu_prepare_secondary(pcpu, cpu);
+ pcpu_attach_task(pcpu, tidle);
+ pcpu_start_fn(pcpu, smp_start_secondary, NULL);
while (!cpu_online(cpu))
cpu_relax();
return 0;
}
-static int __init setup_possible_cpus(char *s)
-{
- int pcpus, cpu;
+static unsigned int setup_possible_cpus __initdata;
- pcpus = simple_strtoul(s, NULL, 0);
- cpu_possible_map = cpumask_of_cpu(0);
- for (cpu = 1; cpu < pcpus && cpu < NR_CPUS; cpu++)
- cpu_set(cpu, cpu_possible_map);
+static int __init _setup_possible_cpus(char *s)
+{
+ get_option(&s, &setup_possible_cpus);
return 0;
}
-early_param("possible_cpus", setup_possible_cpus);
+early_param("possible_cpus", _setup_possible_cpus);
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
- struct ec_creg_mask_parms cr_parms;
- int cpu = smp_processor_id();
-
- cpu_clear(cpu, cpu_online_map);
+ unsigned long cregs[16];
- /* Disable pfault pseudo page faults on this cpu. */
+ /* Handle possible pending IPIs */
+ smp_handle_ext_call();
+ set_cpu_online(smp_processor_id(), false);
+ /* Disable pseudo page faults on this cpu. */
pfault_fini();
-
- memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals));
- memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals));
-
- /* disable all external interrupts */
- cr_parms.orvals[0] = 0;
- cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 |
- 1 << 11 | 1 << 10 | 1 << 6 | 1 << 4);
- /* disable all I/O interrupts */
- cr_parms.orvals[6] = 0;
- cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 |
- 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24);
- /* disable most machine checks */
- cr_parms.orvals[14] = 0;
- cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 |
- 1 << 25 | 1 << 24);
-
- smp_ctl_bit_callback(&cr_parms);
-
+ /* Disable interrupt sources via control register. */
+ __ctl_store(cregs, 0, 15);
+ cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
+ __ctl_load(cregs, 0, 15);
return 0;
}
void __cpu_die(unsigned int cpu)
{
+ struct pcpu *pcpu;
+
/* Wait until target cpu is down */
- while (!smp_cpu_not_running(cpu))
+ pcpu = pcpu_devices + cpu;
+ while (!pcpu_stopped(pcpu))
cpu_relax();
- smp_free_lowcore(cpu);
- printk(KERN_INFO "Processor %d spun down\n", cpu);
+ pcpu_free_lowcore(pcpu);
+ atomic_dec(&init_mm.context.attach_count);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}
-void cpu_die(void)
+void __noreturn cpu_die(void)
{
idle_task_exit();
- signal_processor(smp_processor_id(), sigp_stop);
- BUG();
- for (;;);
+ pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ for (;;) ;
}
#endif /* CONFIG_HOTPLUG_CPU */
-void __init smp_prepare_cpus(unsigned int max_cpus)
+void __init smp_fill_possible_mask(void)
{
-#ifndef CONFIG_64BIT
- unsigned long save_area = 0;
-#endif
- unsigned long async_stack, panic_stack;
- struct _lowcore *lowcore;
- unsigned int cpu;
- int lc_order;
+ unsigned int possible, sclp, cpu;
- smp_detect_cpus();
+ sclp = sclp_get_max_cpu() ?: nr_cpu_ids;
+ possible = setup_possible_cpus ?: nr_cpu_ids;
+ possible = min(possible, sclp);
+ for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
+}
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
/* request the 0x1201 emergency signal external interrupt */
- if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
+ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
- print_cpu_info(&S390_lowcore.cpu_data);
-
- /* Reallocate current lowcore, but keep its contents. */
- lc_order = sizeof(long) == 8 ? 1 : 0;
- lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
- panic_stack = __get_free_page(GFP_KERNEL);
- async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE)
- save_area = get_zeroed_page(GFP_KERNEL);
-#endif
- local_irq_disable();
- local_mcck_disable();
- lowcore_ptr[smp_processor_id()] = lowcore;
- *lowcore = S390_lowcore;
- lowcore->panic_stack = panic_stack + PAGE_SIZE;
- lowcore->async_stack = async_stack + ASYNC_SIZE;
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE)
- lowcore->extended_save_area_addr = (u32) save_area;
-#endif
- set_prefix((u32)(unsigned long) lowcore);
- local_mcck_enable();
- local_irq_enable();
- for_each_possible_cpu(cpu)
- if (cpu != smp_processor_id())
- smp_create_idle(cpu);
+ /* request the 0x1202 external call external interrupt */
+ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
+ panic("Couldn't request external interrupt 0x1202");
+ smp_detect_cpus();
}
void __init smp_prepare_boot_cpu(void)
{
- BUG_ON(smp_processor_id() != 0);
-
- current_thread_info()->cpu = 0;
- cpu_set(0, cpu_present_map);
- cpu_set(0, cpu_online_map);
+ struct pcpu *pcpu = pcpu_devices;
+
+ boot_cpu_address = stap();
+ pcpu->state = CPU_STATE_CONFIGURED;
+ pcpu->address = boot_cpu_address;
+ pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
+ pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
+ + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
+ + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
S390_lowcore.percpu_offset = __per_cpu_offset[0];
- current_set[0] = current;
- smp_cpu_state[0] = CPU_STATE_CONFIGURED;
- smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
- spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
+ smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ set_cpu_present(0, true);
+ set_cpu_online(0, true);
}
void __init smp_cpus_done(unsigned int max_cpus)
{
}
+void __init smp_setup_processor_id(void)
+{
+	/* Seed the lowcore of the running cpu with the values for cpu 0. */
+	const int cpu = 0;
+
+	S390_lowcore.cpu_nr = cpu;
+	S390_lowcore.spinlock_lockval = arch_spin_lockval(cpu);
+}
+
/*
* the frequency of the profiling timer can be changed
* by writing a multiplier value into /proc/profile.
@@ -868,52 +815,58 @@ int setup_profiling_timer(unsigned int multiplier)
}
#ifdef CONFIG_HOTPLUG_CPU
-static ssize_t cpu_configure_show(struct sys_device *dev, char *buf)
+static ssize_t cpu_configure_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]);
+ count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
-static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
- size_t count)
+static ssize_t cpu_configure_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
- int cpu = dev->id;
- int val, rc;
+ struct pcpu *pcpu;
+ int cpu, val, rc;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
-
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
rc = -EBUSY;
- if (cpu_online(cpu))
+ /* disallow configuration changes of online cpus and cpu 0 */
+ cpu = dev->id;
+ if (cpu_online(cpu) || cpu == 0)
goto out;
+ pcpu = pcpu_devices + cpu;
rc = 0;
switch (val) {
case 0:
- if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
- rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
- if (!rc) {
- smp_cpu_state[cpu] = CPU_STATE_STANDBY;
- smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
- }
- }
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ break;
+ rc = sclp_cpu_deconfigure(pcpu->address);
+ if (rc)
+ break;
+ pcpu->state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ topology_expect_change();
break;
case 1:
- if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
- rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
- if (!rc) {
- smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
- smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
- }
- }
+ if (pcpu->state != CPU_STATE_STANDBY)
+ break;
+ rc = sclp_cpu_configure(pcpu->address);
+ if (rc)
+ break;
+ pcpu->state = CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ topology_expect_change();
break;
default:
break;
@@ -923,50 +876,21 @@ out:
put_online_cpus();
return rc ? rc : count;
}
-static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
+static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
#endif /* CONFIG_HOTPLUG_CPU */
-static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf)
+static ssize_t show_cpu_address(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- int cpu = dev->id;
- ssize_t count;
-
- mutex_lock(&smp_cpu_state_mutex);
- switch (smp_cpu_polarization[cpu]) {
- case POLARIZATION_HRZ:
- count = sprintf(buf, "horizontal\n");
- break;
- case POLARIZATION_VL:
- count = sprintf(buf, "vertical:low\n");
- break;
- case POLARIZATION_VM:
- count = sprintf(buf, "vertical:medium\n");
- break;
- case POLARIZATION_VH:
- count = sprintf(buf, "vertical:high\n");
- break;
- default:
- count = sprintf(buf, "unknown\n");
- break;
- }
- mutex_unlock(&smp_cpu_state_mutex);
- return count;
+ return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
}
-static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
-
-static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
-{
- return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
-}
-static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL);
-
+static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
static struct attribute *cpu_common_attrs[] = {
#ifdef CONFIG_HOTPLUG_CPU
- &attr_configure.attr,
+ &dev_attr_configure.attr,
#endif
- &attr_address.attr,
- &attr_polarization.attr,
+ &dev_attr_address.attr,
NULL,
};
@@ -974,53 +898,45 @@ static struct attribute_group cpu_common_attr_group = {
.attrs = cpu_common_attrs,
};
-static ssize_t show_capability(struct sys_device *dev, char *buf)
-{
- unsigned int capability;
- int rc;
-
- rc = get_cpu_capability(&capability);
- if (rc)
- return rc;
- return sprintf(buf, "%u\n", capability);
-}
-static SYSDEV_ATTR(capability, 0444, show_capability, NULL);
-
-static ssize_t show_idle_count(struct sys_device *dev, char *buf)
+static ssize_t show_idle_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct s390_idle_data *idle;
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
unsigned long long idle_count;
-
- idle = &per_cpu(s390_idle, dev->id);
- spin_lock_irq(&idle->lock);
- idle_count = idle->idle_count;
- spin_unlock_irq(&idle->lock);
+ unsigned int sequence;
+
+ do {
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_count = ACCESS_ONCE(idle->idle_count);
+ if (ACCESS_ONCE(idle->clock_idle_enter))
+ idle_count++;
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
return sprintf(buf, "%llu\n", idle_count);
}
-static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
+static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
-static ssize_t show_idle_time(struct sys_device *dev, char *buf)
+static ssize_t show_idle_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct s390_idle_data *idle;
- unsigned long long new_time;
-
- idle = &per_cpu(s390_idle, dev->id);
- spin_lock_irq(&idle->lock);
- if (idle->in_idle) {
- new_time = get_clock();
- idle->idle_time += new_time - idle->idle_enter;
- idle->idle_enter = new_time;
- }
- new_time = idle->idle_time;
- spin_unlock_irq(&idle->lock);
- return sprintf(buf, "%llu\n", new_time >> 12);
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long now, idle_time, idle_enter, idle_exit;
+ unsigned int sequence;
+
+ do {
+ now = get_tod_clock();
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_time = ACCESS_ONCE(idle->idle_time);
+ idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
+ idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+ idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+ return sprintf(buf, "%llu\n", idle_time >> 12);
}
-static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
static struct attribute *cpu_online_attrs[] = {
- &attr_capability.attr,
- &attr_idle_count.attr,
- &attr_idle_time_us.attr,
+ &dev_attr_idle_count.attr,
+ &dev_attr_idle_time_us.attr,
NULL,
};
@@ -1028,44 +944,36 @@ static struct attribute_group cpu_online_attr_group = {
.attrs = cpu_online_attrs,
};
-static int __cpuinit smp_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
- struct cpu *c = &per_cpu(cpu_devices, cpu);
- struct sys_device *s = &c->sysdev;
- struct s390_idle_data *idle;
+ struct cpu *c = pcpu_devices[cpu].cpu;
+ struct device *s = &c->dev;
+ int err = 0;
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- idle = &per_cpu(s390_idle, cpu);
- spin_lock_irq(&idle->lock);
- idle->idle_enter = 0;
- idle->idle_time = 0;
- idle->idle_count = 0;
- spin_unlock_irq(&idle->lock);
- if (sysfs_create_group(&s->kobj, &cpu_online_attr_group))
- return NOTIFY_BAD;
+ err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
break;
case CPU_DEAD:
- case CPU_DEAD_FROZEN:
sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
break;
}
- return NOTIFY_OK;
+ return notifier_from_errno(err);
}
-static struct notifier_block __cpuinitdata smp_cpu_nb = {
- .notifier_call = smp_cpu_notify,
-};
-
-static int __devinit smp_add_present_cpu(int cpu)
+static int smp_add_present_cpu(int cpu)
{
- struct cpu *c = &per_cpu(cpu_devices, cpu);
- struct sys_device *s = &c->sysdev;
+ struct device *s;
+ struct cpu *c;
int rc;
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ pcpu_devices[cpu].cpu = c;
+ s = &c->dev;
c->hotpluggable = 1;
rc = register_cpu(c, cpu);
if (rc)
@@ -1073,11 +981,20 @@ static int __devinit smp_add_present_cpu(int cpu)
rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
if (rc)
goto out_cpu;
- if (!cpu_online(cpu))
- goto out;
- rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
- if (!rc)
- return 0;
+ if (cpu_online(cpu)) {
+ rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ if (rc)
+ goto out_online;
+ }
+ rc = topology_cpu_init(c);
+ if (rc)
+ goto out_topology;
+ return 0;
+
+out_topology:
+ if (cpu_online(cpu))
+ sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+out_online:
sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
out_cpu:
#ifdef CONFIG_HOTPLUG_CPU
@@ -1091,32 +1008,26 @@ out:
int __ref smp_rescan_cpus(void)
{
- cpumask_t newcpus;
- int cpu;
- int rc;
+ struct sclp_cpu_info *info;
+ int nr;
+ info = smp_get_cpu_info();
+ if (!info)
+ return -ENOMEM;
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
- newcpus = cpu_present_map;
- rc = __smp_rescan_cpus();
- if (rc)
- goto out;
- cpus_andnot(newcpus, cpu_present_map, newcpus);
- for_each_cpu_mask(cpu, newcpus) {
- rc = smp_add_present_cpu(cpu);
- if (rc)
- cpu_clear(cpu, cpu_present_map);
- }
- rc = 0;
-out:
+ nr = __smp_rescan_cpus(info, 1);
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
- if (!cpus_empty(newcpus))
+ kfree(info);
+ if (nr)
topology_schedule_update();
- return rc;
+ return 0;
}
-static ssize_t __ref rescan_store(struct sys_device *dev, const char *buf,
+static ssize_t __ref rescan_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
size_t count)
{
int rc;
@@ -1124,66 +1035,29 @@ static ssize_t __ref rescan_store(struct sys_device *dev, const char *buf,
rc = smp_rescan_cpus();
return rc ? rc : count;
}
-static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
+static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
#endif /* CONFIG_HOTPLUG_CPU */
-static ssize_t dispatching_show(struct sys_device *dev, char *buf)
+static int __init s390_smp_init(void)
{
- ssize_t count;
-
- mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", cpu_management);
- mutex_unlock(&smp_cpu_state_mutex);
- return count;
-}
-
-static ssize_t dispatching_store(struct sys_device *dev, const char *buf,
- size_t count)
-{
- int val, rc;
- char delim;
-
- if (sscanf(buf, "%d %c", &val, &delim) != 1)
- return -EINVAL;
- if (val != 0 && val != 1)
- return -EINVAL;
- rc = 0;
- get_online_cpus();
- mutex_lock(&smp_cpu_state_mutex);
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (!rc)
- cpu_management = val;
-out:
- mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
- return rc ? rc : count;
-}
-static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store);
-
-static int __init topology_init(void)
-{
- int cpu;
- int rc;
-
- register_cpu_notifier(&smp_cpu_nb);
+ int cpu, rc = 0;
#ifdef CONFIG_HOTPLUG_CPU
- rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
- &attr_rescan.attr);
+ rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
return rc;
#endif
- rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
- &attr_dispatching.attr);
- if (rc)
- return rc;
+ cpu_notifier_register_begin();
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
- return rc;
+ goto out;
}
- return 0;
+
+ __hotcpu_notifier(smp_cpu_notify, 0);
+
+out:
+ cpu_notifier_register_done();
+ return rc;
}
-subsys_initcall(topology_init);
+subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 85e46a5d0e0..1785cd82253 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -1,15 +1,14 @@
/*
- * arch/s390/kernel/stacktrace.c
- *
* Stack trace management functions
*
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
+#include <linux/module.h>
static unsigned long save_context_stack(struct stack_trace *trace,
unsigned long sp,
@@ -81,6 +80,7 @@ void save_stack_trace(struct stack_trace *trace)
S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE, 1);
}
+EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
@@ -93,3 +93,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
new file mode 100644
index 00000000000..a7a7537ce1e
--- /dev/null
+++ b/arch/s390/kernel/suspend.c
@@ -0,0 +1,225 @@
+/*
+ * Suspend support specific for s390.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ */
+
+#include <linux/pfn.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
+#include "entry.h"
+
+/*
+ * References to section boundaries
+ */
+extern const void __nosave_begin, __nosave_end;
+
+/*
+ * The restore of the saved pages in a hibernation image will set
+ * the change and referenced bits in the storage key for each page.
+ * Overindication of the referenced bits after a hibernation cycle
+ * does not cause any harm but the overindication of the change bits
+ * would cause trouble.
+ * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
+ * page to the most significant byte of the associated page frame
+ * number in the hibernation image.
+ */
+
+/*
+ * Key storage is allocated as a linked list of pages.
+ * The size of the keys array is (PAGE_SIZE - sizeof(long))
+ */
+struct page_key_data {
+ struct page_key_data *next; /* next page of keys in the singly linked list */
+ unsigned char data[]; /* storage keys, one byte per image page */
+};
+
+/* Number of key bytes that fit into one list page after the next pointer. */
+#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *))
+
+/* Head of the key page list; built by page_key_alloc(), torn down by page_key_free(). */
+static struct page_key_data *page_key_data;
+/* Current list page for reading (page_key_write) and writing (page_key_memorize). */
+static struct page_key_data *page_key_rp, *page_key_wp;
+/* Index into ->data of the current read page and the current write page. */
+static unsigned long page_key_rx, page_key_wx;
+/*
+ * Buffer allocated by suspend_pm_cb() with order LC_ORDER; swsusp_asm64.S
+ * copies the absolute zero pages into it on suspend and back on resume.
+ */
+unsigned long suspend_zero_pages;
+
+/*
+ * For each page in the hibernation image one additional byte is
+ * stored in the most significant byte of the page frame number.
+ * On suspend no additional memory is required but on resume the
+ * keys need to be memorized until the page data has been restored.
+ * Only then can the storage keys be set to their old state.
+ */
+unsigned long page_key_additional_pages(unsigned long pages)
+{
+	/* One key byte per image page, PAGE_KEY_DATA_SIZE bytes per key page. */
+	unsigned long nr_key_pages = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+
+	return nr_key_pages;
+}
+
+/*
+ * Free page_key_data list of arrays.
+ */
+void page_key_free(void)
+{
+	struct page_key_data *head;
+
+	/* Pop pages off the list head until the list is empty. */
+	for (head = page_key_data; head; head = page_key_data) {
+		/* Unlink first: the next pointer lives inside the page. */
+		page_key_data = head->next;
+		free_page((unsigned long) head);
+	}
+}
+
+/*
+ * Allocate page_key_data list of arrays with enough room to store
+ * one byte for each page in the hibernation image.
+ */
+int page_key_alloc(unsigned long pages)
+{
+	unsigned long nr_key_pages = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+	struct page_key_data *page;
+	unsigned long i;
+
+	for (i = 0; i < nr_key_pages; i++) {
+		page = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
+		if (!page) {
+			/* Drop everything allocated so far. */
+			page_key_free();
+			return -ENOMEM;
+		}
+		/* Push the new page onto the front of the list. */
+		page->next = page_key_data;
+		page_key_data = page;
+	}
+
+	/* Both the read and the write cursor start at the list head. */
+	page_key_rp = page_key_data;
+	page_key_wp = page_key_data;
+	page_key_rx = 0;
+	page_key_wx = 0;
+	return 0;
+}
+
+/*
+ * Save the storage key into the upper 8 bits of the page frame number.
+ */
+void page_key_read(unsigned long *pfn)
+{
+	/* First byte of *pfn in memory; holds the key inside the image. */
+	unsigned char *key_byte = (unsigned char *) pfn;
+	unsigned long addr;
+
+	/* Resolve the page address before the pfn value is modified. */
+	addr = (unsigned long) page_address(pfn_to_page(*pfn));
+	*key_byte = (unsigned char) page_get_storage_key(addr);
+}
+
+/*
+ * Extract the storage key from the upper 8 bits of the page frame number
+ * and store it in the page_key_data list of arrays.
+ */
+void page_key_memorize(unsigned long *pfn)
+{
+	unsigned char *key_byte = (unsigned char *) pfn;
+
+	/* Move the key out of the pfn and into the current key page. */
+	page_key_wp->data[page_key_wx++] = *key_byte;
+	*key_byte = 0;
+
+	/* Current key page is full: continue with the next one. */
+	if (page_key_wx >= PAGE_KEY_DATA_SIZE) {
+		page_key_wp = page_key_wp->next;
+		page_key_wx = 0;
+	}
+}
+
+/*
+ * Get the next key from the page_key_data list of arrays and set the
+ * storage key of the page referred to by @address. If @address refers to
+ * a "safe" page the swsusp_arch_resume code will transfer the storage
+ * key from the buffer page to the original page.
+ */
+void page_key_write(void *address)
+{
+	/* Apply the next memorized key to the page at @address. */
+	page_set_storage_key((unsigned long) address,
+			     page_key_rp->data[page_key_rx], 0);
+	/*
+	 * Stay on the current key page until all of its keys have been
+	 * consumed. This must mirror page_key_memorize(), which fills
+	 * PAGE_KEY_DATA_SIZE keys per page before moving on; advancing
+	 * after every key would read only data[0] of each page and run
+	 * off the end of the list.
+	 */
+	if (++page_key_rx < PAGE_KEY_DATA_SIZE)
+		return;
+	page_key_rp = page_key_rp->next;
+	page_key_rx = 0;
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long first_nosave = PFN_DOWN(__pa(&__nosave_begin));
+	unsigned long end_nosave = PFN_DOWN(__pa(&__nosave_end));
+
+	/* Always save lowcore pages (LC protection might be enabled). */
+	if (pfn <= LC_PAGES)
+		return 0;
+	/* Pages inside the nosave section are never part of the image. */
+	if (first_nosave <= pfn && pfn < end_nosave)
+		return 1;
+	/* Skip memory holes and read-only pages (NSS, DCSS, ...). */
+	return tprot(PFN_PHYS(pfn)) ? 1 : 0;
+}
+
+/*
+ * PM notifier callback for suspend
+ */
+static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
+			 void *ptr)
+{
+	/* Before suspend/hibernation: get the buffer for the zero pages. */
+	if (action == PM_SUSPEND_PREPARE || action == PM_HIBERNATION_PREPARE) {
+		suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
+		return suspend_zero_pages ? NOTIFY_OK : NOTIFY_BAD;
+	}
+	/* Afterwards: give the buffer back. */
+	if (action == PM_POST_SUSPEND || action == PM_POST_HIBERNATION) {
+		free_pages(suspend_zero_pages, LC_ORDER);
+		return NOTIFY_OK;
+	}
+	/* Any other event is not handled here. */
+	return NOTIFY_DONE;
+}
+
+/*
+ * Initcall that hooks suspend_pm_cb() up via pm_notifier(); always returns 0.
+ */
+static int __init suspend_pm_init(void)
+{
+ /* NOTE(review): second argument is presumably the notifier priority - confirm */
+ pm_notifier(suspend_pm_cb, 0);
+ return 0;
+}
+arch_initcall(suspend_pm_init);
+
+void save_processor_state(void)
+{
+ /* swsusp_arch_suspend() actually saves all cpu register contents.
+ * Machine checks must be disabled since swsusp_arch_suspend() stores
+ * register contents to their lowcore save areas. That's the same
+ * place where register contents on machine checks would be saved.
+ * To avoid register corruption disable machine checks.
+ * We must also disable machine checks in the new psw mask for
+ * program checks, since swsusp_arch_suspend() may generate program
+ * checks. Disabling machine checks for all other new psw masks is
+ * just paranoia.
+ */
+ local_mcck_disable();
+ /* Disable lowcore protection */
+ __ctl_clear_bit(0,28);
+ /*
+ * Clear the machine check bit in the external, svc, I/O and program
+ * check new PSW masks; restore_processor_state() sets it again.
+ */
+ S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
+}
+
+/*
+ * Undo save_processor_state() in reverse order: set the machine check bit
+ * in the new PSW masks again, switch lowcore protection back on and finally
+ * re-enable machine checks on this cpu.
+ */
+void restore_processor_state(void)
+{
+ S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
+ /* Enable lowcore protection */
+ __ctl_set_bit(0,28);
+ local_mcck_enable();
+}
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+ /*
+ * Invoked from swsusp_arch_resume once the prefix register is restored
+ * and DAT is active again.
+ * NOTE(review): judging by their names the helpers below log guest
+ * relocation info, reinitialize the channel subsystem and rescan PCI
+ * functions - confirm against their definitions.
+ */
+ lgr_info_log();
+ channel_subsystem_reinit();
+ zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
new file mode 100644
index 00000000000..6b09fdffbd2
--- /dev/null
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -0,0 +1,306 @@
+/*
+ * S390 64-bit swsusp implementation
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+/*
+ * Save register context in absolute 0 lowcore and call swsusp_save() to
+ * create in-memory kernel image. The context is saved in the designated
+ * "store status" memory locations (see POP).
+ * We return from this function twice. The first time during the suspend to
+ * disk process. The second time via the swsusp_arch_resume() function
+ * (see below) in the resume process.
+ * This function runs with disabled interrupts.
+ */
+ .section .text
+ENTRY(swsusp_arch_suspend)
+ stmg %r6,%r15,__SF_GPRS(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ /* Deactivate DAT */
+ stnsm __SF_EMPTY(%r15),0xfb
+
+ /* Store prefix register on stack */
+ stpx __SF_EMPTY(%r15)
+
+ /* Save prefix register contents for lowcore copy */
+ llgf %r10,__SF_EMPTY(%r15)
+
+ /* Get pointer to save area */
+ lghi %r1,0x1000
+
+ /* Save CPU address */
+ stap __LC_EXT_CPU_ADDR(%r0)
+
+ /* Store registers */
+ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
+ stfpc 0x31c(%r1) /* store fpu control */
+ std 0,0x200(%r1) /* store f0 */
+ std 1,0x208(%r1) /* store f1 */
+ std 2,0x210(%r1) /* store f2 */
+ std 3,0x218(%r1) /* store f3 */
+ std 4,0x220(%r1) /* store f4 */
+ std 5,0x228(%r1) /* store f5 */
+ std 6,0x230(%r1) /* store f6 */
+ std 7,0x238(%r1) /* store f7 */
+ std 8,0x240(%r1) /* store f8 */
+ std 9,0x248(%r1) /* store f9 */
+ std 10,0x250(%r1) /* store f10 */
+ std 11,0x258(%r1) /* store f11 */
+ std 12,0x260(%r1) /* store f12 */
+ std 13,0x268(%r1) /* store f13 */
+ std 14,0x270(%r1) /* store f14 */
+ std 15,0x278(%r1) /* store f15 */
+ stam %a0,%a15,0x340(%r1) /* store access registers */
+ stctg %c0,%c15,0x380(%r1) /* store control registers */
+ stmg %r0,%r15,0x280(%r1) /* store general registers */
+
+ stpt 0x328(%r1) /* store timer */
+ stck __SF_EMPTY(%r15) /* store clock */
+ stckc 0x330(%r1) /* store clock comparator */
+
+ /* Update cputime accounting before going to sleep */
+ lg %r0,__LC_LAST_UPDATE_TIMER
+ slg %r0,0x328(%r1)
+ alg %r0,__LC_SYSTEM_TIMER
+ stg %r0,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1)
+ lg %r0,__LC_LAST_UPDATE_CLOCK
+ slg %r0,__SF_EMPTY(%r15)
+ alg %r0,__LC_STEAL_TIMER
+ stg %r0,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
+
+ /* Activate DAT */
+ stosm __SF_EMPTY(%r15),0x04
+
+ /* Set prefix page to zero */
+ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+ spx __SF_EMPTY(%r15)
+
+ /* Save absolute zero pages */
+ larl %r2,suspend_zero_pages
+ lg %r2,0(%r2)
+ lghi %r4,0
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Copy lowcore to absolute zero lowcore */
+ lghi %r2,0
+ lgr %r4,%r10
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Save image */
+ brasl %r14,swsusp_save
+
+ /* Restore prefix register and return */
+ lghi %r1,0x1000
+ spx 0x318(%r1)
+ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+ lghi %r2,0
+ br %r14
+
+/*
+ * Restore saved memory image to correct place and restore register context.
+ * Then we return to the function that called swsusp_arch_suspend().
+ * swsusp_arch_resume() runs with disabled interrupts.
+ */
+ENTRY(swsusp_arch_resume)
+ stmg %r6,%r15,__SF_GPRS(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ /* Make all free pages stable */
+ lghi %r2,1
+ brasl %r14,arch_set_page_states
+
+ /* Deactivate DAT */
+ stnsm __SF_EMPTY(%r15),0xfb
+
+ /* Set prefix page to zero */
+ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+ spx __SF_EMPTY(%r15)
+
+ /* Restore saved image */
+ larl %r1,restore_pblist
+ lg %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 2f
+0:
+ lg %r2,8(%r1)
+ lg %r4,0(%r1)
+ iske %r0,%r4
+ lghi %r3,PAGE_SIZE
+ lghi %r5,PAGE_SIZE
+1:
+ mvcle %r2,%r4,0
+ jo 1b
+ lg %r2,8(%r1)
+ sske %r0,%r2
+ lg %r1,16(%r1)
+ ltgr %r1,%r1
+ jnz 0b
+2:
+ ptlb /* flush tlb */
+
+ /* Reset System */
+ larl %r1,restart_entry
+ larl %r2,.Lrestart_diag308_psw
+ og %r1,0(%r2)
+ stg %r1,0(%r0)
+ larl %r1,.Lnew_pgm_check_psw
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r1)
+ mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
+ lghi %r0,0
+ diag %r0,%r0,0x308
+restart_entry:
+ lhi %r1,1
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ sam64
+ larl %r1,.Lnew_pgm_check_psw
+ lpswe 0(%r1)
+pgm_check_entry:
+
+ /* Switch to original suspend CPU */
+ larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
+ stap 0(%r1)
+ llgh %r2,0(%r1)
+ llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
+ cgr %r1,%r2
+ je restore_registers /* r1 = r2 -> nothing to do */
+ larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
+ mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
+3:
+ sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */
+ brc 8,4f /* accepted */
+ brc 2,3b /* busy, try again */
+
+ /* Suspend CPU not available -> panic */
+ larl %r15,init_thread_union
+ ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER)
+ larl %r2,.Lpanic_string
+ larl %r3,_sclp_print_early
+ lghi %r1,0
+ sam31
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ basr %r14,%r3
+ larl %r3,.Ldisabled_wait_31
+ lpsw 0(%r3)
+4:
+ /* Switch to suspend CPU */
+ sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */
+ brc 2,4b /* busy, try again */
+5:
+ sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */
+ brc 2,5b /* busy, try again */
+6: j 6b
+
+restart_suspend:
+ larl %r1,.Lresume_cpu
+ llgh %r2,0(%r1)
+7:
+ sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */
+ brc 8,7b /* accepted, status 0, still running */
+ brc 2,7b /* busy, try again */
+ tmll %r9,0x40 /* Test if resume CPU is stopped */
+ jz 7b
+
+restore_registers:
+ /*
+ * Reload the register context that swsusp_arch_suspend stored in the
+ * save area at absolute 0x1000, put the absolute zero pages and the
+ * prefix register back and return 0 to the caller of
+ * swsusp_arch_suspend().
+ */
+ /* Restore registers */
+ lghi %r13,0x1000 /* %r13 = pointer to save area */
+
+ /* Ignore time spent in suspended state. */
+ llgf %r1,0x318(%r13) /* %r1 = prefix value saved at suspend */
+ stck __LC_LAST_UPDATE_CLOCK(%r1)
+ spt 0x328(%r13) /* reprogram timer */
+ //sckc 0x330(%r13) /* set clock comparator */
+
+ lctlg %c0,%c15,0x380(%r13) /* load control registers */
+ lam %a0,%a15,0x340(%r13) /* load access registers */
+
+ lfpc 0x31c(%r13) /* load fpu control */
+ ld 0,0x200(%r13) /* load f0 */
+ ld 1,0x208(%r13) /* load f1 */
+ ld 2,0x210(%r13) /* load f2 */
+ ld 3,0x218(%r13) /* load f3 */
+ ld 4,0x220(%r13) /* load f4 */
+ ld 5,0x228(%r13) /* load f5 */
+ ld 6,0x230(%r13) /* load f6 */
+ ld 7,0x238(%r13) /* load f7 */
+ ld 8,0x240(%r13) /* load f8 */
+ ld 9,0x248(%r13) /* load f9 */
+ ld 10,0x250(%r13) /* load f10 */
+ ld 11,0x258(%r13) /* load f11 */
+ ld 12,0x260(%r13) /* load f12 */
+ ld 13,0x268(%r13) /* load f13 */
+ ld 14,0x270(%r13) /* load f14 */
+ ld 15,0x278(%r13) /* load f15 */
+
+ /* Load old stack (0x2f8 is the %r15 slot of the stmg at 0x280) */
+ lg %r15,0x2f8(%r13)
+
+ /* Copy saved prefix value onto the stack for the spx below */
+ mvc __SF_EMPTY(4,%r15),0x318(%r13)
+
+ /* Restore absolute zero pages */
+ lghi %r2,0
+ larl %r4,suspend_zero_pages
+ lg %r4,0(%r4)
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Restore prefix register */
+ spx __SF_EMPTY(%r15)
+
+ /* Activate DAT */
+ stosm __SF_EMPTY(%r15),0x04
+
+ /* Make all free pages unstable */
+ lghi %r2,0
+ brasl %r14,arch_set_page_states
+
+ /* Call arch specific early resume code */
+ brasl %r14,s390_early_resume
+
+ /* Return 0 */
+ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+ lghi %r2,0
+ br %r14
+
+ .section .data..nosave,"aw",@progbits
+ .align 8
+.Ldisabled_wait_31:
+ .long 0x000a0000,0x00000000
+.Lpanic_string:
+ .asciz "Resume not possible because suspend CPU is no longer available"
+ .align 8
+.Lrestart_diag308_psw:
+ .long 0x00080000,0x80000000
+.Lrestart_suspend_psw:
+ .quad 0x0000000180000000,restart_suspend
+.Lnew_pgm_check_psw:
+ .quad 0,pgm_check_entry
+.Lresume_cpu:
+ .byte 0,0
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index 5fdb799062b..23eb222c165 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -1,8 +1,6 @@
/*
- * arch/s390/kernel/sys_s390.c
- *
* S390 version
- * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999, 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Thomas Spatzier (tspat@de.ibm.com)
*
@@ -32,40 +30,13 @@
#include <asm/uaccess.h>
#include "entry.h"
-/* common code for old and new mmaps */
-static inline long do_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- long error = -EBADF;
- struct file * file = NULL;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
-
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&current->mm->mmap_sem);
-
- if (file)
- fput(file);
-out:
- return error;
-}
-
/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux for S/390 isn't able to handle more than 5
- * system call parameters, so these system calls used a memory
- * block for parameter passing..
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
*/
-struct mmap_arg_struct {
+struct s390_mmap_arg_struct {
unsigned long addr;
unsigned long len;
unsigned long prot;
@@ -74,146 +45,48 @@ struct mmap_arg_struct {
unsigned long offset;
};
-asmlinkage long sys_mmap2(struct mmap_arg_struct __user *arg)
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
{
- struct mmap_arg_struct a;
+ struct s390_mmap_arg_struct a;
int error = -EFAULT;
if (copy_from_user(&a, arg, sizeof(a)))
goto out;
- error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
-asmlinkage long old_mmap(struct mmap_arg_struct __user *arg)
-{
- struct mmap_arg_struct a;
- long error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
-
- error = -EINVAL;
- if (a.offset & ~PAGE_MASK)
- goto out;
-
- error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+ error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
out:
return error;
}
-#ifndef CONFIG_64BIT
-struct sel_arg_struct {
- unsigned long n;
- fd_set __user *inp, *outp, *exp;
- struct timeval __user *tvp;
-};
-
-asmlinkage long old_select(struct sel_arg_struct __user *arg)
-{
- struct sel_arg_struct a;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- return -EFAULT;
- /* sys_select() does the appropriate kernel locking */
- return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-
-}
-#endif /* CONFIG_64BIT */
-
/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
*/
-asmlinkage long sys_ipc(uint call, int first, unsigned long second,
- unsigned long third, void __user *ptr)
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+ unsigned long, third, void __user *, ptr)
{
- struct ipc_kludge tmp;
- int ret;
-
- switch (call) {
- case SEMOP:
- return sys_semtimedop(first, (struct sembuf __user *)ptr,
- (unsigned)second, NULL);
- case SEMTIMEDOP:
- return sys_semtimedop(first, (struct sembuf __user *)ptr,
- (unsigned)second,
- (const struct timespec __user *) third);
- case SEMGET:
- return sys_semget(first, (int)second, third);
- case SEMCTL: {
- union semun fourth;
- if (!ptr)
- return -EINVAL;
- if (get_user(fourth.__pad, (void __user * __user *) ptr))
- return -EFAULT;
- return sys_semctl(first, (int)second, third, fourth);
- }
- case MSGSND:
- return sys_msgsnd (first, (struct msgbuf __user *) ptr,
- (size_t)second, third);
- break;
- case MSGRCV:
- if (!ptr)
- return -EINVAL;
- if (copy_from_user (&tmp, (struct ipc_kludge __user *) ptr,
- sizeof (struct ipc_kludge)))
- return -EFAULT;
- return sys_msgrcv (first, tmp.msgp,
- (size_t)second, tmp.msgtyp, third);
- case MSGGET:
- return sys_msgget((key_t)first, (int)second);
- case MSGCTL:
- return sys_msgctl(first, (int)second,
- (struct msqid_ds __user *)ptr);
-
- case SHMAT: {
- ulong raddr;
- ret = do_shmat(first, (char __user *)ptr,
- (int)second, &raddr);
- if (ret)
- return ret;
- return put_user (raddr, (ulong __user *) third);
- break;
- }
- case SHMDT:
- return sys_shmdt ((char __user *)ptr);
- case SHMGET:
- return sys_shmget(first, (size_t)second, third);
- case SHMCTL:
- return sys_shmctl(first, (int)second,
- (struct shmid_ds __user *) ptr);
- default:
- return -ENOSYS;
-
- }
-
- return -EINVAL;
+ if (call >> 16)
+ return -EINVAL;
+ /* The s390 sys_ipc variant has only five parameters instead of six
+ * like the generic variant. The only difference is the handling of
+ * the SEMTIMEDOP subcall where on s390 the third parameter is used
+ * as a pointer to a struct timespec where the generic variant uses
+ * the fifth parameter.
+ * Therefore we can call the generic variant by simply passing the
+ * third parameter also as fifth parameter.
+ */
+ return sys_ipc(call, first, second, third, ptr, third);
}
#ifdef CONFIG_64BIT
-asmlinkage long s390x_newuname(struct new_utsname __user *name)
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
{
- int ret = sys_newuname(name);
-
- if (current->personality == PER_LINUX32 && !ret) {
- ret = copy_to_user(name->machine, "s390\0\0\0\0", 8);
- if (ret) ret = -EFAULT;
- }
- return ret;
-}
+ unsigned int ret;
-asmlinkage long s390x_personality(unsigned long personality)
-{
- int ret;
-
- if (current->personality == PER_LINUX32 && personality == PER_LINUX)
- personality = PER_LINUX32;
+ if (personality(current->personality) == PER_LINUX32 &&
+ personality(personality) == PER_LINUX)
+ personality |= PER_LINUX32;
ret = sys_personality(personality);
- if (ret == PER_LINUX32)
- ret = PER_LINUX;
+ if (personality(ret) == PER_LINUX32)
+ ret &= ~PER_LINUX32;
return ret;
}
@@ -224,15 +97,13 @@ asmlinkage long s390x_personality(unsigned long personality)
*/
#ifndef CONFIG_64BIT
-asmlinkage long
-s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice)
+SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low,
+ size_t, len, int, advice)
{
return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low,
len, advice);
}
-#endif
-
struct fadvise64_64_args {
int fd;
long long offset;
@@ -240,8 +111,7 @@ struct fadvise64_64_args {
int advice;
};
-asmlinkage long
-s390_fadvise64_64(struct fadvise64_64_args __user *args)
+SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
{
struct fadvise64_64_args a;
@@ -250,7 +120,6 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args)
return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
}
-#ifndef CONFIG_64BIT
/*
* This is a wrapper to call sys_fallocate(). For 31 bit s390 the last
* 64 bit argument "len" is split into the upper and lower 32 bits. The
@@ -263,8 +132,8 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args)
* to
* %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len
*/
-asmlinkage long s390_fallocate(int fd, int mode, loff_t offset,
- u32 len_high, u32 len_low)
+SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset,
+ u32, len_high, u32, len_low)
{
return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low);
}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index c87ec687d4c..fe5cdf29a00 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -9,324 +9,350 @@
#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall)
NI_SYSCALL /* 0 */
-SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper)
+SYSCALL(sys_exit,sys_exit,compat_sys_exit)
SYSCALL(sys_fork,sys_fork,sys_fork)
-SYSCALL(sys_read,sys_read,sys32_read_wrapper)
-SYSCALL(sys_write,sys_write,sys32_write_wrapper)
-SYSCALL(sys_open,sys_open,sys32_open_wrapper) /* 5 */
-SYSCALL(sys_close,sys_close,sys32_close_wrapper)
+SYSCALL(sys_read,sys_read,compat_sys_s390_read)
+SYSCALL(sys_write,sys_write,compat_sys_s390_write)
+SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */
+SYSCALL(sys_close,sys_close,compat_sys_close)
SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall)
-SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper)
-SYSCALL(sys_link,sys_link,sys32_link_wrapper)
-SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */
-SYSCALL(sys_execve,sys_execve,sys32_execve)
-SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper)
-SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */
-SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper)
-SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */
-SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/
+SYSCALL(sys_creat,sys_creat,compat_sys_creat)
+SYSCALL(sys_link,sys_link,compat_sys_link)
+SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */
+SYSCALL(sys_execve,sys_execve,compat_sys_execve)
+SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir)
+SYSCALL(sys_time,sys_ni_syscall,compat_sys_time) /* old time syscall */
+SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod)
+SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */
+SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/
NI_SYSCALL /* old break syscall holder */
NI_SYSCALL /* old stat syscall holder */
-SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper)
+SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek)
SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */
-SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper)
-SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper)
-SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/
-SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/
-SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */
-SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper)
-SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper)
+SYSCALL(sys_mount,sys_mount,compat_sys_mount)
+SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount)
+SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/
+SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/
+SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */
+SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace)
+SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm)
NI_SYSCALL /* old fstat syscall */
-SYSCALL(sys_pause,sys_pause,sys32_pause)
-SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */
+SYSCALL(sys_pause,sys_pause,sys_pause)
+SYSCALL(sys_utime,sys_utime,compat_sys_utime) /* 30 */
NI_SYSCALL /* old stty syscall */
NI_SYSCALL /* old gtty syscall */
-SYSCALL(sys_access,sys_access,sys32_access_wrapper)
-SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper)
+SYSCALL(sys_access,sys_access,compat_sys_access)
+SYSCALL(sys_nice,sys_nice,compat_sys_nice)
NI_SYSCALL /* 35 old ftime syscall */
SYSCALL(sys_sync,sys_sync,sys_sync)
-SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper)
-SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper)
-SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper)
-SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */
-SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper)
-SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper)
-SYSCALL(sys_times,sys_times,compat_sys_times_wrapper)
+SYSCALL(sys_kill,sys_kill,compat_sys_kill)
+SYSCALL(sys_rename,sys_rename,compat_sys_rename)
+SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir)
+SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */
+SYSCALL(sys_dup,sys_dup,compat_sys_dup)
+SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe)
+SYSCALL(sys_times,sys_times,compat_sys_times)
NI_SYSCALL /* old prof syscall */
-SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */
-SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/
-SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/
-SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper)
-SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */
-SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */
-SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper)
-SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper)
+SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */
+SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/
+SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/
+SYSCALL(sys_signal,sys_signal,compat_sys_signal)
+SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */
+SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */
+SYSCALL(sys_acct,sys_acct,compat_sys_acct)
+SYSCALL(sys_umount,sys_umount,compat_sys_umount)
NI_SYSCALL /* old lock syscall */
-SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper)
-SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */
+SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl)
+SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl) /* 55 */
NI_SYSCALL /* intel mpx syscall */
-SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper)
+SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid)
NI_SYSCALL /* old ulimit syscall */
NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */
-SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper)
-SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper)
-SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper)
+SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */
+SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot)
+SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat)
+SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2)
SYSCALL(sys_getppid,sys_getppid,sys_getppid)
SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */
SYSCALL(sys_setsid,sys_setsid,sys_setsid)
-SYSCALL(sys_sigaction,sys_sigaction,sys32_sigaction_wrapper)
+SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction)
NI_SYSCALL /* old sgetmask syscall*/
NI_SYSCALL /* old ssetmask syscall*/
-SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */
-SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */
-SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper)
-SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper)
-SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper)
-SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */
-SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper)
-SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper)
-SYSCALL(sys_gettimeofday,sys_gettimeofday,sys32_gettimeofday_wrapper)
-SYSCALL(sys_settimeofday,sys_settimeofday,sys32_settimeofday_wrapper)
-SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */
-SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */
+SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */
+SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */
+SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend)
+SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending)
+SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname)
+SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit) /* 75 */
+SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit)
+SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage)
+SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday)
+SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday)
+SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */
+SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */
NI_SYSCALL /* old select syscall */
-SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper)
+SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink)
NI_SYSCALL /* old lstat syscall */
-SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */
-SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper)
-SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper)
-SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper)
-SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */
-SYSCALL(old_mmap,old_mmap,old32_mmap_wrapper) /* 90 */
-SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper)
-SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper)
-SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper)
-SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper)
-SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/
-SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper)
-SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper)
+SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */
+SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib)
+SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon)
+SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot)
+SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */
+SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */
+SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap)
+SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate)
+SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate)
+SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod)
+SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/
+SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority)
+SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority)
NI_SYSCALL /* old profil syscall */
-SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper)
-SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */
+SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs)
+SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs) /* 100 */
NI_SYSCALL /* ioperm for i386 */
-SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper)
-SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper)
-SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer_wrapper)
-SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer_wrapper) /* 105 */
-SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper)
-SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper)
-SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper)
+SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall)
+SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog)
+SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer)
+SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */
+SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat)
+SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat)
+SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat)
NI_SYSCALL /* old uname syscall */
-SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */
+SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */
SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup)
NI_SYSCALL /* old "idle" system call */
NI_SYSCALL /* vm86old for i386 */
-SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper)
-SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */
-SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper)
-SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper)
-SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
-SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn)
-SYSCALL(sys_clone,sys_clone,sys32_clone) /* 120 */
-SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper)
-SYSCALL(sys_newuname,s390x_newuname,sys32_newuname_wrapper)
+SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4)
+SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */
+SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo)
+SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc)
+SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync)
+SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn)
+SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */
+SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname)
+SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname)
NI_SYSCALL /* modify_ldt for i386 */
-SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper)
-SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */
-SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper)
+SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex)
+SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */
+SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask)
NI_SYSCALL /* old "create module" */
-SYSCALL(sys_init_module,sys_init_module,sys32_init_module_wrapper)
-SYSCALL(sys_delete_module,sys_delete_module,sys32_delete_module_wrapper)
+SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module)
+SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module)
NI_SYSCALL /* 130: old get_kernel_syms */
-SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper)
-SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper)
-SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper)
-SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper)
-SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */
-SYSCALL(sys_personality,s390x_personality,sys32_personality_wrapper)
+SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl)
+SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid)
+SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir)
+SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush)
+SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs) /* 135 */
+SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality)
NI_SYSCALL /* for afs_syscall */
-SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */
-SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */
-SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */
-SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper)
-SYSCALL(sys_select,sys_select,compat_sys_select_wrapper)
-SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper)
-SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper)
-SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */
-SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper)
-SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper)
-SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper)
-SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper)
-SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */
-SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper)
-SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper)
+SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */
+SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */
+SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */
+SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents)
+SYSCALL(sys_select,sys_select,compat_sys_select)
+SYSCALL(sys_flock,sys_flock,compat_sys_flock)
+SYSCALL(sys_msync,sys_msync,compat_sys_msync)
+SYSCALL(sys_readv,sys_readv,compat_sys_readv) /* 145 */
+SYSCALL(sys_writev,sys_writev,compat_sys_writev)
+SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid)
+SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync)
+SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl)
+SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock) /* 150 */
+SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock)
+SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall)
SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall)
-SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper)
-SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */
-SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper)
-SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper)
+SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam)
+SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */
+SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler)
+SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler)
SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield)
-SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper)
-SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */
-SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,sys32_sched_rr_get_interval_wrapper)
-SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper)
-SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper)
-SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */
-SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */
+SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
+SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */
+SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
+SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep)
+SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap)
+SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */
+SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */
NI_SYSCALL /* for vm86 */
NI_SYSCALL /* old sys_query_module */
-SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
-SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper)
-SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */
-SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */
-SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
-SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn)
-SYSCALL(sys_rt_sigaction,sys_rt_sigaction,sys32_rt_sigaction_wrapper)
-SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,sys32_rt_sigprocmask_wrapper) /* 175 */
-SYSCALL(sys_rt_sigpending,sys_rt_sigpending,sys32_rt_sigpending_wrapper)
-SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait_wrapper)
-SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,sys32_rt_sigqueueinfo_wrapper)
-SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend_wrapper)
-SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */
-SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper)
-SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */
-SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper)
-SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper)
-SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */
-SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack)
-SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper)
+SYSCALL(sys_poll,sys_poll,compat_sys_poll)
+NI_SYSCALL /* old nfsservctl */
+SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */
+SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */
+SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl)
+SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn)
+SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction)
+SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */
+SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending)
+SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
+SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
+SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
+SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */
+SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64)
+SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */
+SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd)
+SYSCALL(sys_capget,sys_capget,compat_sys_capget)
+SYSCALL(sys_capset,sys_capset,compat_sys_capset) /* 185 */
+SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack)
+SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile)
NI_SYSCALL /* streams1 */
NI_SYSCALL /* streams2 */
SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */
-SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper)
-SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper)
-SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper)
-SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper)
-SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */
-SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper)
-SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper)
-SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper)
+SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit)
+SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2)
+SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64)
+SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64)
+SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */
+SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64)
+SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64)
+SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown)
SYSCALL(sys_getuid,sys_getuid,sys_getuid)
SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */
SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid)
SYSCALL(sys_getegid,sys_getegid,sys_getegid)
-SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper)
-SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper)
-SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */
-SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper)
-SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper)
-SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper)
-SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper)
-SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */
-SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper)
-SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper)
-SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper)
-SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper)
-SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */
-SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper)
-SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper)
-SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper)
-SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper)
-SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */
-SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper)
-SYSCALL(sys_readahead,sys_readahead,sys32_readahead)
-SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64)
-SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper)
-SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */
-SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper)
-SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper)
-SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper)
-SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper)
-SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */
-SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper)
-SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper)
-SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper)
-SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper)
-SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */
+SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid)
+SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid)
+SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups) /* 205 */
+SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups)
+SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown)
+SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid)
+SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid)
+SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */
+SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid)
+SYSCALL(sys_chown,sys_chown,compat_sys_chown)
+SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid)
+SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid)
+SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid) /* 215 */
+SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid)
+SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root)
+SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore)
+SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise)
+SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */
+SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64)
+SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead)
+SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64)
+SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr)
+SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */
+SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr)
+SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr)
+SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr)
+SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr)
+SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr) /* 230 */
+SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr)
+SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr)
+SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr)
+SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr)
+SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */
SYSCALL(sys_gettid,sys_gettid,sys_gettid)
-SYSCALL(sys_tkill,sys_tkill,sys_tkill)
-SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper)
-SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper)
-SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */
-SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill)
+SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill)
+SYSCALL(sys_futex,sys_futex,compat_sys_futex)
+SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity)
+SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */
+SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill)
NI_SYSCALL /* reserved for TUX */
-SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper)
-SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper)
-SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */
-SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper)
-SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper)
-SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper)
-SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper)
-SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */
-SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper)
-SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper)
-SYSCALL(s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper)
-SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper)
-SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */
-SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper)
-SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper)
-SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper)
-SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper)
-SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */
-SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
-SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
+SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup)
+SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy)
+SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents) /* 245 */
+SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit)
+SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel)
+SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group)
+SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create)
+SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */
+SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait)
+SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address)
+SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64)
+SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create)
+SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime) /* 255 */
+SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime)
+SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun)
+SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete)
+SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime)
+SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
+SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres)
+SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep)
NI_SYSCALL /* reserved for vserver */
-SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
-SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
-SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
-SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper)
+SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64)
+SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64)
+SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64)
+SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages)
NI_SYSCALL /* 268 sys_mbind */
NI_SYSCALL /* 269 sys_get_mempolicy */
NI_SYSCALL /* 270 sys_set_mempolicy */
-SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper)
-SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper)
-SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper)
-SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper)
-SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */
-SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper)
-SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper)
-SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper)
-SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper)
+SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open)
+SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink)
+SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend)
+SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive)
+SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */
+SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr)
+SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load)
+SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key)
+SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key)
SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */
-SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper)
-SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper)
-SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper)
+SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid)
+SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set)
+SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get)
SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init)
-SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */
-SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper)
+SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */
+SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
NI_SYSCALL /* 287 sys_migrate_pages */
-SYSCALL(sys_openat,sys_openat,compat_sys_openat_wrapper)
-SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper)
-SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */
-SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper)
-SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper)
-SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper)
-SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper)
-SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */
-SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper)
-SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper)
-SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper)
-SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper)
-SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */
-SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper)
-SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper)
-SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper)
-SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list_wrapper)
-SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list_wrapper)
-SYSCALL(sys_splice,sys_splice,sys_splice_wrapper)
-SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper)
-SYSCALL(sys_tee,sys_tee,sys_tee_wrapper)
-SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper)
+SYSCALL(sys_openat,sys_openat,compat_sys_openat)
+SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat)
+SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */
+SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat)
+SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat)
+SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64)
+SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat)
+SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */
+SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat)
+SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat)
+SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat)
+SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat)
+SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */
+SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6)
+SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll)
+SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare)
+SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list)
+SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list)
+SYSCALL(sys_splice,sys_splice,compat_sys_splice)
+SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range)
+SYSCALL(sys_tee,sys_tee,compat_sys_tee)
+SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice)
NI_SYSCALL /* 310 sys_move_pages */
-SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper)
-SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper)
-SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper)
-SYSCALL(s390_fallocate,sys_fallocate,sys_fallocate_wrapper)
-SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */
-SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper)
+SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu)
+SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait)
+SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes)
+SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate)
+SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat) /* 315 */
+SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd)
NI_SYSCALL /* 317 old sys_timer_fd */
-SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper)
-SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper)
-SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */
-SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper)
+SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd)
+SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create)
+SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
+SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime)
+SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4)
+SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2)
+SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1)
+SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */
+SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3)
+SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1)
+SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv)
+SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev)
+SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
+SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open)
+SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init)
+SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark)
+SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64)
+SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
+SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at)
+SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime)
+SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs)
+SYSCALL(sys_setns,sys_setns,compat_sys_setns)
+SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
+SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev)
+SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
+SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp)
+SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
+SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
+SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 00000000000..811f542b8ed
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright IBM Corp. 2001, 2009
+ * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/topology.h>
+
+/* Sigh, math-emu. Don't ask. */
+#include <asm/sfp-util.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+/*
+ * Passed as selector 2 to stsi() for the 15.1.x query in stsi_15_1_x().
+ * It is defined here but never assigned in this file.
+ */
+int topology_max_mnest;
+
+/*
+ * stsi - store system information
+ * @sysinfo: buffer whose address is handed to the STSI instruction
+ * @fc: function code, placed in the top four bits of register 0
+ * @sel1: selector 1, or-ed into the remaining bits of register 0
+ * @sel2: selector 2, passed in register 1
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	/*
+	 * Build the register 0 value in unsigned arithmetic. Shifting the
+	 * signed function code left by 28 would move a one bit into the
+	 * sign bit for fc >= 8 (fc 15 is used by stsi_15_1_x()).
+	 */
+	register int r0 asm("0") = ((unsigned int) fc << 28) | sel1;
+	register int r1 asm("1") = sel2;
+	int rc = 0;
+
+	/*
+	 * "jz 2f" skips the error load when the condition code is zero.
+	 * Otherwise rc is loaded with -EOPNOTSUPP at label 1, which is also
+	 * the fixup target named in the EX_TABLE entry for label 0.
+	 */
+	asm volatile(
+		" stsi 0(%3)\n"
+		"0: jz 2f\n"
+		"1: lhi %1,%4\n"
+		"2:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (r0), "+d" (rc)
+		: "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
+		: "cc", "memory");
+	if (rc)
+		return rc;
+	/* For fc 0 the level is read back from the top four bits of r0. */
+	return fc ? 0 : ((unsigned int) r0) >> 28;
+}
+EXPORT_SYMBOL(stsi);
+
+/*
+ * Run the STSI 1.1.1 query into @info and print the machine identification
+ * and capacity fields to @m. Nothing is printed when the query fails.
+ */
+static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
+{
+	int idx;
+
+	if (stsi(info, 1, 1, 1) != 0)
+		return;
+
+	/* EBCASC() rewrites each text field in place before it is printed. */
+	EBCASC(info->manufacturer, sizeof(info->manufacturer));
+	EBCASC(info->type, sizeof(info->type));
+	EBCASC(info->model, sizeof(info->model));
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+	EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+
+	seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
+	seq_printf(m, "Type: %-4.4s\n", info->type);
+
+	/*
+	 * With System z9 the former model field became model_capacity and
+	 * a new model field was added after the plant field. To keep older
+	 * programs working, "Model:" shows "model_capacity model", or only
+	 * "model_capacity" when the model string is empty.
+	 */
+	seq_printf(m, "Model: %-16.16s", info->model_capacity);
+	if (info->model[0])
+		seq_printf(m, " %-16.16s", info->model);
+	seq_putc(m, '\n');
+
+	seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence);
+	seq_printf(m, "Plant: %-4.4s\n", info->plant);
+	seq_printf(m, "Model Capacity: %-16.16s %08u\n",
+		   info->model_capacity, info->model_cap_rating);
+
+	/* The remaining ratings are only shown when they are non-zero. */
+	if (info->model_perm_cap_rating != 0)
+		seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n",
+			   info->model_perm_cap,
+			   info->model_perm_cap_rating);
+	if (info->model_temp_cap_rating != 0)
+		seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
+			   info->model_temp_cap,
+			   info->model_temp_cap_rating);
+	if (info->ncr != 0)
+		seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
+	if (info->npr != 0)
+		seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
+	if (info->ntr != 0)
+		seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+
+	if (info->cai) {
+		seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
+		seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
+		seq_printf(m, "Capacity Transient: %d\n", info->t);
+	}
+
+	if (!info->p)
+		return;
+	/* Percentages are numbered from 1 in the output. */
+	for (idx = 0; idx < ARRAY_SIZE(info->typepct); idx++)
+		seq_printf(m, "Type %d Percentage: %d\n",
+			   idx + 1, info->typepct[idx]);
+}
+
+/*
+ * Print the CPU topology magnitudes obtained with the STSI 15.1.x query,
+ * using topology_max_mnest as selector 2. A blank separator line is always
+ * emitted; the rest is skipped without MACHINE_HAS_TOPOLOGY or when the
+ * query fails. With CONFIG_SCHED_MC, @info is refilled through
+ * store_topology() and printed a second time as the "SW" line.
+ */
+static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
+{
+	int i;
+
+	seq_putc(m, '\n');
+	if (!MACHINE_HAS_TOPOLOGY)
+		return;
+	if (stsi(info, 15, 1, topology_max_mnest))
+		return;
+	seq_printf(m, "CPU Topology HW: ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		seq_printf(m, " %d", info->mag[i]);
+	seq_putc(m, '\n');
+#ifdef CONFIG_SCHED_MC
+	store_topology(info);
+	seq_printf(m, "CPU Topology SW: ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		seq_printf(m, " %d", info->mag[i]);
+	seq_putc(m, '\n');
+#endif
+}
+
+/*
+ * Run the STSI 1.2.2 query into @info and print the CPU counts, the
+ * capability values and the per-N-way adjustment values to @m.
+ * Nothing is printed when the query fails.
+ */
+static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
+{
+	struct sysinfo_1_2_2_extension *ext;
+	int ways;
+
+	if (stsi(info, 1, 2, 2) != 0)
+		return;
+
+	/* The extension block starts acc_offset bytes into @info. */
+	ext = (struct sysinfo_1_2_2_extension *)
+		((unsigned long) info + info->acc_offset);
+
+	seq_printf(m, "CPUs Total: %d\n", info->cpus_total);
+	seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured);
+	seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby);
+	seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved);
+
+	/*
+	 * Per the specification the alternate capability is a 32 bit
+	 * floating point number when its upper 8 bits are not zero.
+	 * Floating point output is off limits in the kernel, so the raw
+	 * value is always printed as a 32 bit unsigned integer and user
+	 * space has to decode the alternate cpu capability itself.
+	 */
+	seq_printf(m, "Capability: %u", info->capability);
+	if (info->format == 1)
+		seq_printf(m, " %u", ext->alt_capability);
+	seq_putc(m, '\n');
+
+	if (info->nominal_cap != 0)
+		seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap);
+	if (info->secondary_cap != 0)
+		seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap);
+
+	/* Adjustment entries start at the 2-way value, stored at index 0. */
+	for (ways = 2; ways <= info->cpus_total; ways++) {
+		int slot = ways - 2;
+
+		seq_printf(m, "Adjustment %02d-way: %u",
+			   ways, info->adjustment[slot]);
+		if (info->format == 1)
+			seq_printf(m, " %u", ext->alt_adjustment[slot]);
+		seq_putc(m, '\n');
+	}
+}
+
+/*
+ * Run the STSI 2.2.2 query into @info and print the LPAR section to @m,
+ * preceded by a blank line. Nothing is printed when the query fails.
+ */
+static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
+{
+	if (stsi(info, 2, 2, 2) != 0)
+		return;
+
+	/* Convert the name in place before it is printed further down. */
+	EBCASC(info->name, sizeof(info->name));
+
+	seq_putc(m, '\n');
+	seq_printf(m, "LPAR Number: %d\n", info->lpar_number);
+
+	/* One word per characteristic bit that is set, all on one line. */
+	seq_printf(m, "LPAR Characteristics: ");
+	if ((info->characteristics & LPAR_CHAR_DEDICATED) != 0)
+		seq_printf(m, "Dedicated ");
+	if ((info->characteristics & LPAR_CHAR_SHARED) != 0)
+		seq_printf(m, "Shared ");
+	if ((info->characteristics & LPAR_CHAR_LIMITED) != 0)
+		seq_printf(m, "Limited ");
+	seq_putc(m, '\n');
+
+	seq_printf(m, "LPAR Name: %-8.8s\n", info->name);
+	seq_printf(m, "LPAR Adjustment: %d\n", info->caf);
+	seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total);
+	seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured);
+	seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby);
+	seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved);
+	seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated);
+	seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared);
+}
+
+/*
+ * Run the STSI 3.2.2 query into @info and print one section per entry of
+ * info->vm[], info->count entries in total, each preceded by a blank line.
+ * Nothing is printed when the query fails.
+ */
+static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
+{
+	int n = 0;
+
+	if (stsi(info, 3, 2, 2) != 0)
+		return;
+
+	while (n < info->count) {
+		/* Name and control program id are converted in place. */
+		EBCASC(info->vm[n].name, sizeof(info->vm[n].name));
+		EBCASC(info->vm[n].cpi, sizeof(info->vm[n].cpi));
+
+		seq_putc(m, '\n');
+		seq_printf(m, "VM%02d Name: %-8.8s\n", n, info->vm[n].name);
+		seq_printf(m, "VM%02d Control Program: %-16.16s\n", n, info->vm[n].cpi);
+		seq_printf(m, "VM%02d Adjustment: %d\n", n, info->vm[n].caf);
+		seq_printf(m, "VM%02d CPUs Total: %d\n", n, info->vm[n].cpus_total);
+		seq_printf(m, "VM%02d CPUs Configured: %d\n", n, info->vm[n].cpus_configured);
+		seq_printf(m, "VM%02d CPUs Standby: %d\n", n, info->vm[n].cpus_standby);
+		seq_printf(m, "VM%02d CPUs Reserved: %d\n", n, info->vm[n].cpus_reserved);
+		n++;
+	}
+}
+
+/*
+ * seq_file show callback behind the "sysinfo" proc entry.
+ *
+ * Allocates one zeroed page as the shared STSI buffer, reads the current
+ * configuration level with function code 0 and prints every section that
+ * level allows. Always returns 0.
+ * NOTE(review): a failed page allocation also returns 0, so the reader
+ * sees empty output rather than an error.
+ */
+static int sysinfo_show(struct seq_file *m, void *v)
+{
+	void *page;
+	int level;
+
+	page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (page == NULL)
+		return 0;
+
+	level = stsi(NULL, 0, 0, 0);
+	if (level >= 1) {
+		stsi_1_1_1(m, page);
+		stsi_15_1_x(m, page);
+		stsi_1_2_2(m, page);
+	}
+	if (level >= 2)
+		stsi_2_2_2(m, page);
+	if (level >= 3)
+		stsi_3_2_2(m, page);
+
+	free_page((unsigned long)page);
+	return 0;
+}
+
+/*
+ * open handler of sysinfo_fops: sets up a single-record seq_file whose
+ * content is produced by sysinfo_show(), with no private data.
+ */
+static int sysinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sysinfo_show, NULL);
+}
+
+/*
+ * File operations for the "sysinfo" proc entry: a seq_file opened with
+ * single_open() in sysinfo_open() and released with single_release().
+ */
+static const struct file_operations sysinfo_fops = {
+ .open = sysinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Initcall that creates the "sysinfo" proc entry with mode 0444 and no
+ * parent directory. The result of proc_create() is not checked; the
+ * function always returns 0.
+ */
+static int __init sysinfo_create_proc(void)
+{
+ proc_create("sysinfo", 0444, NULL, &sysinfo_fops);
+ return 0;
+}
+device_initcall(sysinfo_create_proc);
+
+/*
+ * Service levels interface.
+ *
+ * service_level_list holds the registered struct service_level entries.
+ * service_level_sem is taken for writing by register_service_level() and
+ * unregister_service_level(), and for reading between
+ * service_level_start() and service_level_stop().
+ */
+
+/* Read/write semaphore guarding service_level_list. */
+static DECLARE_RWSEM(service_level_sem);
+/* Registered service levels; new entries are appended at the tail. */
+static LIST_HEAD(service_level_list);
+
+/*
+ * Append @slr to the list of registered service levels.
+ *
+ * Returns 0 on success, or -EEXIST when this very structure is already
+ * on the list. The list is modified under the write side of
+ * service_level_sem.
+ */
+int register_service_level(struct service_level *slr)
+{
+	struct service_level *cur;
+	int rc = 0;
+
+	down_write(&service_level_sem);
+	list_for_each_entry(cur, &service_level_list, list) {
+		if (cur == slr) {
+			rc = -EEXIST;
+			goto out_unlock;
+		}
+	}
+	list_add_tail(&slr->list, &service_level_list);
+out_unlock:
+	up_write(&service_level_sem);
+	return rc;
+}
+EXPORT_SYMBOL(register_service_level);
+
+/*
+ * Remove @slr from the list of registered service levels.
+ *
+ * Returns 0 when the entry was found and unlinked, -ENOENT when it is
+ * not on the list. The list is modified under the write side of
+ * service_level_sem.
+ */
+int unregister_service_level(struct service_level *slr)
+{
+	struct service_level *cur, *tmp;
+	int rc = -ENOENT;
+
+	down_write(&service_level_sem);
+	list_for_each_entry_safe(cur, tmp, &service_level_list, list) {
+		if (cur == slr) {
+			list_del(&cur->list);
+			rc = 0;
+			break;
+		}
+	}
+	up_write(&service_level_sem);
+	return rc;
+}
+EXPORT_SYMBOL(unregister_service_level);
+
+/*
+ * seq_file start callback: takes service_level_sem for reading (dropped
+ * again in service_level_stop()) and returns the list element at *pos.
+ */
+static void *service_level_start(struct seq_file *m, loff_t *pos)
+{
+ down_read(&service_level_sem);
+ return seq_list_start(&service_level_list, *pos);
+}
+
+/* seq_file next callback: advances from element @p via seq_list_next(). */
+static void *service_level_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &service_level_list, pos);
+}
+
+/* seq_file stop callback: drops the read lock taken in service_level_start(). */
+static void service_level_stop(struct seq_file *m, void *p)
+{
+ up_read(&service_level_sem);
+}
+
+/*
+ * seq_file show callback: @p is the list node of a registered service
+ * level; printing is delegated to that entry's seq_print() method.
+ * Always returns 0.
+ */
+static int service_level_show(struct seq_file *m, void *p)
+{
+	struct service_level *entry = list_entry(p, struct service_level, list);
+
+	entry->seq_print(m, entry);
+	return 0;
+}
+
+/* Iterator over service_level_list used by the "service_levels" proc file. */
+static const struct seq_operations service_level_seq_ops = {
+ .start = service_level_start,
+ .next = service_level_next,
+ .stop = service_level_stop,
+ .show = service_level_show
+};
+
+/* open handler of service_level_ops: attaches service_level_seq_ops to @file. */
+static int service_level_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &service_level_seq_ops);
+}
+
+/*
+ * File operations for the "service_levels" proc entry: a seq_file opened
+ * with seq_open() in service_level_open() and released with seq_release().
+ */
+static const struct file_operations service_level_ops = {
+ .open = service_level_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * seq_print method of service_level_vm: issues the "QUERY CPLEVEL"
+ * command through cpcmd() and prints the first line of the response as
+ * "VM: <line>". Prints nothing when the buffer cannot be allocated.
+ * @slr is not used.
+ */
+static void service_level_vm_print(struct seq_file *m,
+				   struct service_level *slr)
+{
+	const int buf_len = 1024;
+	char *query_buffer, *str;
+
+	/*
+	 * The buffer is searched with strchr() and printed with %s, so it
+	 * must be a terminated string. Zero it and hand cpcmd() one byte
+	 * less than was allocated; this does not depend on cpcmd()
+	 * terminating, or even writing, the response.
+	 */
+	query_buffer = kzalloc(buf_len, GFP_KERNEL | GFP_DMA);
+	if (!query_buffer)
+		return;
+	cpcmd("QUERY CPLEVEL", query_buffer, buf_len - 1, NULL);
+	/* Only the first line of the response is reported. */
+	str = strchr(query_buffer, '\n');
+	if (str)
+		*str = 0;
+	seq_printf(m, "VM: %s\n", query_buffer);
+	kfree(query_buffer);
+}
+
+/* Service level entry registered by create_proc_service_level() when MACHINE_IS_VM. */
+static struct service_level service_level_vm = {
+ .seq_print = service_level_vm_print
+};
+
+/*
+ * Initcall that creates the "service_levels" proc entry (mode argument 0,
+ * no parent directory) and, when MACHINE_IS_VM, registers
+ * service_level_vm. Neither return value is checked; always returns 0.
+ */
+static __init int create_proc_service_level(void)
+{
+ proc_create("service_levels", 0, NULL, &service_level_ops);
+ if (MACHINE_IS_VM)
+ register_service_level(&service_level_vm);
+ return 0;
+}
+subsys_initcall(create_proc_service_level);
+
+/*
+ * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
+ *
+ * Queries STSI 1.2.2 into a temporary zeroed page, derives a capability
+ * number from it (or uses 42 when the query fails) and stores
+ * capability * (500000/HZ) in loops_per_jiffy. If the page cannot be
+ * allocated, loops_per_jiffy is left untouched.
+ */
+void s390_adjust_jiffies(void)
+{
+ struct sysinfo_1_2_2 *info;
+ const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */
+ FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+ FP_DECL_EX;
+ unsigned int capability;
+
+ info = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return;
+
+ if (stsi(info, 1, 2, 2) == 0) {
+ /*
+ * Major sigh. The cpu capability encoding is "special".
+ * If the first 9 bits of info->capability are 0 then it
+ * is a 32 bit unsigned integer in the range 0 .. 2^23.
+ * If the first 9 bits are != 0 then it is a 32 bit float.
+ * In addition a lower value indicates a proportionally
+ * higher cpu capacity. Bogomips are the other way round.
+ * To get to a halfway suitable number we divide 1e7
+ * by the cpu capability number. Yes, that means a floating
+ * point division .. math-emu here we come :-)
+ */
+ /* SA = 1e7 (dividend). */
+ FP_UNPACK_SP(SA, &fmil);
+ /* SB = capability, converted from integer or unpacked as float. */
+ if ((info->capability >> 23) == 0)
+ FP_FROM_INT_S(SB, (long) info->capability, 64, long);
+ else
+ FP_UNPACK_SP(SB, &info->capability);
+ /* SR = SA / SB, then converted back to an integer. */
+ FP_DIV_S(SR, SA, SB);
+ FP_TO_INT_S(capability, SR, 32, 0);
+ } else
+ /*
+ * Really old machine without stsi block for basic
+ * cpu information. Report 42.0 bogomips.
+ */
+ capability = 42;
+ loops_per_jiffy = capability * (500000/HZ);
+ free_page((unsigned long) info);
+}
+
+/*
+ * calibrate the delay loop
+ *
+ * No measurement is done here: loops_per_jiffy is preset by
+ * s390_adjust_jiffies() and then reported at KERN_DEBUG level as a
+ * BogoMIPS value with two decimals.
+ */
+void calibrate_delay(void)
+{
+ s390_adjust_jiffies();
+ /* Print the good old Bogomips line .. */
+ printk(KERN_DEBUG "Calibrating delay loop (skipped)... "
+ "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 7aec676fefd..0931b110c82 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1,9 +1,8 @@
/*
- * arch/s390/kernel/time.c
* Time of day based timer functions.
*
* S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999, 2008
* Author(s): Hartmut Penner (hp@de.ibm.com),
* Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
@@ -12,6 +11,10 @@
* Copyright (C) 1991, 1992, 1995 Linus Torvalds
*/
+#define KMSG_COMPONENT "time"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
@@ -20,8 +23,10 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/stop_machine.h>
#include <linux/time.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/smp.h>
@@ -29,45 +34,36 @@
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
+#include <linux/gfp.h>
+#include <linux/kprobes.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
-#include <asm/s390_ext.h>
#include <asm/div64.h>
+#include <asm/vdso.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
-#include <asm/timer.h>
+#include <asm/vtimer.h>
#include <asm/etr.h>
#include <asm/cio.h>
+#include "entry.h"
/* change this if you have some constant time drift */
#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ)
#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
-/* The value of the TOD clock for 1.1.1970. */
-#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
-
-/*
- * Create a small time difference between the timer interrupts
- * on the different cpus to avoid lock contention.
- */
-#define CPU_DEVIATION (smp_processor_id() << 12)
-
-#define TICK_SIZE tick
-
-static ext_int_info_t ext_int_info_cc;
-static ext_int_info_t ext_int_etr_cc;
-static u64 jiffies_timer_cc;
+u64 sched_clock_base_cc = -1; /* Force to data section. */
+EXPORT_SYMBOL_GPL(sched_clock_base_cc);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
/*
* Scheduler clock - returns current time in nanosec units.
*/
-unsigned long long sched_clock(void)
+unsigned long long notrace __kprobes sched_clock(void)
{
- return ((get_clock_xt() - jiffies_timer_cc) * 125) >> 9;
+ return tod_to_ns(get_tod_clock_monotonic());
}
/*
@@ -79,32 +75,25 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);
-void tod_to_timeval(__u64 todval, struct timespec *xtime)
+void tod_to_timeval(__u64 todval, struct timespec *xt)
{
unsigned long long sec;
sec = todval >> 12;
do_div(sec, 1000000);
- xtime->tv_sec = sec;
+ xt->tv_sec = sec;
todval -= (sec * 1000000) << 12;
- xtime->tv_nsec = ((todval * 1000) >> 12);
+ xt->tv_nsec = ((todval * 1000) >> 12);
}
-
-#ifdef CONFIG_PROFILING
-#define s390_do_profile() profile_tick(CPU_PROFILING)
-#else
-#define s390_do_profile() do { ; } while(0)
-#endif /* CONFIG_PROFILING */
+EXPORT_SYMBOL(tod_to_timeval);
void clock_comparator_work(void)
{
struct clock_event_device *cd;
S390_lowcore.clock_comparator = -1ULL;
- set_clock_comparator(S390_lowcore.clock_comparator);
cd = &__get_cpu_var(comparators);
cd->event_handler(cd);
- s390_do_profile();
}
/*
@@ -122,7 +111,7 @@ static void fixup_clock_comparator(unsigned long long delta)
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- S390_lowcore.clock_comparator = get_clock() + delta;
+ S390_lowcore.clock_comparator = get_tod_clock() + delta;
set_clock_comparator(S390_lowcore.clock_comparator);
return 0;
}
@@ -153,7 +142,7 @@ void init_cpu_timer(void)
cd->min_delta_ns = 1;
cd->max_delta_ns = LONG_MAX;
cd->rating = 400;
- cd->cpumask = cpumask_of_cpu(cpu);
+ cd->cpumask = cpumask_of(cpu);
cd->set_next_event = s390_next_event;
cd->set_mode = s390_set_mode;
@@ -162,37 +151,48 @@ void init_cpu_timer(void)
/* Enable clock comparator timer interrupt. */
__ctl_set_bit(0,11);
- /* Always allow ETR external interrupts, even without an ETR. */
+ /* Always allow the timing alert external interrupt. */
__ctl_set_bit(0, 4);
}
-static void clock_comparator_interrupt(__u16 code)
+static void clock_comparator_interrupt(struct ext_code ext_code,
+ unsigned int param32,
+ unsigned long param64)
{
+ inc_irq_stat(IRQEXT_CLK);
+ if (S390_lowcore.clock_comparator == -1ULL)
+ set_clock_comparator(S390_lowcore.clock_comparator);
}
-static void etr_reset(void);
-static void etr_ext_handler(__u16);
+static void etr_timing_alert(struct etr_irq_parm *);
+static void stp_timing_alert(struct stp_irq_parm *);
-/*
- * Get the TOD clock running.
- */
-static u64 __init reset_tod_clock(void)
+static void timing_alert_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
{
- u64 time;
+ inc_irq_stat(IRQEXT_TLA);
+ if (param32 & 0x00c40000)
+ etr_timing_alert((struct etr_irq_parm *) &param32);
+ if (param32 & 0x00038000)
+ stp_timing_alert((struct stp_irq_parm *) &param32);
+}
- etr_reset();
- if (store_clock(&time) == 0)
- return time;
- /* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0)
- panic("TOD clock not operational.");
+static void etr_reset(void);
+static void stp_reset(void);
- return TOD_UNIX_EPOCH;
+void read_persistent_clock(struct timespec *ts)
+{
+ tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
}
-static cycle_t read_tod_clock(void)
+void read_boot_clock(struct timespec *ts)
{
- return get_clock();
+ tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
+}
+
+static cycle_t read_tod_clock(struct clocksource *cs)
+{
+ return get_tod_clock();
}
static struct clocksource clocksource_tod = {
@@ -205,6 +205,51 @@ static struct clocksource clocksource_tod = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+struct clocksource * __init clocksource_default_clock(void)
+{
+ return &clocksource_tod;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ u64 nsecps;
+
+ if (tk->clock != &clocksource_tod)
+ return;
+
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->xtime_tod_stamp = tk->clock->cycle_last;
+ vdso_data->xtime_clock_sec = tk->xtime_sec;
+ vdso_data->xtime_clock_nsec = tk->xtime_nsec;
+ vdso_data->wtom_clock_sec =
+ tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = tk->xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->shift;
+ while (vdso_data->wtom_clock_nsec >= nsecps) {
+ vdso_data->wtom_clock_nsec -= nsecps;
+ vdso_data->wtom_clock_sec++;
+ }
+ vdso_data->tk_mult = tk->mult;
+ vdso_data->tk_shift = tk->shift;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+}
+
+extern struct timezone sys_tz;
+
+void update_vsyscall_tz(void)
+{
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+}
/*
* Initialize the TOD clock and the CPU timer of
@@ -212,36 +257,152 @@ static struct clocksource clocksource_tod = {
*/
void __init time_init(void)
{
- u64 init_timer_cc;
-
- init_timer_cc = reset_tod_clock();
- jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY;
-
- /* set xtime */
- tod_to_timeval(init_timer_cc - TOD_UNIX_EPOCH, &xtime);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
+ /* Reset time synchronization interfaces. */
+ etr_reset();
+ stp_reset();
/* request the clock comparator external interrupt */
- if (register_early_external_interrupt(0x1004,
- clock_comparator_interrupt,
- &ext_int_info_cc) != 0)
- panic("Couldn't request external interrupt 0x1004");
+ if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+ panic("Couldn't request external interrupt 0x1004");
+
+ /* request the timing alert external interrupt */
+ if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
+ panic("Couldn't request external interrupt 0x1406");
if (clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
- /* request the etr external interrupt */
- if (register_early_external_interrupt(0x1406, etr_ext_handler,
- &ext_int_etr_cc) != 0)
- panic("Couldn't request external interrupt 0x1406");
-
/* Enable TOD clock interrupts on the boot cpu. */
init_cpu_timer();
-#ifdef CONFIG_VIRT_TIMER
+ /* Enable cpu timer interrupts on the boot cpu. */
vtime_init();
-#endif
+}
+
+/*
+ * The time is "clock". old is what we think the time is.
+ * Adjust the value by a multiple of jiffies and add the delta to ntp.
+ * "delay" is an approximation how long the synchronization took. If
+ * the time correction is positive, then "delay" is subtracted from
+ * the time difference and only the remaining part is passed to ntp.
+ */
+static unsigned long long adjust_time(unsigned long long old,
+ unsigned long long clock,
+ unsigned long long delay)
+{
+ unsigned long long delta, ticks;
+ struct timex adjust;
+
+ if (clock > old) {
+ /* It is later than we thought. */
+ delta = ticks = clock - old;
+ delta = ticks = (delta < delay) ? 0 : delta - delay;
+ delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ adjust.offset = ticks * (1000000 / HZ);
+ } else {
+ /* It is earlier than we thought. */
+ delta = ticks = old - clock;
+ delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ delta = -delta;
+ adjust.offset = -ticks * (1000000 / HZ);
+ }
+ sched_clock_base_cc += delta;
+ if (adjust.offset != 0) {
+ pr_notice("The ETR interface has adjusted the clock "
+ "by %li microseconds\n", adjust.offset);
+ adjust.modes = ADJ_OFFSET_SINGLESHOT;
+ do_adjtimex(&adjust);
+ }
+ return delta;
+}
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static DEFINE_MUTEX(clock_sync_mutex);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_ETR 0
+#define CLOCK_SYNC_HAS_STP 1
+#define CLOCK_SYNC_ETR 2
+#define CLOCK_SYNC_STP 3
+
+/*
+ * The synchronous get_clock function. It will write the current clock
+ * value to the clock pointer and return 0 if the clock is in sync with
+ * the external time source. If the clock mode is local it will return
+ * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
+ * reference.
+ */
+int get_sync_clock(unsigned long long *clock)
+{
+ atomic_t *sw_ptr;
+ unsigned int sw0, sw1;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ sw0 = atomic_read(sw_ptr);
+ *clock = get_tod_clock();
+ sw1 = atomic_read(sw_ptr);
+ put_cpu_var(clock_sync_word);
+ if (sw0 == sw1 && (sw0 & 0x80000000U))
+ /* Success: time is in sync. */
+ return 0;
+ if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
+ !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
+ !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ return -EACCES;
+ return -EAGAIN;
+}
+EXPORT_SYMBOL(get_sync_clock);
+
+/*
+ * Make get_sync_clock return -EAGAIN.
+ */
+static void disable_sync_clock(void *dummy)
+{
+ atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ /*
+ * Clear the in-sync bit 2^31. All get_sync_clock calls will
+ * fail until the sync bit is turned back on. In addition
+ * increase the "sequence" counter to avoid the race of an
+ * etr event and the complete recovery against get_sync_clock.
+ */
+ atomic_clear_mask(0x80000000, sw_ptr);
+ atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_sync_clock return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+ atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ atomic_set_mask(0x80000000, sw_ptr);
+}
+
+/*
+ * Function to check if the clock is in sync.
+ */
+static inline int check_sync_clock(void)
+{
+ atomic_t *sw_ptr;
+ int rc;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
+ put_cpu_var(clock_sync_word);
+ return rc;
+}
+
+/* Single threaded workqueue used for etr and stp sync events */
+static struct workqueue_struct *time_sync_wq;
+
+static void __init time_init_wq(void)
+{
+ if (time_sync_wq)
+ return;
+ time_sync_wq = create_singlethread_workqueue("timesync");
}
/*
@@ -249,6 +410,7 @@ void __init time_init(void)
*/
static int etr_port0_online;
static int etr_port1_online;
+static int etr_steai_available;
static int __init early_parse_etr(char *p)
{
@@ -273,12 +435,6 @@ enum etr_event {
ETR_EVENT_UPDATE,
};
-enum etr_flags {
- ETR_FLAG_ENOSYS,
- ETR_FLAG_EACCES,
- ETR_FLAG_STEAI,
-};
-
/*
* Valid bit combinations of the eacr register are (x = don't care):
* e0 e1 dp p0 p1 ea es sl
@@ -305,74 +461,19 @@ enum etr_flags {
*/
static struct etr_eacr etr_eacr;
static u64 etr_tolec; /* time of last eacr update */
-static unsigned long etr_flags;
static struct etr_aib etr_port0;
static int etr_port0_uptodate;
static struct etr_aib etr_port1;
static int etr_port1_uptodate;
static unsigned long etr_events;
static struct timer_list etr_timer;
-static DEFINE_PER_CPU(atomic_t, etr_sync_word);
static void etr_timeout(unsigned long dummy);
static void etr_work_fn(struct work_struct *work);
+static DEFINE_MUTEX(etr_work_mutex);
static DECLARE_WORK(etr_work, etr_work_fn);
/*
- * The etr get_clock function. It will write the current clock value
- * to the clock pointer and return 0 if the clock is in sync with the
- * external time source. If the clock mode is local it will return
- * -ENOSYS and -EAGAIN if the clock is not in sync with the external
- * reference. This function is what ETR is all about..
- */
-int get_sync_clock(unsigned long long *clock)
-{
- atomic_t *sw_ptr;
- unsigned int sw0, sw1;
-
- sw_ptr = &get_cpu_var(etr_sync_word);
- sw0 = atomic_read(sw_ptr);
- *clock = get_clock();
- sw1 = atomic_read(sw_ptr);
- put_cpu_var(etr_sync_sync);
- if (sw0 == sw1 && (sw0 & 0x80000000U))
- /* Success: time is in sync. */
- return 0;
- if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
- return -ENOSYS;
- if (test_bit(ETR_FLAG_EACCES, &etr_flags))
- return -EACCES;
- return -EAGAIN;
-}
-EXPORT_SYMBOL(get_sync_clock);
-
-/*
- * Make get_sync_clock return -EAGAIN.
- */
-static void etr_disable_sync_clock(void *dummy)
-{
- atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
- /*
- * Clear the in-sync bit 2^31. All get_sync_clock calls will
- * fail until the sync bit is turned back on. In addition
- * increase the "sequence" counter to avoid the race of an
- * etr event and the complete recovery against get_sync_clock.
- */
- atomic_clear_mask(0x80000000, sw_ptr);
- atomic_inc(sw_ptr);
-}
-
-/*
- * Make get_sync_clock return 0 again.
- * Needs to be called from a context disabled for preemption.
- */
-static void etr_enable_sync_clock(void)
-{
- atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
- atomic_set_mask(0x80000000, sw_ptr);
-}
-
-/*
* Reset ETR attachment.
*/
static void etr_reset(void)
@@ -381,15 +482,15 @@ static void etr_reset(void)
.e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
.p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
.es = 0, .sl = 0 };
- if (etr_setr(&etr_eacr) == 0)
- etr_tolec = get_clock();
- else {
- set_bit(ETR_FLAG_ENOSYS, &etr_flags);
- if (etr_port0_online || etr_port1_online) {
- printk(KERN_WARNING "Running on non ETR capable "
- "machine, only local mode available.\n");
- etr_port0_online = etr_port1_online = 0;
- }
+ if (etr_setr(&etr_eacr) == 0) {
+ etr_tolec = get_tod_clock();
+ set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ } else if (etr_port0_online || etr_port1_online) {
+ pr_warning("The real or virtual hardware system does "
+ "not provide an ETR interface\n");
+ etr_port0_online = etr_port1_online = 0;
}
}
@@ -397,21 +498,20 @@ static int __init etr_init(void)
{
struct etr_aib aib;
- if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
+ if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
return 0;
+ time_init_wq();
/* Check if this machine has the steai instruction. */
if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
- set_bit(ETR_FLAG_STEAI, &etr_flags);
+ etr_steai_available = 1;
setup_timer(&etr_timer, etr_timeout, 0UL);
- if (!etr_port0_online && !etr_port1_online)
- set_bit(ETR_FLAG_EACCES, &etr_flags);
if (etr_port0_online) {
set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
if (etr_port1_online) {
set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
return 0;
}
@@ -435,9 +535,12 @@ void etr_switch_to_local(void)
{
if (!etr_eacr.sl)
return;
- etr_disable_sync_clock(NULL);
- set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
- schedule_work(&etr_work);
+ disable_sync_clock(NULL);
+ if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
+ etr_eacr.es = etr_eacr.sl = 0;
+ etr_setr(&etr_eacr);
+ queue_work(time_sync_wq, &etr_work);
+ }
}
/*
@@ -450,23 +553,23 @@ void etr_sync_check(void)
{
if (!etr_eacr.es)
return;
- etr_disable_sync_clock(NULL);
- set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
- schedule_work(&etr_work);
+ disable_sync_clock(NULL);
+ if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
+ etr_eacr.es = 0;
+ etr_setr(&etr_eacr);
+ queue_work(time_sync_wq, &etr_work);
+ }
}
/*
- * ETR external interrupt. There are two causes:
+ * ETR timing alert. There are two causes:
* 1) port state change, check the usability of the port
* 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
* sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
* or ETR-data word 4 (edf4) has changed.
*/
-static void etr_ext_handler(__u16 code)
+static void etr_timing_alert(struct etr_irq_parm *intparm)
{
- struct etr_interruption_parameter *intparm =
- (struct etr_interruption_parameter *) &S390_lowcore.ext_params;
-
if (intparm->pc0)
/* ETR port 0 state change. */
set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
@@ -479,13 +582,13 @@ static void etr_ext_handler(__u16 code)
* Both ports are not up-to-date now.
*/
set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
static void etr_timeout(unsigned long dummy)
{
set_bit(ETR_EVENT_UPDATE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
/*
@@ -591,58 +694,25 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
return 1;
}
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long etr_adjust_time(unsigned long long old,
- unsigned long long clock,
- unsigned long long delay)
-{
- unsigned long long delta, ticks;
- struct timex adjust;
-
- if (clock > old) {
- /* It is later than we thought. */
- delta = ticks = clock - old;
- delta = ticks = (delta < delay) ? 0 : delta - delay;
- delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- adjust.offset = ticks * (1000000 / HZ);
- } else {
- /* It is earlier than we thought. */
- delta = ticks = old - clock;
- delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- delta = -delta;
- adjust.offset = -ticks * (1000000 / HZ);
- }
- jiffies_timer_cc += delta;
- if (adjust.offset != 0) {
- printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
- adjust.offset);
- adjust.modes = ADJ_OFFSET_SINGLESHOT;
- do_adjtimex(&adjust);
- }
- return delta;
-}
-
-static struct {
+struct clock_sync_data {
+ atomic_t cpus;
int in_sync;
unsigned long long fixup_cc;
-} etr_sync;
+ int etr_port;
+ struct etr_aib *etr_aib;
+};
-static void etr_sync_cpu_start(void *dummy)
+static void clock_sync_cpu(struct clock_sync_data *sync)
{
- etr_enable_sync_clock();
+ atomic_dec(&sync->cpus);
+ enable_sync_clock();
/*
* This looks like a busy wait loop but it isn't. etr_sync_cpus
* is called on all other cpus while the TOD clocks is stopped.
* __udelay will stop the cpu on an enabled wait psw until the
* TOD is running again.
*/
- while (etr_sync.in_sync == 0) {
+ while (sync->in_sync == 0) {
__udelay(1);
/*
* A different cpu changes *in_sync. Therefore use
@@ -650,56 +720,53 @@ static void etr_sync_cpu_start(void *dummy)
*/
barrier();
}
- if (etr_sync.in_sync != 1)
+ if (sync->in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
- etr_disable_sync_clock(NULL);
+ disable_sync_clock(NULL);
/*
* This round of TOD syncing is done. Set the clock comparator
* to the next tick and let the processor continue.
*/
- fixup_clock_comparator(etr_sync.fixup_cc);
-}
-
-static void etr_sync_cpu_end(void *dummy)
-{
+ fixup_clock_comparator(sync->fixup_cc);
}
/*
- * Sync the TOD clock using the port refered to by aibp. This port
+ * Sync the TOD clock using the port referred to by aibp. This port
* has to be enabled and the other port has to be disabled. The
* last eacr update has to be more than 1.6 seconds in the past.
*/
-static int etr_sync_clock(struct etr_aib *aib, int port)
+static int etr_sync_clock(void *data)
{
- struct etr_aib *sync_port;
+ static int first;
unsigned long long clock, old_clock, delay, delta;
- int follows;
+ struct clock_sync_data *etr_sync;
+ struct etr_aib *sync_port, *aib;
+ int port;
int rc;
- /* Check if the current aib is adjacent to the sync port aib. */
- sync_port = (port == 0) ? &etr_port0 : &etr_port1;
- follows = etr_aib_follows(sync_port, aib, port);
- memcpy(sync_port, aib, sizeof(*aib));
- if (!follows)
- return -EAGAIN;
+ etr_sync = data;
- /*
- * Catch all other cpus and make them wait until we have
- * successfully synced the clock. smp_call_function will
- * return after all other cpus are in etr_sync_cpu_start.
- */
- memset(&etr_sync, 0, sizeof(etr_sync));
- preempt_disable();
- smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
- local_irq_disable();
- etr_enable_sync_clock();
+ if (xchg(&first, 1) == 1) {
+ /* Slave */
+ clock_sync_cpu(etr_sync);
+ return 0;
+ }
+
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&etr_sync->cpus) != 0)
+ cpu_relax();
+
+ port = etr_sync->etr_port;
+ aib = etr_sync->etr_aib;
+ sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+ enable_sync_clock();
/* Set clock to next OTE. */
__ctl_set_bit(14, 21);
__ctl_set_bit(0, 29);
clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
- old_clock = get_clock();
- if (set_clock(clock) == 0) {
+ old_clock = get_tod_clock();
+ if (set_tod_clock(clock) == 0) {
__udelay(1); /* Wait for the clock to start. */
__ctl_clear_bit(0, 29);
__ctl_clear_bit(14, 21);
@@ -707,30 +774,51 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
/* Adjust Linux timing variables. */
delay = (unsigned long long)
(aib->edf2.etv - sync_port->edf2.etv) << 32;
- delta = etr_adjust_time(old_clock, clock, delay);
- etr_sync.fixup_cc = delta;
+ delta = adjust_time(old_clock, clock, delay);
+ etr_sync->fixup_cc = delta;
fixup_clock_comparator(delta);
/* Verify that the clock is properly set. */
if (!etr_aib_follows(sync_port, aib, port)) {
/* Didn't work. */
- etr_disable_sync_clock(NULL);
- etr_sync.in_sync = -EAGAIN;
+ disable_sync_clock(NULL);
+ etr_sync->in_sync = -EAGAIN;
rc = -EAGAIN;
} else {
- etr_sync.in_sync = 1;
+ etr_sync->in_sync = 1;
rc = 0;
}
} else {
/* Could not set the clock ?!? */
__ctl_clear_bit(0, 29);
__ctl_clear_bit(14, 21);
- etr_disable_sync_clock(NULL);
- etr_sync.in_sync = -EAGAIN;
+ disable_sync_clock(NULL);
+ etr_sync->in_sync = -EAGAIN;
rc = -EAGAIN;
}
- local_irq_enable();
- smp_call_function(etr_sync_cpu_end,NULL,0,0);
- preempt_enable();
+ xchg(&first, 0);
+ return rc;
+}
+
+static int etr_sync_clock_stop(struct etr_aib *aib, int port)
+{
+ struct clock_sync_data etr_sync;
+ struct etr_aib *sync_port;
+ int follows;
+ int rc;
+
+ /* Check if the current aib is adjacent to the sync port aib. */
+ sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+ follows = etr_aib_follows(sync_port, aib, port);
+ memcpy(sync_port, aib, sizeof(*aib));
+ if (!follows)
+ return -EAGAIN;
+ memset(&etr_sync, 0, sizeof(etr_sync));
+ etr_sync.etr_aib = aib;
+ etr_sync.etr_port = port;
+ get_online_cpus();
+ atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
+ rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask);
+ put_online_cpus();
return rc;
}
@@ -754,7 +842,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
* assume that this can have caused an stepping
* port switch.
*/
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
eacr.p0 = etr_port0_online;
if (!eacr.p0)
eacr.e0 = 0;
@@ -767,7 +855,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
* assume that this can have caused an stepping
* port switch.
*/
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
eacr.p1 = etr_port1_online;
if (!eacr.p1)
eacr.e1 = 0;
@@ -832,7 +920,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
* Do not try to get the alternate port aib if the clock
* is not in sync yet.
*/
- if (!eacr.es)
+ if (!eacr.es || !check_sync_clock())
return eacr;
/*
@@ -840,7 +928,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
* the other port immediately. If only stetr is available the
* data-port bit toggle has to be used.
*/
- if (test_bit(ETR_FLAG_STEAI, &etr_flags)) {
+ if (etr_steai_available) {
if (eacr.p0 && !etr_port0_uptodate) {
etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
etr_port0_uptodate = 1;
@@ -883,11 +971,11 @@ static void etr_update_eacr(struct etr_eacr eacr)
etr_eacr = eacr;
etr_setr(&etr_eacr);
if (dp_changed)
- etr_tolec = get_clock();
+ etr_tolec = get_tod_clock();
}
/*
- * ETR tasklet. In this function you'll find the main logic. In
+ * ETR work. In this function you'll find the main logic. In
* particular this is the only function that calls etr_update_eacr(),
* it "controls" the etr control register.
*/
@@ -898,6 +986,9 @@ static void etr_work_fn(struct work_struct *work)
struct etr_aib aib;
int sync_port;
+ /* prevent multiple execution. */
+ mutex_lock(&etr_work_mutex);
+
/* Create working copy of etr_eacr. */
eacr = etr_eacr;
@@ -909,17 +1000,16 @@ static void etr_work_fn(struct work_struct *work)
if (!eacr.ea) {
/* Both ports offline. Reset everything. */
eacr.dp = eacr.es = eacr.sl = 0;
- on_each_cpu(etr_disable_sync_clock, NULL, 0, 1);
+ on_each_cpu(disable_sync_clock, NULL, 1);
del_timer_sync(&etr_timer);
etr_update_eacr(eacr);
- set_bit(ETR_FLAG_EACCES, &etr_flags);
- return;
+ goto out_unlock;
}
/* Store aib to get the current ETR status word. */
BUG_ON(etr_stetr(&aib) != 0);
etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */
- now = get_clock();
+ now = get_tod_clock();
/*
* Update the port information if the last stepping port change
@@ -929,7 +1019,7 @@ static void etr_work_fn(struct work_struct *work)
eacr = etr_handle_update(&aib, eacr);
/*
- * Select ports to enable. The prefered synchronization mode is PPS.
+ * Select ports to enable. The preferred synchronization mode is PPS.
* If a port can be enabled depends on a number of things:
* 1) The port needs to be online and uptodate. A port is not
* disabled just because it is not uptodate, but it is only
@@ -953,7 +1043,6 @@ static void etr_work_fn(struct work_struct *work)
eacr.e1 = 1;
sync_port = (etr_port0_uptodate &&
etr_port_valid(&etr_port0, 0)) ? 0 : -1;
- clear_bit(ETR_FLAG_EACCES, &etr_flags);
} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
eacr.sl = 0;
eacr.e0 = 0;
@@ -962,7 +1051,6 @@ static void etr_work_fn(struct work_struct *work)
eacr.es = 0;
sync_port = (etr_port1_uptodate &&
etr_port_valid(&etr_port1, 1)) ? 1 : -1;
- clear_bit(ETR_FLAG_EACCES, &etr_flags);
} else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
eacr.sl = 1;
eacr.e0 = 1;
@@ -976,7 +1064,6 @@ static void etr_work_fn(struct work_struct *work)
eacr.e1 = 1;
sync_port = (etr_port0_uptodate &&
etr_port_valid(&etr_port0, 0)) ? 0 : -1;
- clear_bit(ETR_FLAG_EACCES, &etr_flags);
} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
eacr.sl = 1;
eacr.e0 = 0;
@@ -985,22 +1072,20 @@ static void etr_work_fn(struct work_struct *work)
eacr.es = 0;
sync_port = (etr_port1_uptodate &&
etr_port_valid(&etr_port1, 1)) ? 1 : -1;
- clear_bit(ETR_FLAG_EACCES, &etr_flags);
} else {
/* Both ports not usable. */
eacr.es = eacr.sl = 0;
sync_port = -1;
- set_bit(ETR_FLAG_EACCES, &etr_flags);
}
/*
* If the clock is in sync just update the eacr and return.
* If there is no valid sync port wait for a port update.
*/
- if (eacr.es || sync_port < 0) {
+ if ((eacr.es && check_sync_clock()) || sync_port < 0) {
etr_update_eacr(eacr);
etr_set_tolec_timeout(now);
- return;
+ goto out_unlock;
}
/*
@@ -1013,49 +1098,56 @@ static void etr_work_fn(struct work_struct *work)
/*
* Update eacr and try to synchronize the clock. If the update
* of eacr caused a stepping port switch (or if we have to
- * assume that a stepping port switch has occured) or the
+ * assume that a stepping port switch has occurred) or the
* clock syncing failed, reset the sync check control bit
* and set up a timer to try again after 0.5 seconds
*/
etr_update_eacr(eacr);
if (now < etr_tolec + (1600000 << 12) ||
- etr_sync_clock(&aib, sync_port) != 0) {
+ etr_sync_clock_stop(&aib, sync_port) != 0) {
/* Sync failed. Try again in 1/2 second. */
eacr.es = 0;
etr_update_eacr(eacr);
etr_set_sync_timeout();
} else
etr_set_tolec_timeout(now);
+out_unlock:
+ mutex_unlock(&etr_work_mutex);
}
/*
* Sysfs interface functions
*/
-static struct sysdev_class etr_sysclass = {
- .name = "etr",
+static struct bus_type etr_subsys = {
+ .name = "etr",
+ .dev_name = "etr",
};
-static struct sys_device etr_port0_dev = {
+static struct device etr_port0_dev = {
.id = 0,
- .cls = &etr_sysclass,
+ .bus = &etr_subsys,
};
-static struct sys_device etr_port1_dev = {
+static struct device etr_port1_dev = {
.id = 1,
- .cls = &etr_sysclass,
+ .bus = &etr_subsys,
};
/*
- * ETR class attributes
+ * ETR subsys attributes
*/
-static ssize_t etr_stepping_port_show(struct sysdev_class *class, char *buf)
+static ssize_t etr_stepping_port_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%i\n", etr_port0.esw.p);
}
-static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
+static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
-static ssize_t etr_stepping_mode_show(struct sysdev_class *class, char *buf)
+static ssize_t etr_stepping_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
char *mode_str;
@@ -1068,12 +1160,12 @@ static ssize_t etr_stepping_mode_show(struct sysdev_class *class, char *buf)
return sprintf(buf, "%s\n", mode_str);
}
-static SYSDEV_CLASS_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
+static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
/*
* ETR port attributes
*/
-static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev)
+static inline struct etr_aib *etr_aib_from_dev(struct device *dev)
{
if (dev == &etr_port0_dev)
return etr_port0_online ? &etr_port0 : NULL;
@@ -1081,7 +1173,9 @@ static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev)
return etr_port1_online ? &etr_port1 : NULL;
}
-static ssize_t etr_online_show(struct sys_device *dev, char *buf)
+static ssize_t etr_online_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
unsigned int online;
@@ -1089,43 +1183,58 @@ static ssize_t etr_online_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", online);
}
-static ssize_t etr_online_store(struct sys_device *dev,
- const char *buf, size_t count)
+static ssize_t etr_online_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
unsigned int value;
value = simple_strtoul(buf, NULL, 0);
if (value != 0 && value != 1)
return -EINVAL;
- if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
- return -ENOSYS;
+ if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
if (dev == &etr_port0_dev) {
if (etr_port0_online == value)
- return count; /* Nothing to do. */
+ goto out; /* Nothing to do. */
etr_port0_online = value;
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
} else {
if (etr_port1_online == value)
- return count; /* Nothing to do. */
+ goto out; /* Nothing to do. */
etr_port1_online = value;
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
- schedule_work(&etr_work);
+ queue_work(time_sync_wq, &etr_work);
}
+out:
+ mutex_unlock(&clock_sync_mutex);
return count;
}
-static SYSDEV_ATTR(online, 0600, etr_online_show, etr_online_store);
+static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store);
-static ssize_t etr_stepping_control_show(struct sys_device *dev, char *buf)
+static ssize_t etr_stepping_control_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
etr_eacr.e0 : etr_eacr.e1);
}
-static SYSDEV_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
+static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
-static ssize_t etr_mode_code_show(struct sys_device *dev, char *buf)
+static ssize_t etr_mode_code_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
if (!etr_port0_online && !etr_port1_online)
/* Status word is not uptodate if both ports are offline. */
@@ -1134,9 +1243,10 @@ static ssize_t etr_mode_code_show(struct sys_device *dev, char *buf)
etr_port0.esw.psc0 : etr_port0.esw.psc1);
}
-static SYSDEV_ATTR(state_code, 0400, etr_mode_code_show, NULL);
+static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL);
-static ssize_t etr_untuned_show(struct sys_device *dev, char *buf)
+static ssize_t etr_untuned_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1145,9 +1255,10 @@ static ssize_t etr_untuned_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf1.u);
}
-static SYSDEV_ATTR(untuned, 0400, etr_untuned_show, NULL);
+static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL);
-static ssize_t etr_network_id_show(struct sys_device *dev, char *buf)
+static ssize_t etr_network_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1156,9 +1267,10 @@ static ssize_t etr_network_id_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf1.net_id);
}
-static SYSDEV_ATTR(network, 0400, etr_network_id_show, NULL);
+static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL);
-static ssize_t etr_id_show(struct sys_device *dev, char *buf)
+static ssize_t etr_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1167,9 +1279,10 @@ static ssize_t etr_id_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf1.etr_id);
}
-static SYSDEV_ATTR(id, 0400, etr_id_show, NULL);
+static DEVICE_ATTR(id, 0400, etr_id_show, NULL);
-static ssize_t etr_port_number_show(struct sys_device *dev, char *buf)
+static ssize_t etr_port_number_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1178,9 +1291,10 @@ static ssize_t etr_port_number_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf1.etr_pn);
}
-static SYSDEV_ATTR(port, 0400, etr_port_number_show, NULL);
+static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL);
-static ssize_t etr_coupled_show(struct sys_device *dev, char *buf)
+static ssize_t etr_coupled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1189,9 +1303,10 @@ static ssize_t etr_coupled_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf3.c);
}
-static SYSDEV_ATTR(coupled, 0400, etr_coupled_show, NULL);
+static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL);
-static ssize_t etr_local_time_show(struct sys_device *dev, char *buf)
+static ssize_t etr_local_time_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1200,9 +1315,10 @@ static ssize_t etr_local_time_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf3.blto);
}
-static SYSDEV_ATTR(local_time, 0400, etr_local_time_show, NULL);
+static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL);
-static ssize_t etr_utc_offset_show(struct sys_device *dev, char *buf)
+static ssize_t etr_utc_offset_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct etr_aib *aib = etr_aib_from_dev(dev);
@@ -1211,64 +1327,64 @@ static ssize_t etr_utc_offset_show(struct sys_device *dev, char *buf)
return sprintf(buf, "%i\n", aib->edf3.buo);
}
-static SYSDEV_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
-
-static struct sysdev_attribute *etr_port_attributes[] = {
- &attr_online,
- &attr_stepping_control,
- &attr_state_code,
- &attr_untuned,
- &attr_network,
- &attr_id,
- &attr_port,
- &attr_coupled,
- &attr_local_time,
- &attr_utc_offset,
+static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
+
+static struct device_attribute *etr_port_attributes[] = {
+ &dev_attr_online,
+ &dev_attr_stepping_control,
+ &dev_attr_state_code,
+ &dev_attr_untuned,
+ &dev_attr_network,
+ &dev_attr_id,
+ &dev_attr_port,
+ &dev_attr_coupled,
+ &dev_attr_local_time,
+ &dev_attr_utc_offset,
NULL
};
-static int __init etr_register_port(struct sys_device *dev)
+static int __init etr_register_port(struct device *dev)
{
- struct sysdev_attribute **attr;
+ struct device_attribute **attr;
int rc;
- rc = sysdev_register(dev);
+ rc = device_register(dev);
if (rc)
goto out;
for (attr = etr_port_attributes; *attr; attr++) {
- rc = sysdev_create_file(dev, *attr);
+ rc = device_create_file(dev, *attr);
if (rc)
goto out_unreg;
}
return 0;
out_unreg:
for (; attr >= etr_port_attributes; attr--)
- sysdev_remove_file(dev, *attr);
- sysdev_unregister(dev);
+ device_remove_file(dev, *attr);
+ device_unregister(dev);
out:
return rc;
}
-static void __init etr_unregister_port(struct sys_device *dev)
+static void __init etr_unregister_port(struct device *dev)
{
- struct sysdev_attribute **attr;
+ struct device_attribute **attr;
for (attr = etr_port_attributes; *attr; attr++)
- sysdev_remove_file(dev, *attr);
- sysdev_unregister(dev);
+ device_remove_file(dev, *attr);
+ device_unregister(dev);
}
static int __init etr_init_sysfs(void)
{
int rc;
- rc = sysdev_class_register(&etr_sysclass);
+ rc = subsys_system_register(&etr_subsys, NULL);
if (rc)
goto out;
- rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_port);
+ rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port);
if (rc)
- goto out_unreg_class;
- rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_mode);
+ goto out_unreg_subsys;
+ rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
if (rc)
goto out_remove_stepping_port;
rc = etr_register_port(&etr_port0_dev);
@@ -1282,13 +1398,391 @@ static int __init etr_init_sysfs(void)
out_remove_port0:
etr_unregister_port(&etr_port0_dev);
out_remove_stepping_mode:
- sysdev_class_remove_file(&etr_sysclass, &attr_stepping_mode);
+ device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
out_remove_stepping_port:
- sysdev_class_remove_file(&etr_sysclass, &attr_stepping_port);
-out_unreg_class:
- sysdev_class_unregister(&etr_sysclass);
+ device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port);
+out_unreg_subsys:
+ bus_unregister(&etr_subsys);
out:
return rc;
}
device_initcall(etr_init_sysfs);
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+static int stp_online;
+static struct stp_sstpi stp_info;
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+static DEFINE_MUTEX(stp_work_mutex);
+static DECLARE_WORK(stp_work, stp_work_fn);
+static struct timer_list stp_timer;
+
+static int __init early_parse_stp(char *p)
+{
+ if (strncmp(p, "off", 3) == 0)
+ stp_online = 0;
+ else if (strncmp(p, "on", 2) == 0)
+ stp_online = 1;
+ return 0;
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment.
+ */
+static void __init stp_reset(void)
+{
+ int rc;
+
+ stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ if (rc == 0)
+ set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+ else if (stp_online) {
+ pr_warning("The real or virtual hardware system does "
+ "not provide an STP interface\n");
+ free_page((unsigned long) stp_page);
+ stp_page = NULL;
+ stp_online = 0;
+ }
+}
+
+static void stp_timeout(unsigned long dummy)
+{
+ queue_work(time_sync_wq, &stp_work);
+}
+
+static int __init stp_init(void)
+{
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return 0;
+ setup_timer(&stp_timer, stp_timeout, 0UL);
+ time_init_wq();
+ if (!stp_online)
+ return 0;
+ queue_work(time_sync_wq, &stp_work);
+ return 0;
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. There are three causes:
+ * 1) timing status change
+ * 2) link availability change
+ * 3) time control parameter change
+ * In all three cases we are only interested in the clock source state.
+ * If an STP clock source is now available, use it.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+ if (intparm->tsc || intparm->lac || intparm->tcpc)
+ queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After an STP sync check the clock is not in sync. The machine check
+ * is broadcast to all cpus at the same time.
+ */
+void stp_sync_check(void)
+{
+ disable_sync_clock(NULL);
+ queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+void stp_island_check(void)
+{
+ disable_sync_clock(NULL);
+ queue_work(time_sync_wq, &stp_work);
+}
+
+
+static int stp_sync_clock(void *data)
+{
+ static int first;
+ unsigned long long old_clock, delta;
+ struct clock_sync_data *stp_sync;
+ int rc;
+
+ stp_sync = data;
+
+ if (xchg(&first, 1) == 1) {
+ /* Slave */
+ clock_sync_cpu(stp_sync);
+ return 0;
+ }
+
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&stp_sync->cpus) != 0)
+ cpu_relax();
+
+ enable_sync_clock();
+
+ rc = 0;
+ if (stp_info.todoff[0] || stp_info.todoff[1] ||
+ stp_info.todoff[2] || stp_info.todoff[3] ||
+ stp_info.tmd != 2) {
+ old_clock = get_tod_clock();
+ rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+ if (rc == 0) {
+ delta = adjust_time(old_clock, get_tod_clock(), 0);
+ fixup_clock_comparator(delta);
+ rc = chsc_sstpi(stp_page, &stp_info,
+ sizeof(struct stp_sstpi));
+ if (rc == 0 && stp_info.tmd != 2)
+ rc = -EAGAIN;
+ }
+ }
+ if (rc) {
+ disable_sync_clock(NULL);
+ stp_sync->in_sync = -EAGAIN;
+ } else
+ stp_sync->in_sync = 1;
+ xchg(&first, 0);
+ return 0;
+}
+
+/*
+ * STP work. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+ struct clock_sync_data stp_sync;
+ int rc;
+
+ /* prevent multiple execution. */
+ mutex_lock(&stp_work_mutex);
+
+ if (!stp_online) {
+ chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ del_timer_sync(&stp_timer);
+ goto out_unlock;
+ }
+
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+ if (rc)
+ goto out_unlock;
+
+ rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ if (rc || stp_info.c == 0)
+ goto out_unlock;
+
+ /* Skip synchronization if the clock is already in sync. */
+ if (check_sync_clock())
+ goto out_unlock;
+
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ get_online_cpus();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
+ put_online_cpus();
+
+ if (!check_sync_clock())
+ /*
+ * There is a usable clock but the synchronization failed.
+ * Retry after a second.
+ */
+ mod_timer(&stp_timer, jiffies + HZ);
+
+out_unlock:
+ mutex_unlock(&stp_work_mutex);
+}
+
+/*
+ * STP subsys sysfs interface functions
+ */
+static struct bus_type stp_subsys = {
+ .name = "stp",
+ .dev_name = "stp",
+};
+
+static ssize_t stp_ctn_id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%016llx\n",
+ *(unsigned long long *) stp_info.ctnid);
+}
+
+static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+
+static ssize_t stp_ctn_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", stp_info.ctn);
+}
+
+static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+
+static ssize_t stp_dst_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x2000))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+}
+
+static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+
+static ssize_t stp_leap_seconds_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x8000))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+}
+
+static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+
+static ssize_t stp_stratum_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+}
+
+static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+
+static ssize_t stp_time_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x0800))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int) stp_info.tto);
+}
+
+static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+
+static ssize_t stp_time_zone_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x4000))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+}
+
+static DEVICE_ATTR(time_zone_offset, 0400,
+ stp_time_zone_offset_show, NULL);
+
+static ssize_t stp_timing_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", stp_info.tmd);
+}
+
+static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+
+static ssize_t stp_timing_state_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", stp_info.tst);
+}
+
+static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+
+static ssize_t stp_online_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%i\n", stp_online);
+}
+
+static ssize_t stp_online_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int value;
+
+ value = simple_strtoul(buf, NULL, 0);
+ if (value != 0 && value != 1)
+ return -EINVAL;
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
+ stp_online = value;
+ if (stp_online)
+ set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ queue_work(time_sync_wq, &stp_work);
+ mutex_unlock(&clock_sync_mutex);
+ return count;
+}
+
+/*
+ * Can't use DEVICE_ATTR because the attribute should be named
+ * stp/online but dev_attr_online already exists in this file ..
+ */
+static struct device_attribute dev_attr_stp_online = {
+ .attr = { .name = "online", .mode = 0600 },
+ .show = stp_online_show,
+ .store = stp_online_store,
+};
+
+static struct device_attribute *stp_attributes[] = {
+ &dev_attr_ctn_id,
+ &dev_attr_ctn_type,
+ &dev_attr_dst_offset,
+ &dev_attr_leap_seconds,
+ &dev_attr_stp_online,
+ &dev_attr_stratum,
+ &dev_attr_time_offset,
+ &dev_attr_time_zone_offset,
+ &dev_attr_timing_mode,
+ &dev_attr_timing_state,
+ NULL
+};
+
+static int __init stp_init_sysfs(void)
+{
+ struct device_attribute **attr;
+ int rc;
+
+ rc = subsys_system_register(&stp_subsys, NULL);
+ if (rc)
+ goto out;
+ for (attr = stp_attributes; *attr; attr++) {
+ rc = device_create_file(stp_subsys.dev_root, *attr);
+ if (rc)
+ goto out_unreg;
+ }
+ return 0;
+out_unreg:
+ for (; attr >= stp_attributes; attr--)
+ device_remove_file(stp_subsys.dev_root, *attr);
+ bus_unregister(&stp_subsys);
+out:
+ return rc;
+}
+
+device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 661a0721705..355a16c5570 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -1,164 +1,187 @@
/*
- * Copyright IBM Corp. 2007
+ * Copyright IBM Corp. 2007, 2011
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/device.h>
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
#include <linux/bootmem.h>
+#include <linux/cpuset.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/kthread.h>
-#include <linux/workqueue.h>
+#include <linux/init.h>
+#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/smp.h>
-#include <asm/delay.h>
-#include <asm/s390_ext.h>
+#include <linux/mm.h>
#include <asm/sysinfo.h>
-#define CPU_BITS 64
-#define NR_MAG 6
-
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
-struct tl_cpu {
- unsigned char reserved0[4];
- unsigned char :6;
- unsigned char pp:2;
- unsigned char reserved1;
- unsigned short origin;
- unsigned long mask[CPU_BITS / BITS_PER_LONG];
-};
-
-struct tl_container {
- unsigned char reserved[8];
-};
-
-union tl_entry {
- unsigned char nl;
- struct tl_cpu cpu;
- struct tl_container container;
-};
-
-struct tl_info {
- unsigned char reserved0[2];
- unsigned short length;
- unsigned char mag[NR_MAG];
- unsigned char reserved1;
- unsigned char mnest;
- unsigned char reserved2[4];
- union tl_entry tle[0];
-};
-
-struct core_info {
- struct core_info *next;
+struct mask_info {
+ struct mask_info *next;
+ unsigned char id;
cpumask_t mask;
};
-static void topology_work_fn(struct work_struct *work);
-static struct tl_info *tl_info;
-static struct core_info core_info;
-static int machine_has_topology;
-static int machine_has_topology_irq;
-static struct timer_list topology_timer;
static void set_topology_timer(void);
+static void topology_work_fn(struct work_struct *work);
+static struct sysinfo_15_1_x *tl_info;
+
+static int topology_enabled = 1;
static DECLARE_WORK(topology_work, topology_work_fn);
-cpumask_t cpu_core_map[NR_CPUS];
+/* topology_lock protects the socket and book linked lists */
+static DEFINE_SPINLOCK(topology_lock);
+static struct mask_info socket_info;
+static struct mask_info book_info;
-cpumask_t cpu_coregroup_map(unsigned int cpu)
+struct cpu_topology_s390 cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
- struct core_info *core = &core_info;
cpumask_t mask;
- cpus_clear(mask);
- if (!machine_has_topology)
- return cpu_present_map;
- mutex_lock(&smp_cpu_state_mutex);
- while (core) {
- if (cpu_isset(cpu, core->mask)) {
- mask = core->mask;
- break;
- }
- core = core->next;
+ cpumask_copy(&mask, cpumask_of(cpu));
+ if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
+ return mask;
+ for (; info; info = info->next) {
+ if (cpumask_test_cpu(cpu, &info->mask))
+ return info->mask;
}
- mutex_unlock(&smp_cpu_state_mutex);
- if (cpus_empty(mask))
- mask = cpumask_of_cpu(cpu);
return mask;
}
-static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
+static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
+ struct mask_info *book,
+ struct mask_info *socket,
+ int one_socket_per_cpu)
{
unsigned int cpu;
- for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS);
- cpu < CPU_BITS;
- cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1))
- {
- unsigned int rcpu, lcpu;
-
- rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
- for_each_present_cpu(lcpu) {
- if (__cpu_logical_map[lcpu] == rcpu) {
- cpu_set(lcpu, core->mask);
- smp_cpu_polarization[lcpu] = tl_cpu->pp;
- }
+ for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) {
+ unsigned int rcpu;
+ int lcpu;
+
+ rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin;
+ lcpu = smp_find_processor_id(rcpu);
+ if (lcpu < 0)
+ continue;
+ cpumask_set_cpu(lcpu, &book->mask);
+ cpu_topology[lcpu].book_id = book->id;
+ cpumask_set_cpu(lcpu, &socket->mask);
+ cpu_topology[lcpu].core_id = rcpu;
+ if (one_socket_per_cpu) {
+ cpu_topology[lcpu].socket_id = rcpu;
+ socket = socket->next;
+ } else {
+ cpu_topology[lcpu].socket_id = socket->id;
}
+ smp_cpu_set_polarization(lcpu, tl_cpu->pp);
}
+ return socket;
}
-static void clear_cores(void)
+static void clear_masks(void)
{
- struct core_info *core = &core_info;
+ struct mask_info *info;
- while (core) {
- cpus_clear(core->mask);
- core = core->next;
+ info = &socket_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
+ info = &book_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
}
}
-static union tl_entry *next_tle(union tl_entry *tle)
+static union topology_entry *next_tle(union topology_entry *tle)
{
- if (tle->nl)
- return (union tl_entry *)((struct tl_container *)tle + 1);
- else
- return (union tl_entry *)((struct tl_cpu *)tle + 1);
+ if (!tle->nl)
+ return (union topology_entry *)((struct topology_cpu *)tle + 1);
+ return (union topology_entry *)((struct topology_container *)tle + 1);
}
-static void tl_to_cores(struct tl_info *info)
+static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
{
- union tl_entry *tle, *end;
- struct core_info *core = &core_info;
+ struct mask_info *socket = &socket_info;
+ struct mask_info *book = &book_info;
+ union topology_entry *tle, *end;
- mutex_lock(&smp_cpu_state_mutex);
- clear_cores();
tle = info->tle;
- end = (union tl_entry *)((unsigned long)info + info->length);
+ end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
- case 5:
- case 4:
- case 3:
case 2:
+ book = book->next;
+ book->id = tle->container.id;
break;
case 1:
- core = core->next;
+ socket = socket->next;
+ socket->id = tle->container.id;
break;
case 0:
- add_cpus_to_core(&tle->cpu, core);
+ add_cpus_to_mask(&tle->cpu, book, socket, 0);
break;
default:
- clear_cores();
- machine_has_topology = 0;
+ clear_masks();
return;
}
tle = next_tle(tle);
}
- mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
+{
+ struct mask_info *socket = &socket_info;
+ struct mask_info *book = &book_info;
+ union topology_entry *tle, *end;
+
+ tle = info->tle;
+ end = (union topology_entry *)((unsigned long)info + info->length);
+ while (tle < end) {
+ switch (tle->nl) {
+ case 1:
+ book = book->next;
+ book->id = tle->container.id;
+ break;
+ case 0:
+ socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+ break;
+ default:
+ clear_masks();
+ return;
+ }
+ tle = next_tle(tle);
+ }
+}
+
+static void tl_to_masks(struct sysinfo_15_1_x *info)
+{
+ struct cpuid cpu_id;
+
+ spin_lock_irq(&topology_lock);
+ get_cpu_id(&cpu_id);
+ clear_masks();
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ __tl_to_masks_z10(info);
+ break;
+ default:
+ __tl_to_masks_generic(info);
+ }
+ spin_unlock_irq(&topology_lock);
}
static void topology_update_polarization_simple(void)
@@ -166,8 +189,8 @@ static void topology_update_polarization_simple(void)
int cpu;
mutex_lock(&smp_cpu_state_mutex);
- for_each_present_cpu(cpu)
- smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
+ for_each_possible_cpu(cpu)
+ smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
mutex_unlock(&smp_cpu_state_mutex);
}
@@ -186,10 +209,9 @@ static int ptf(unsigned long fc)
int topology_set_cpu_management(int fc)
{
- int cpu;
- int rc;
+ int cpu, rc;
- if (!machine_has_topology)
+ if (!MACHINE_HAS_TOPOLOGY)
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
@@ -197,53 +219,61 @@ int topology_set_cpu_management(int fc)
rc = ptf(PTF_HORIZONTAL);
if (rc)
return -EBUSY;
- for_each_present_cpu(cpu)
- smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+ for_each_possible_cpu(cpu)
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
return rc;
}
-static void update_cpu_core_map(void)
+static void update_cpu_masks(void)
{
+ unsigned long flags;
int cpu;
- for_each_present_cpu(cpu)
- cpu_core_map[cpu] = cpu_coregroup_map(cpu);
+ spin_lock_irqsave(&topology_lock, flags);
+ for_each_possible_cpu(cpu) {
+ cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
+ cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
+ if (!MACHINE_HAS_TOPOLOGY) {
+ cpu_topology[cpu].core_id = cpu;
+ cpu_topology[cpu].socket_id = cpu;
+ cpu_topology[cpu].book_id = cpu;
+ }
+ }
+ spin_unlock_irqrestore(&topology_lock, flags);
}
-void arch_update_cpu_topology(void)
+void store_topology(struct sysinfo_15_1_x *info)
{
- struct tl_info *info = tl_info;
- struct sys_device *sysdev;
+ if (topology_max_mnest >= 3)
+ stsi(info, 15, 1, 3);
+ else
+ stsi(info, 15, 1, 2);
+}
+
+int arch_update_cpu_topology(void)
+{
+ struct sysinfo_15_1_x *info = tl_info;
+ struct device *dev;
int cpu;
- if (!machine_has_topology) {
- update_cpu_core_map();
+ if (!MACHINE_HAS_TOPOLOGY) {
+ update_cpu_masks();
topology_update_polarization_simple();
- return;
+ return 0;
}
- stsi(info, 15, 1, 2);
- tl_to_cores(info);
- update_cpu_core_map();
+ store_topology(info);
+ tl_to_masks(info);
+ update_cpu_masks();
for_each_online_cpu(cpu) {
- sysdev = get_cpu_sysdev(cpu);
- kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+ dev = get_cpu_device(cpu);
+ kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
-}
-
-static int topology_kthread(void *data)
-{
- arch_reinit_sched_domains();
- return 0;
+ return 1;
}
static void topology_work_fn(struct work_struct *work)
{
- /* We can't call arch_reinit_sched_domains() from a multi-threaded
- * workqueue context since it may deadlock in case of cpu hotplug.
- * So we have to create a kernel thread in order to call
- * arch_reinit_sched_domains().
- */
- kthread_run(topology_kthread, NULL, "topology_update");
+ rebuild_sched_domains();
}
void topology_schedule_update(void)
@@ -258,84 +288,191 @@ static void topology_timer_fn(unsigned long ignored)
set_topology_timer();
}
+static struct timer_list topology_timer =
+ TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);
+
+static atomic_t topology_poll = ATOMIC_INIT(0);
+
static void set_topology_timer(void)
{
- topology_timer.function = topology_timer_fn;
- topology_timer.data = 0;
- topology_timer.expires = jiffies + 60 * HZ;
- add_timer(&topology_timer);
+ if (atomic_add_unless(&topology_poll, -1, 0))
+ mod_timer(&topology_timer, jiffies + HZ / 10);
+ else
+ mod_timer(&topology_timer, jiffies + HZ * 60);
}
-static void topology_interrupt(__u16 code)
+void topology_expect_change(void)
{
- schedule_work(&topology_work);
+ if (!MACHINE_HAS_TOPOLOGY)
+ return;
+ /* This is racy, but it doesn't matter since it is just a heuristic.
+ * Worst case is that we poll at a higher frequency for a bit longer.
+ */
+ if (atomic_read(&topology_poll) > 60)
+ return;
+ atomic_add(60, &topology_poll);
+ set_topology_timer();
}
-static int __init init_topology_update(void)
+static int __init early_parse_topology(char *p)
{
- int rc;
+ if (strncmp(p, "off", 3))
+ return 0;
+ topology_enabled = 0;
+ return 0;
+}
+early_param("topology", early_parse_topology);
- rc = 0;
- if (!machine_has_topology) {
- topology_update_polarization_simple();
- goto out;
- }
- init_timer_deferrable(&topology_timer);
- if (machine_has_topology_irq) {
- rc = register_external_interrupt(0x2005, topology_interrupt);
- if (rc)
- goto out;
- ctl_set_bit(0, 8);
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+ struct mask_info *mask, int offset)
+{
+ int i, nr_masks;
+
+ nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+ for (i = 0; i < info->mnest - offset; i++)
+ nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+ nr_masks = max(nr_masks, 1);
+ for (i = 0; i < nr_masks; i++) {
+ mask->next = alloc_bootmem_align(
+ roundup_pow_of_two(sizeof(struct mask_info)),
+ roundup_pow_of_two(sizeof(struct mask_info)));
+ mask = mask->next;
}
- else
- set_topology_timer();
-out:
- update_cpu_core_map();
- return rc;
}
-__initcall(init_topology_update);
void __init s390_init_cpu_topology(void)
{
- unsigned long long facility_bits;
- struct tl_info *info;
- struct core_info *core;
- int nr_cores;
+ struct sysinfo_15_1_x *info;
int i;
- if (stfle(&facility_bits, 1) <= 0)
- return;
- if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61)))
+ if (!MACHINE_HAS_TOPOLOGY)
return;
- machine_has_topology = 1;
-
- if (facility_bits & (1ULL << 51))
- machine_has_topology_irq = 1;
-
tl_info = alloc_bootmem_pages(PAGE_SIZE);
- if (!tl_info)
- goto error;
info = tl_info;
- stsi(info, 15, 1, 2);
-
- nr_cores = info->mag[NR_MAG - 2];
- for (i = 0; i < info->mnest - 2; i++)
- nr_cores *= info->mag[NR_MAG - 3 - i];
-
- printk(KERN_INFO "CPU topology:");
- for (i = 0; i < NR_MAG; i++)
- printk(" %d", info->mag[i]);
- printk(" / %d\n", info->mnest);
-
- core = &core_info;
- for (i = 0; i < nr_cores; i++) {
- core->next = alloc_bootmem(sizeof(struct core_info));
- core = core->next;
- if (!core)
- goto error;
+ store_topology(info);
+ pr_info("The CPU configuration topology of the machine is:");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ printk(KERN_CONT " %d", info->mag[i]);
+ printk(KERN_CONT " / %d\n", info->mnest);
+ alloc_masks(info, &socket_info, 1);
+ alloc_masks(info, &book_info, 2);
+}
+
+static int cpu_management;
+
+static ssize_t dispatching_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", cpu_management);
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+
+static ssize_t dispatching_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int val, rc;
+ char delim;
+
+ if (sscanf(buf, "%d %c", &val, &delim) != 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ rc = 0;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == val)
+ goto out;
+ rc = topology_set_cpu_management(val);
+ if (rc)
+ goto out;
+ cpu_management = val;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(dispatching, 0644, dispatching_show,
+ dispatching_store);
+
+static ssize_t cpu_polarization_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int cpu = dev->id;
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ switch (smp_cpu_get_polarization(cpu)) {
+ case POLARIZATION_HRZ:
+ count = sprintf(buf, "horizontal\n");
+ break;
+ case POLARIZATION_VL:
+ count = sprintf(buf, "vertical:low\n");
+ break;
+ case POLARIZATION_VM:
+ count = sprintf(buf, "vertical:medium\n");
+ break;
+ case POLARIZATION_VH:
+ count = sprintf(buf, "vertical:high\n");
+ break;
+ default:
+ count = sprintf(buf, "unknown\n");
+ break;
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
+static struct attribute *topology_cpu_attrs[] = {
+ &dev_attr_polarization.attr,
+ NULL,
+};
+
+static struct attribute_group topology_cpu_attr_group = {
+ .attrs = topology_cpu_attrs,
+};
+
+int topology_cpu_init(struct cpu *cpu)
+{
+ return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+}
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_mask;
+}
+
+static const struct cpumask *cpu_book_mask(int cpu)
+{
+ return &cpu_topology[cpu].book_mask;
+}
+
+static struct sched_domain_topology_level s390_topology[] = {
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_book_mask, SD_INIT_NAME(BOOK) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
+static int __init topology_init(void)
+{
+ if (!MACHINE_HAS_TOPOLOGY) {
+ topology_update_polarization_simple();
+ goto out;
}
- return;
-error:
- machine_has_topology = 0;
- machine_has_topology_irq = 0;
+ set_topology_timer();
+out:
+
+ set_sched_topology(s390_topology);
+
+ return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
+device_initcall(topology_init);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 4584d81984c..c5762324d9e 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -1,8 +1,6 @@
/*
- * arch/s390/kernel/traps.c
- *
* S390 version
- * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999, 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
@@ -14,317 +12,45 @@
* 'Traps.c' handles hardware traps and faults after we have saved some
* state in 'asm.s'.
*/
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
#include <linux/ptrace.h>
-#include <linux/timer.h>
+#include <linux/sched.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/kallsyms.h>
-#include <linux/reboot.h>
-#include <linux/kprobes.h>
-#include <linux/bug.h>
-#include <linux/utsname.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/mathemu.h>
-#include <asm/cpcmd.h>
-#include <asm/s390_ext.h>
-#include <asm/lowcore.h>
-#include <asm/debug.h>
#include "entry.h"
-pgm_check_handler_t *pgm_check_table[128];
-
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_PROCESS_DEBUG
-int sysctl_userprocess_debug = 1;
-#else
-int sysctl_userprocess_debug = 0;
-#endif
-#endif
-
-extern pgm_check_handler_t do_protection_exception;
-extern pgm_check_handler_t do_dat_exception;
-extern pgm_check_handler_t do_asce_exception;
-
-#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
+int show_unhandled_signals = 1;
-#ifndef CONFIG_64BIT
-#define FOURLONG "%08lx %08lx %08lx %08lx\n"
-static int kstack_depth_to_print = 12;
-#else /* CONFIG_64BIT */
-#define FOURLONG "%016lx %016lx %016lx %016lx\n"
-static int kstack_depth_to_print = 20;
-#endif /* CONFIG_64BIT */
-
-/*
- * For show_trace we have tree different stack to consider:
- * - the panic stack which is used if the kernel stack has overflown
- * - the asynchronous interrupt stack (cpu related)
- * - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
- * touch all of them. The order is: panic stack, async stack, sync stack.
- */
-static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
-{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- sp = sp & PSW_ADDR_INSN;
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain & PSW_ADDR_INSN;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN);
- print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN);
- low = sp;
- sp = regs->gprs[15];
- }
-}
-
-static void show_trace(struct task_struct *task, unsigned long *stack)
-{
- register unsigned long __r15 asm ("15");
- unsigned long sp;
-
- sp = (unsigned long) stack;
- if (!sp)
- sp = task ? task->thread.ksp : __r15;
- printk("Call Trace:\n");
-#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp, S390_lowcore.panic_stack - 4096,
- S390_lowcore.panic_stack);
-#endif
- sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE,
- S390_lowcore.async_stack);
- if (task)
- __show_trace(sp, (unsigned long) task_stack_page(task),
- (unsigned long) task_stack_page(task) + THREAD_SIZE);
- else
- __show_trace(sp, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
- if (!task)
- task = current;
- debug_show_held_locks(task);
-}
-
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- register unsigned long * __r15 asm ("15");
- unsigned long *stack;
- int i;
-
- if (!sp)
- stack = task ? (unsigned long *) task->thread.ksp : __r15;
- else
- stack = sp;
-
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
- break;
- if (i && ((i * sizeof (long) % 32) == 0))
- printk("\n ");
- printk("%p ", (void *)*stack++);
- }
- printk("\n");
- show_trace(task, sp);
-}
-
-static void show_last_breaking_event(struct pt_regs *regs)
+static inline void __user *get_trap_ip(struct pt_regs *regs)
{
#ifdef CONFIG_64BIT
- printk("Last Breaking-Event-Address:\n");
- printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
- print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
-#endif
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- printk("CPU: %d %s %s %.*s\n",
- task_thread_info(current)->cpu, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
- current->comm, current->pid, current,
- (void *) current->thread.ksp);
- show_stack(NULL, NULL);
-}
-EXPORT_SYMBOL(dump_stack);
-
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
-{
- return (regs->psw.mask & bits) / ((~bits + 1) & bits);
-}
-
-void show_registers(struct pt_regs *regs)
-{
- char *mode;
-
- mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl";
- printk("%s PSW : %p %p",
- mode, (void *) regs->psw.mask,
- (void *) regs->psw.addr);
- print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN);
- printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
- mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
- mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
- mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
- mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
- mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
-#ifdef CONFIG_64BIT
- printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS));
-#endif
- printk("\n%s GPRS: " FOURLONG, mode,
- regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
- printk(" " FOURLONG,
- regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
- printk(" " FOURLONG,
- regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
- printk(" " FOURLONG,
- regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
-
- show_code(regs);
-}
-
-void show_regs(struct pt_regs *regs)
-{
- print_modules();
- printk("CPU: %d %s %s %.*s\n",
- task_thread_info(current)->cpu, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
- current->comm, current->pid, current,
- (void *) current->thread.ksp);
- show_registers(regs);
- /* Show stack backtrace if pt_regs is from kernel mode */
- if (!(regs->psw.mask & PSW_MASK_PSTATE))
- show_trace(NULL, (unsigned long *) regs->gprs[15]);
- show_last_breaking_event(regs);
-}
+ unsigned long address;
-/* This is called from fs/proc/array.c */
-void task_show_regs(struct seq_file *m, struct task_struct *task)
-{
- struct pt_regs *regs;
-
- regs = task_pt_regs(task);
- seq_printf(m, "task: %p, ksp: %p\n",
- task, (void *)task->thread.ksp);
- seq_printf(m, "User PSW : %p %p\n",
- (void *) regs->psw.mask, (void *)regs->psw.addr);
-
- seq_printf(m, "User GPRS: " FOURLONG,
- regs->gprs[0], regs->gprs[1],
- regs->gprs[2], regs->gprs[3]);
- seq_printf(m, " " FOURLONG,
- regs->gprs[4], regs->gprs[5],
- regs->gprs[6], regs->gprs[7]);
- seq_printf(m, " " FOURLONG,
- regs->gprs[8], regs->gprs[9],
- regs->gprs[10], regs->gprs[11]);
- seq_printf(m, " " FOURLONG,
- regs->gprs[12], regs->gprs[13],
- regs->gprs[14], regs->gprs[15]);
- seq_printf(m, "User ACRS: %08x %08x %08x %08x\n",
- task->thread.acrs[0], task->thread.acrs[1],
- task->thread.acrs[2], task->thread.acrs[3]);
- seq_printf(m, " %08x %08x %08x %08x\n",
- task->thread.acrs[4], task->thread.acrs[5],
- task->thread.acrs[6], task->thread.acrs[7]);
- seq_printf(m, " %08x %08x %08x %08x\n",
- task->thread.acrs[8], task->thread.acrs[9],
- task->thread.acrs[10], task->thread.acrs[11]);
- seq_printf(m, " %08x %08x %08x %08x\n",
- task->thread.acrs[12], task->thread.acrs[13],
- task->thread.acrs[14], task->thread.acrs[15]);
-}
-
-static DEFINE_SPINLOCK(die_lock);
-
-void die(const char * str, struct pt_regs * regs, long err)
-{
- static int die_counter;
-
- oops_enter();
- debug_stop_all();
- console_verbose();
- spin_lock_irq(&die_lock);
- bust_spinlocks(1);
- printk("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
+ if (regs->int_code & 0x200)
+ address = *(unsigned long *)(current->thread.trap_tdb + 24);
+ else
+ address = regs->psw.addr;
+ return (void __user *)
+ ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+#else
+ return (void __user *)
+ ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN);
#endif
- printk("\n");
- notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
- show_regs(regs);
- bust_spinlocks(0);
- add_taint(TAINT_DIE);
- spin_unlock_irq(&die_lock);
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception: panic_on_oops");
- oops_exit();
- do_exit(SIGSEGV);
}
-static void inline
-report_user_fault(long interruption_code, struct pt_regs *regs)
+static inline void report_user_fault(struct pt_regs *regs, int signr)
{
-#if defined(CONFIG_SYSCTL)
- if (!sysctl_userprocess_debug)
+ if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
return;
-#endif
-#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
- printk("User process fault: interruption code 0x%lX\n",
- interruption_code);
+ if (!unhandled_signal(current, signr))
+ return;
+ if (!printk_ratelimit())
+ return;
+ printk("User process fault: interruption code 0x%X ", regs->int_code);
+ print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+ printk("\n");
show_regs(regs);
-#endif
}
int is_valid_bugaddr(unsigned long addr)
@@ -332,158 +58,141 @@ int is_valid_bugaddr(unsigned long addr)
return 1;
}
-static void __kprobes inline do_trap(long interruption_code, int signr,
- char *str, struct pt_regs *regs,
- siginfo_t *info)
+static void __kprobes do_trap(struct pt_regs *regs,
+ int si_signo, int si_code, char *str)
{
- /*
- * We got all needed information from the lowcore and can
- * now safely switch on interrupts.
- */
- if (regs->psw.mask & PSW_MASK_PSTATE)
- local_irq_enable();
-
- if (notify_die(DIE_TRAP, str, regs, interruption_code,
- interruption_code, signr) == NOTIFY_STOP)
- return;
+ siginfo_t info;
- if (regs->psw.mask & PSW_MASK_PSTATE) {
- struct task_struct *tsk = current;
+ if (notify_die(DIE_TRAP, str, regs, 0,
+ regs->int_code, si_signo) == NOTIFY_STOP)
+ return;
- tsk->thread.trap_no = interruption_code & 0xffff;
- force_sig_info(signr, info, tsk);
- report_user_fault(interruption_code, regs);
+ if (user_mode(regs)) {
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = get_trap_ip(regs);
+ force_sig_info(si_signo, &info, current);
+ report_user_fault(regs, si_signo);
} else {
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (fixup)
- regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
else {
enum bug_trap_type btt;
btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
if (btt == BUG_TRAP_TYPE_WARN)
return;
- die(str, regs, interruption_code);
+ die(regs, str);
}
}
}
-static inline void __user *get_check_address(struct pt_regs *regs)
+void __kprobes do_per_trap(struct pt_regs *regs)
{
- return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN);
-}
+ siginfo_t info;
-void __kprobes do_single_step(struct pt_regs *regs)
-{
- if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0,
- SIGTRAP) == NOTIFY_STOP){
+ if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
return;
- }
- if ((current->ptrace & PT_PTRACED) != 0)
- force_sig(SIGTRAP, current);
+ if (!current->ptrace)
+ return;
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_HWBKPT;
+ info.si_addr =
+ (void __force __user *) current->thread.per_event.address;
+ force_sig_info(SIGTRAP, &info, current);
}
-static void default_trap_handler(struct pt_regs * regs, long interruption_code)
+void default_trap_handler(struct pt_regs *regs)
{
- if (regs->psw.mask & PSW_MASK_PSTATE) {
- local_irq_enable();
+ if (user_mode(regs)) {
+ report_user_fault(regs, SIGSEGV);
do_exit(SIGSEGV);
- report_user_fault(interruption_code, regs);
} else
- die("Unknown program exception", regs, interruption_code);
+ die(regs, "Unknown program exception");
}
-#define DO_ERROR_INFO(signr, str, name, sicode, siaddr) \
-static void name(struct pt_regs * regs, long interruption_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = siaddr; \
- do_trap(interruption_code, signr, str, regs, &info); \
+#define DO_ERROR_INFO(name, signr, sicode, str) \
+void name(struct pt_regs *regs) \
+{ \
+ do_trap(regs, signr, sicode, str); \
}
-DO_ERROR_INFO(SIGILL, "addressing exception", addressing_exception,
- ILL_ILLADR, get_check_address(regs))
-DO_ERROR_INFO(SIGILL, "execute exception", execute_exception,
- ILL_ILLOPN, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "fixpoint divide exception", divide_exception,
- FPE_INTDIV, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "fixpoint overflow exception", overflow_exception,
- FPE_INTOVF, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "HFP overflow exception", hfp_overflow_exception,
- FPE_FLTOVF, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "HFP underflow exception", hfp_underflow_exception,
- FPE_FLTUND, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "HFP significance exception", hfp_significance_exception,
- FPE_FLTRES, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "HFP divide exception", hfp_divide_exception,
- FPE_FLTDIV, get_check_address(regs))
-DO_ERROR_INFO(SIGFPE, "HFP square root exception", hfp_sqrt_exception,
- FPE_FLTINV, get_check_address(regs))
-DO_ERROR_INFO(SIGILL, "operand exception", operand_exception,
- ILL_ILLOPN, get_check_address(regs))
-DO_ERROR_INFO(SIGILL, "privileged operation", privileged_op,
- ILL_PRVOPC, get_check_address(regs))
-DO_ERROR_INFO(SIGILL, "special operation exception", special_op_exception,
- ILL_ILLOPN, get_check_address(regs))
-DO_ERROR_INFO(SIGILL, "translation exception", translation_exception,
- ILL_ILLOPN, get_check_address(regs))
-
-static inline void
-do_fp_trap(struct pt_regs *regs, void __user *location,
- int fpc, long interruption_code)
-{
- siginfo_t si;
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
+ "addressing exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
+ "execute exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
+ "fixpoint divide exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
+ "fixpoint overflow exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
+ "HFP overflow exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
+ "HFP underflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
+ "HFP significance exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
+ "HFP divide exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
+ "HFP square root exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
+ "operand exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
+ "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
+ "special operation exception")
+DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN,
+ "translation exception")
- si.si_signo = SIGFPE;
- si.si_errno = 0;
- si.si_addr = location;
- si.si_code = 0;
+#ifdef CONFIG_64BIT
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
+ "transaction constraint exception")
+#endif
+
+static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+{
+ int si_code = 0;
/* FPC[2] is Data Exception Code */
if ((fpc & 0x00000300) == 0) {
/* bits 6 and 7 of DXC are 0 iff IEEE exception */
if (fpc & 0x8000) /* invalid fp operation */
- si.si_code = FPE_FLTINV;
+ si_code = FPE_FLTINV;
else if (fpc & 0x4000) /* div by 0 */
- si.si_code = FPE_FLTDIV;
+ si_code = FPE_FLTDIV;
else if (fpc & 0x2000) /* overflow */
- si.si_code = FPE_FLTOVF;
+ si_code = FPE_FLTOVF;
else if (fpc & 0x1000) /* underflow */
- si.si_code = FPE_FLTUND;
+ si_code = FPE_FLTUND;
else if (fpc & 0x0800) /* inexact */
- si.si_code = FPE_FLTRES;
+ si_code = FPE_FLTRES;
}
- current->thread.ieee_instruction_pointer = (addr_t) location;
- do_trap(interruption_code, SIGFPE,
- "floating point exception", regs, &si);
+ do_trap(regs, SIGFPE, si_code, "floating point exception");
}
-static void illegal_op(struct pt_regs * regs, long interruption_code)
+void __kprobes illegal_op(struct pt_regs *regs)
{
siginfo_t info;
__u8 opcode[6];
__u16 __user *location;
int signal = 0;
- location = get_check_address(regs);
-
- /*
- * We got all needed information from the lowcore and can
- * now safely switch on interrupts.
- */
- if (regs->psw.mask & PSW_MASK_PSTATE)
- local_irq_enable();
+ location = get_trap_ip(regs);
- if (regs->psw.mask & PSW_MASK_PSTATE) {
+ if (user_mode(regs)) {
if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
return;
if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
- if (current->ptrace & PT_PTRACED)
- force_sig(SIGTRAP, current);
- else
+ if (current->ptrace) {
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_BRKPT;
+ info.si_addr = location;
+ force_sig_info(SIGTRAP, &info, current);
+ } else
signal = SIGILL;
#ifdef CONFIG_MATHEMU
} else if (opcode[0] == 0xb3) {
@@ -515,53 +224,33 @@ static void illegal_op(struct pt_regs * regs, long interruption_code)
* If we get an illegal op in kernel mode, send it through the
* kprobes notifier. If kprobes doesn't pick it up, SIGILL
*/
- if (notify_die(DIE_BPT, "bpt", regs, interruption_code,
+ if (notify_die(DIE_BPT, "bpt", regs, 0,
3, SIGTRAP) != NOTIFY_STOP)
signal = SIGILL;
}
#ifdef CONFIG_MATHEMU
if (signal == SIGFPE)
- do_fp_trap(regs, location,
- current->thread.fp_regs.fpc, interruption_code);
- else if (signal == SIGSEGV) {
- info.si_signo = signal;
- info.si_errno = 0;
- info.si_code = SEGV_MAPERR;
- info.si_addr = (void __user *) location;
- do_trap(interruption_code, signal,
- "user address fault", regs, &info);
- } else
+ do_fp_trap(regs, current->thread.fp_regs.fpc);
+ else if (signal == SIGSEGV)
+ do_trap(regs, signal, SEGV_MAPERR, "user address fault");
+ else
#endif
- if (signal) {
- info.si_signo = signal;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = (void __user *) location;
- do_trap(interruption_code, signal,
- "illegal operation", regs, &info);
- }
+ if (signal)
+ do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
}
#ifdef CONFIG_MATHEMU
-asmlinkage void
-specification_exception(struct pt_regs * regs, long interruption_code)
+void specification_exception(struct pt_regs *regs)
{
__u8 opcode[6];
__u16 __user *location = NULL;
int signal = 0;
- location = (__u16 __user *) get_check_address(regs);
+ location = (__u16 __user *) get_trap_ip(regs);
- /*
- * We got all needed information from the lowcore and can
- * now safely switch on interrupts.
- */
- if (regs->psw.mask & PSW_MASK_PSTATE)
- local_irq_enable();
-
- if (regs->psw.mask & PSW_MASK_PSTATE) {
+ if (user_mode(regs)) {
get_user(*((__u16 *) opcode), location);
switch (opcode[0]) {
case 0x28: /* LDR Rx,Ry */
@@ -594,42 +283,27 @@ specification_exception(struct pt_regs * regs, long interruption_code)
signal = SIGILL;
if (signal == SIGFPE)
- do_fp_trap(regs, location,
- current->thread.fp_regs.fpc, interruption_code);
- else if (signal) {
- siginfo_t info;
- info.si_signo = signal;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPN;
- info.si_addr = location;
- do_trap(interruption_code, signal,
- "specification exception", regs, &info);
- }
+ do_fp_trap(regs, current->thread.fp_regs.fpc);
+ else if (signal)
+ do_trap(regs, signal, ILL_ILLOPN, "specification exception");
}
#else
-DO_ERROR_INFO(SIGILL, "specification exception", specification_exception,
- ILL_ILLOPN, get_check_address(regs));
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
+ "specification exception");
#endif
-static void data_exception(struct pt_regs * regs, long interruption_code)
+void data_exception(struct pt_regs *regs)
{
__u16 __user *location;
int signal = 0;
- location = get_check_address(regs);
-
- /*
- * We got all needed information from the lowcore and can
- * now safely switch on interrupts.
- */
- if (regs->psw.mask & PSW_MASK_PSTATE)
- local_irq_enable();
+ location = get_trap_ip(regs);
if (MACHINE_HAS_IEEE)
asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
#ifdef CONFIG_MATHEMU
- else if (regs->psw.mask & PSW_MASK_PSTATE) {
+ else if (user_mode(regs)) {
__u8 opcode[6];
get_user(*((__u16 *) opcode), location);
switch (opcode[0]) {
@@ -688,35 +362,21 @@ static void data_exception(struct pt_regs * regs, long interruption_code)
else
signal = SIGILL;
if (signal == SIGFPE)
- do_fp_trap(regs, location,
- current->thread.fp_regs.fpc, interruption_code);
- else if (signal) {
- siginfo_t info;
- info.si_signo = signal;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPN;
- info.si_addr = location;
- do_trap(interruption_code, signal,
- "data exception", regs, &info);
- }
+ do_fp_trap(regs, current->thread.fp_regs.fpc);
+ else if (signal)
+ do_trap(regs, signal, ILL_ILLOPN, "data exception");
}
-static void space_switch_exception(struct pt_regs * regs, long int_code)
+void space_switch_exception(struct pt_regs *regs)
{
- siginfo_t info;
-
/* Set user psw back to home space mode. */
- if (regs->psw.mask & PSW_MASK_PSTATE)
+ if (user_mode(regs))
regs->psw.mask |= PSW_ASC_HOME;
/* Send SIGILL. */
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_PRVOPC;
- info.si_addr = get_check_address(regs);
- do_trap(int_code, SIGILL, "space switch event", regs, &info);
+ do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
}
-asmlinkage void kernel_stack_overflow(struct pt_regs * regs)
+void __kprobes kernel_stack_overflow(struct pt_regs * regs)
{
bust_spinlocks(1);
printk("Kernel stack overflow.\n");
@@ -725,41 +385,7 @@ asmlinkage void kernel_stack_overflow(struct pt_regs * regs)
panic("Corrupt kernel stack, can't continue.");
}
-/* init is done in lowcore.S and head.S */
-
void __init trap_init(void)
{
- int i;
-
- for (i = 0; i < 128; i++)
- pgm_check_table[i] = &default_trap_handler;
- pgm_check_table[1] = &illegal_op;
- pgm_check_table[2] = &privileged_op;
- pgm_check_table[3] = &execute_exception;
- pgm_check_table[4] = &do_protection_exception;
- pgm_check_table[5] = &addressing_exception;
- pgm_check_table[6] = &specification_exception;
- pgm_check_table[7] = &data_exception;
- pgm_check_table[8] = &overflow_exception;
- pgm_check_table[9] = &divide_exception;
- pgm_check_table[0x0A] = &overflow_exception;
- pgm_check_table[0x0B] = &divide_exception;
- pgm_check_table[0x0C] = &hfp_overflow_exception;
- pgm_check_table[0x0D] = &hfp_underflow_exception;
- pgm_check_table[0x0E] = &hfp_significance_exception;
- pgm_check_table[0x0F] = &hfp_divide_exception;
- pgm_check_table[0x10] = &do_dat_exception;
- pgm_check_table[0x11] = &do_dat_exception;
- pgm_check_table[0x12] = &translation_exception;
- pgm_check_table[0x13] = &special_op_exception;
-#ifdef CONFIG_64BIT
- pgm_check_table[0x38] = &do_asce_exception;
- pgm_check_table[0x39] = &do_dat_exception;
- pgm_check_table[0x3A] = &do_dat_exception;
- pgm_check_table[0x3B] = &do_dat_exception;
-#endif /* CONFIG_64BIT */
- pgm_check_table[0x15] = &operand_exception;
- pgm_check_table[0x1C] = &space_switch_exception;
- pgm_check_table[0x1D] = &hfp_sqrt_exception;
- pfault_irq_init();
+ local_mcck_enable();
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
new file mode 100644
index 00000000000..61364909678
--- /dev/null
+++ b/arch/s390/kernel/vdso.c
@@ -0,0 +1,333 @@
+/*
+ * vdso setup for s390
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/facility.h>
+
+#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+
+#ifdef CONFIG_64BIT
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+static unsigned int vdso64_pages;
+static struct page **vdso64_pagelist;
+#endif /* CONFIG_64BIT */
+
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+static int __init vdso_setup(char *s)
+{
+ unsigned long val;
+ int rc;
+
+ rc = 0;
+ if (strncmp(s, "on", 3) == 0)
+ vdso_enabled = 1;
+ else if (strncmp(s, "off", 4) == 0)
+ vdso_enabled = 0;
+ else {
+ rc = kstrtoul(s, 0, &val);
+ vdso_enabled = rc ? 0 : !!val;
+ }
+ return !rc;
+}
+__setup("vdso=", vdso_setup);
+
+/*
+ * The vdso data page
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+ vd->ectg_available = test_facility(31);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#define SEGMENT_ORDER 2
+
+int vdso_alloc_per_cpu(struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+ int i;
+
+ lowcore->vdso_per_cpu_data = __LC_PASTE;
+
+ if (!vdso_enabled)
+ return 0;
+
+ segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+ page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ page_frame = get_zeroed_page(GFP_KERNEL);
+ if (!segment_table || !page_table || !page_frame)
+ goto out;
+
+ clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+ PAGE_SIZE << SEGMENT_ORDER);
+ clear_table((unsigned long *) page_table, _PAGE_INVALID,
+ 256*sizeof(unsigned long));
+
+ *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+ *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
+
+ psal = (u32 *) (page_table + 256*sizeof(unsigned long));
+ aste = psal + 32;
+
+ for (i = 4; i < 32; i += 4)
+ psal[i] = 0x80000000;
+
+ lowcore->paste[4] = (u32)(addr_t) psal;
+ psal[0] = 0x02000000;
+ psal[2] = (u32)(addr_t) aste;
+ *(unsigned long *) (aste + 2) = segment_table +
+ _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+ aste[4] = (u32)(addr_t) psal;
+ lowcore->vdso_per_cpu_data = page_frame;
+
+ return 0;
+
+out:
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+ return -ENOMEM;
+}
+
+void vdso_free_per_cpu(struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+
+ if (!vdso_enabled)
+ return;
+
+ psal = (u32 *)(addr_t) lowcore->paste[4];
+ aste = (u32 *)(addr_t) psal[2];
+ segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+ page_table = *(unsigned long *) segment_table;
+ page_frame = *(unsigned long *) page_table;
+
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+}
+
+static void vdso_init_cr5(void)
+{
+ unsigned long cr5;
+
+ if (!vdso_enabled)
+ return;
+ cr5 = offsetof(struct _lowcore, paste);
+ __ctl_load(cr5, 5, 5);
+}
+#endif /* CONFIG_64BIT */
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ struct page **vdso_pagelist;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+ int rc;
+
+ if (!vdso_enabled)
+ return 0;
+ /*
+ * Only map the vdso for dynamically linked elf binaries.
+ */
+ if (!uses_interp)
+ return 0;
+
+#ifdef CONFIG_64BIT
+ vdso_pagelist = vdso64_pagelist;
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+ }
+#endif
+#else
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+#endif
+
+ /*
+ * vDSO has a problem and was disabled, just don't "enable" it for
+ * the process
+ */
+ if (vdso_pages == 0)
+ return 0;
+
+ current->mm->context.vdso_base = 0;
+
+ /*
+ * pick a base address for the vDSO in process space. No address
+ * hint is passed (addr == 0), so get_unmapped_area() is free to
+ * choose any suitable location for the mapping.
+ */
+ down_write(&mm->mmap_sem);
+ vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ rc = vdso_base;
+ goto out_up;
+ }
+
+ /*
+ * Put vDSO base into mm struct. We need to do this before calling
+ * install_special_mapping or the perf counter mmap tracking code
+ * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ */
+ current->mm->context.vdso_base = vdso_base;
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process
+ * isn't allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW
+ * on those pages but it's then your responsibility to never do that
+ * on the "data" page of the vDSO or you'll stop getting kernel
+ * updates and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though.
+ */
+ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_pagelist);
+ if (rc)
+ current->mm->context.vdso_base = 0;
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
+ return "[vdso]";
+ return NULL;
+}
+
+static int __init vdso_init(void)
+{
+ int i;
+
+ if (!vdso_enabled)
+ return 0;
+ vdso_init_data(vdso_data);
+#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
+ /* Calculate the size of the 32 bit vDSO */
+ vdso32_pages = ((&vdso32_end - &vdso32_start
+ + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
+
+ /* Make sure pages are in the correct state */
+ vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1),
+ GFP_KERNEL);
+ BUG_ON(vdso32_pagelist == NULL);
+ for (i = 0; i < vdso32_pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso32_pagelist[i] = pg;
+ }
+ vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
+ vdso32_pagelist[vdso32_pages] = NULL;
+#endif
+
+#ifdef CONFIG_64BIT
+ /* Calculate the size of the 64 bit vDSO */
+ vdso64_pages = ((&vdso64_end - &vdso64_start
+ + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
+
+ /* Make sure pages are in the correct state */
+ vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1),
+ GFP_KERNEL);
+ BUG_ON(vdso64_pagelist == NULL);
+ for (i = 0; i < vdso64_pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso64_pagelist[i] = pg;
+ }
+ vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
+ vdso64_pagelist[vdso64_pages] = NULL;
+ if (vdso_alloc_per_cpu(&S390_lowcore))
+ BUG();
+ vdso_init_cr5();
+#endif /* CONFIG_64BIT */
+
+ get_page(virt_to_page(vdso_data));
+
+ smp_wmb();
+
+ return 0;
+}
+early_initcall(vdso_init);
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+ return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return NULL;
+}
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
new file mode 100644
index 00000000000..e45fba9d0ce
--- /dev/null
+++ b/arch/s390/kernel/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
new file mode 100644
index 00000000000..8ad2b34ad15
--- /dev/null
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -0,0 +1,58 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_31 += -m31 -s
+
+KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+ $(call if_changed,vdso32ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
new file mode 100644
index 00000000000..36aaa25d05d
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -0,0 +1,39 @@
+/*
+ * Userland implementation of clock_getres() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_getres
+ .type __kernel_clock_getres,@function
+__kernel_clock_getres:
+ .cfi_startproc
+ chi %r2,__CLOCK_REALTIME
+ je 0f
+ chi %r2,__CLOCK_MONOTONIC
+ jne 3f
+0: ltr %r3,%r3
+ jz 2f /* res == NULL */
+ basr %r1,0
+1: l %r0,4f-1b(%r1)
+ xc 0(4,%r3),0(%r3) /* set res->tv_sec to zero */
+ st %r0,4(%r3) /* store res->tv_nsec */
+2: lhi %r2,0
+ br %r14
+3: lhi %r1,__NR_clock_getres /* fallback to svc */
+ svc 0
+ br %r14
+4: .long __CLOCK_REALTIME_RES
+ .cfi_endproc
+ .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
new file mode 100644
index 00000000000..65fc3979c2f
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -0,0 +1,125 @@
+/*
+ * Userland implementation of clock_gettime() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_gettime
+ .type __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+ .cfi_startproc
+ basr %r5,0 /* r5 = address of label 0 */
+0: al %r5,21f-0b(%r5) /* get &_vdso_data */
+ chi %r2,__CLOCK_REALTIME
+ je 10f
+ chi %r2,__CLOCK_MONOTONIC
+ jne 19f /* other clock ids are left to the system call */
+
+ /* CLOCK_MONOTONIC */
+ ltr %r3,%r3
+ jz 9f /* tp == NULL */
+1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 1b
+ stck 24(%r15) /* Store TOD clock */
+ lm %r0,%r1,24(%r15) /* r0:r1 = the 64-bit value just stored */
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,2f /* no borrow out of the low word */
+ ahi %r0,-1 /* propagate the borrow into the high word */
+2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ lr %r2,%r0 /* keep (high word * mult) for label 3 */
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1 /* sign of the low word, tested by jnm below */
+ mr %r0,%r0 /* r0:r1 = low word * mult, signed multiply */
+ jnm 3f
+ a %r0,__VDSO_TK_MULT(%r5) /* low word had its top bit set: correct the signed product */
+3: alr %r0,%r2 /* add in (high word * mult) */
+ al %r0,__VDSO_WTOM_NSEC(%r5)
+ al %r1,__VDSO_WTOM_NSEC+4(%r5)
+ brc 12,5f /* no carry out of the low word */
+ ahi %r0,1 /* propagate the carry into the high word */
+5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_WTOM_SEC+4(%r5) /* r2 = seconds, low word of the 64-bit field */
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 1b
+ basr %r5,0 /* r5 = address of label 6, base for the constant at 20: */
+6: ltr %r0,%r0 /* normalize: loop while r0:r1 >= 1000000000 */
+ jnz 7f
+ cl %r1,20f-6b(%r5)
+ jl 8f
+7: ahi %r2,1 /* one more second */
+ sl %r1,20f-6b(%r5) /* r0:r1 -= 1000000000 */
+ brc 3,6b /* no borrow out of the low word */
+ ahi %r0,-1 /* propagate the borrow into the high word */
+ j 6b
+8: st %r2,0(%r3) /* store tp->tv_sec */
+ st %r1,4(%r3) /* store tp->tv_nsec */
+9: lhi %r2,0 /* return 0 */
+ br %r14
+
+ /* CLOCK_REALTIME */
+10: ltr %r3,%r3 /* tp == NULL */
+ jz 18f
+11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 11b
+ stck 24(%r15) /* Store TOD clock */
+ lm %r0,%r1,24(%r15) /* r0:r1 = the 64-bit value just stored */
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,12f /* no borrow out of the low word */
+ ahi %r0,-1 /* propagate the borrow into the high word */
+12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ lr %r2,%r0 /* keep (high word * mult) for label 13 */
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1 /* sign of the low word, tested by jnm below */
+ mr %r0,%r0 /* r0:r1 = low word * mult, signed multiply */
+ jnm 13f
+ a %r0,__VDSO_TK_MULT(%r5) /* low word had its top bit set: correct the signed product */
+13: alr %r0,%r2 /* add in (high word * mult) */
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ al %r1,__VDSO_XTIME_NSEC+4(%r5)
+ brc 12,14f /* no carry out of the low word */
+ ahi %r0,1 /* propagate the carry into the high word */
+14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_XTIME_SEC+4(%r5) /* r2 = seconds, low word of the 64-bit field */
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 11b
+ basr %r5,0 /* r5 = address of label 15, base for the constant at 20: */
+15: ltr %r0,%r0 /* normalize: loop while r0:r1 >= 1000000000 */
+ jnz 16f
+ cl %r1,20f-15b(%r5)
+ jl 17f
+16: ahi %r2,1 /* one more second */
+ sl %r1,20f-15b(%r5) /* r0:r1 -= 1000000000 */
+ brc 3,15b /* no borrow out of the low word */
+ ahi %r0,-1 /* propagate the borrow into the high word */
+ j 15b
+17: st %r2,0(%r3) /* store tp->tv_sec */
+ st %r1,4(%r3) /* store tp->tv_nsec */
+18: lhi %r2,0 /* return 0 */
+ br %r14
+
+ /* Fallback to system call */
+19: lhi %r1,__NR_clock_gettime
+ svc 0
+ br %r14
+
+20: .long 1000000000
+21: .long _vdso_data - 0b
+ .cfi_endproc
+ .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
new file mode 100644
index 00000000000..fd621a950f7
--- /dev/null
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,79 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_gettimeofday
+ .type __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+ .cfi_startproc
+ basr %r5,0 /* r5 = address of label 0 */
+0: al %r5,13f-0b(%r5) /* get &_vdso_data */
+1: ltr %r3,%r3 /* check if tz is NULL */
+ je 2f
+ mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) /* copy 8 bytes of timezone data to *tz */
+2: ltr %r2,%r2 /* check if tv is NULL */
+ je 10f
+ l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 1b
+ stck 24(%r15) /* Store TOD clock */
+ lm %r0,%r1,24(%r15) /* r0:r1 = the 64-bit value just stored */
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,3f /* no borrow out of the low word */
+ ahi %r0,-1 /* propagate the borrow into the high word */
+3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ st %r0,24(%r15) /* save (high word * mult) on the stack */
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1 /* sign of the low word, tested by jnm below */
+ mr %r0,%r0 /* r0:r1 = low word * mult, signed multiply */
+ jnm 4f
+ a %r0,__VDSO_TK_MULT(%r5) /* low word had its top bit set: correct the signed product */
+4: al %r0,24(%r15) /* add in (high word * mult) */
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
+ al %r1,__VDSO_XTIME_NSEC+4(%r5)
+ brc 12,5f /* no carry out of the low word */
+ ahi %r0,1 /* propagate the carry into the high word */
+5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) /* stash tv_sec (low word) on the stack */
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 1b
+ l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift; NOTE(review): loaded after the update counter check - confirm this is safe */
+ srdl %r0,0(%r4) /* >> tk->shift */
+ l %r4,24(%r15) /* get tv_sec from stack */
+ basr %r5,0 /* r5 = address of label 6, base for the constants at 11: and 12: */
+6: ltr %r0,%r0 /* normalize: loop while r0:r1 >= 1000000000 */
+ jnz 7f
+ cl %r1,11f-6b(%r5)
+ jl 8f
+7: ahi %r4,1 /* one more second */
+ sl %r1,11f-6b(%r5) /* r0:r1 -= 1000000000 */
+ brc 3,6b /* no borrow out of the low word */
+ ahi %r0,-1 /* propagate the borrow into the high word */
+ j 6b
+8: st %r4,0(%r2) /* store tv->tv_sec */
+ ltr %r1,%r1 /* nsec -> usec: sign of nsec, tested by jnm below */
+ m %r0,12f-6b(%r5) /* r0:r1 = nsec * 274877907, signed multiply */
+ jnm 9f
+ al %r0,12f-6b(%r5) /* correct the signed product */
+9: srl %r0,6 /* high word >> 6, i.e. product >> 38 */
+ st %r0,4(%r2) /* store tv->tv_usec */
+10: slr %r2,%r2 /* return 0 */
+ br %r14
+11: .long 1000000000
+12: .long 274877907 /* 2^38 / 1000, rounded up */
+13: .long _vdso_data - 0b
+ .cfi_endproc
+ .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
new file mode 100644
index 00000000000..79a071e4357
--- /dev/null
+++ b/arch/s390/kernel/vdso32/note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE /* note payload: version code from <linux/version.h> */
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 00000000000..a8c379fa124
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,138 @@
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ . = ALIGN(4096);
+ PROVIDE(_vdso_data = .);
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 00000000000..ae42f8ce350
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/s390/kernel/vdso32/vdso32.so" /* embed the built vdso32.so image byte for byte */
+ .balign PAGE_SIZE /* pad so that vdso32_end is page aligned */
+vdso32_end:
+
+ .previous /* return to the section that was active before */
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
new file mode 100644
index 00000000000..3fd18cf9fec
--- /dev/null
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -0,0 +1 @@
+vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
new file mode 100644
index 00000000000..2a8ddfd12a5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -0,0 +1,58 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_64 += -m64 -s
+
+KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+ $(call if_changed,vdso64ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso64.so: $(obj)/vdso64.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso64.so
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
new file mode 100644
index 00000000000..34deba7c7ed
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -0,0 +1,46 @@
+/*
+ * Userland implementation of clock_getres() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_getres
+ .type __kernel_clock_getres,@function
+__kernel_clock_getres:
+ .cfi_startproc
+ cghi %r2,__CLOCK_REALTIME /* r2 = clock id */
+ je 0f
+ cghi %r2,__CLOCK_MONOTONIC
+ je 0f
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 0f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ jne 2f
+ larl %r5,_vdso_data
+ icm %r0,15,__LC_ECTG_OK(%r5) /* NOTE(review): vdso64 clock_gettime tests __VDSO_ECTG_OK off this base - confirm the offset name */
+ jz 2f
+0: ltgr %r3,%r3
+ jz 1f /* res == NULL */
+ larl %r1,3f /* r1 = address of the resolution constant */
+ lg %r0,0(%r1)
+ xc 0(8,%r3),0(%r3) /* set res->tv_sec to zero */
+ stg %r0,8(%r3) /* store res->tv_nsec */
+1: lghi %r2,0 /* return 0 */
+ br %r14
+2: lghi %r1,__NR_clock_getres /* fallback to svc */
+ svc 0
+ br %r14
+3: .quad __CLOCK_REALTIME_RES
+ .cfi_endproc
+ .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
new file mode 100644
index 00000000000..91940ed33a4
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -0,0 +1,127 @@
+/*
+ * Userland implementation of clock_gettime() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_gettime
+ .type __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+ .cfi_startproc
+ larl %r5,_vdso_data
+ cghi %r2,__CLOCK_REALTIME
+ je 4f
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 9f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ je 9f
+ cghi %r2,__CLOCK_MONOTONIC
+ jne 12f /* other clock ids are left to the system call */
+
+ /* CLOCK_MONOTONIC */
+ ltgr %r3,%r3
+ jz 3f /* tp == NULL */
+0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 0b
+ stck 48(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ lg %r0,__VDSO_WTOM_SEC(%r5) /* r0 = seconds */
+ lg %r1,48(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_WTOM_NSEC(%r5)
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 0b
+ larl %r5,13f /* r5 = address of the constant 1000000000 */
+1: clg %r1,0(%r5) /* normalize: loop while r1 >= 1000000000 */
+ jl 2f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 1b
+2: stg %r0,0(%r3) /* store tp->tv_sec */
+ stg %r1,8(%r3) /* store tp->tv_nsec */
+3: lghi %r2,0 /* return 0 */
+ br %r14
+
+ /* CLOCK_REALTIME */
+4: ltgr %r3,%r3 /* tp == NULL, tested on all 64 bits of the pointer */
+ jz 8f
+5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 5b
+ stck 48(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ lg %r1,48(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 5b
+ larl %r5,13f /* r5 = address of the constant 1000000000 */
+6: clg %r1,0(%r5) /* normalize: loop while r1 >= 1000000000 */
+ jl 7f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 6b
+7: stg %r0,0(%r3) /* store tp->tv_sec */
+ stg %r1,8(%r3) /* store tp->tv_nsec */
+8: lghi %r2,0 /* return 0 */
+ br %r14
+
+ /* CLOCK_THREAD_CPUTIME_ID for this thread */
+9: icm %r0,15,__VDSO_ECTG_OK(%r5)
+ jz 12f /* flag word is zero: use the system call */
+ ear %r2,%a4 /* save access register 4 in r2 */
+ llilh %r4,0x0100
+ sar %a4,%r4
+ lghi %r4,0
+ epsw %r5,0 /* r5 = first word of the PSW, tested after ectg */
+ sacf 512 /* Magic ectg instruction */
+ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+ tml %r5,0x4000 /* pick the sacf that matches the PSW bits saved in r5 */
+ jo 11f
+ tml %r5,0x8000
+ jno 10f
+ sacf 256
+ j 11f
+10: sacf 0
+11: sar %a4,%r2 /* restore access register 4 */
+ algr %r1,%r0 /* r1 = cputime as TOD value */
+ mghi %r1,1000 /* convert to nanoseconds */
+ srlg %r1,%r1,12 /* r1 = cputime in nanosec */
+ lgr %r4,%r1 /* keep the full nanosecond value for tv_nsec */
+ larl %r5,13f
+ srlg %r1,%r1,9 /* divide by 1000000000 */
+ mlg %r0,8(%r5) /* r0:r1 = r1 * constant at 14:, r0 = upper 64 bits */
+ srlg %r0,%r0,11 /* r0 = tv_sec */
+ stg %r0,0(%r3)
+ msg %r0,0(%r5) /* calculate tv_nsec */
+ slgr %r4,%r0 /* r4 = tv_nsec */
+ stg %r4,8(%r3)
+ lghi %r2,0 /* return 0 */
+ br %r14
+
+ /* Fallback to system call */
+12: lghi %r1,__NR_clock_gettime
+ svc 0
+ br %r14
+
+13: .quad 1000000000
+14: .quad 19342813113834067 /* 2^84 / 1000000000, rounded up */
+ .cfi_endproc
+ .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
new file mode 100644
index 00000000000..d0860d1d0cc
--- /dev/null
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,57 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_gettimeofday
+ .type __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+ .cfi_startproc
+ larl %r5,_vdso_data
+0: ltgr %r3,%r3 /* check if tz is NULL */
+ je 1f
+ mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) /* copy 8 bytes of timezone data to *tz */
+1: ltgr %r2,%r2 /* check if tv is NULL */
+ je 4f
+ lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 0b
+ stck 48(%r15) /* Store TOD clock */
+ lg %r1,48(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 0b
+ lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift; NOTE(review): loaded after the update counter check - confirm this is safe */
+ srlg %r1,%r1,0(%r5) /* >> tk->shift */
+ larl %r5,5f /* r5 = address of the constants at 5: */
+2: clg %r1,0(%r5) /* normalize: loop while r1 >= 1000000000 */
+ jl 3f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 2b
+3: stg %r0,0(%r2) /* store tv->tv_sec */
+ slgr %r0,%r0 /* tv_nsec -> tv_usec */
+ ml %r0,8(%r5) /* r0:r1 = nsec * 274877907, 32-bit unsigned multiply */
+ srlg %r0,%r0,6 /* high word >> 6, i.e. product >> 38 */
+ stg %r0,8(%r2) /* store tv->tv_usec */
+4: lghi %r2,0 /* return 0 */
+ br %r14
+5: .quad 1000000000
+ .long 274877907 /* 2^38 / 1000, rounded up */
+ .cfi_endproc
+ .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
new file mode 100644
index 00000000000..79a071e4357
--- /dev/null
+++ b/arch/s390/kernel/vdso64/note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE /* note payload: version code from <linux/version.h> */
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 00000000000..9f5979d102a
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,138 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ . = ALIGN(4096);
+ PROVIDE(_vdso_data = .);
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 00000000000..c245842b516
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/s390/kernel/vdso64/vdso64.so" /* embed the built vdso64.so image byte for byte */
+ .balign PAGE_SIZE /* pad so that vdso64_end is page aligned */
+vdso64_end:
+
+ .previous /* return to the section that was active before */
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index b4607155e8d..35b13ed0af5 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -2,18 +2,19 @@
* Written by Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
+#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#ifndef CONFIG_64BIT
OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390)
-ENTRY(_start)
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(startup)
jiffies = jiffies_64 + 4;
#else
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
-ENTRY(_start)
+ENTRY(startup)
jiffies = jiffies_64;
#endif
@@ -28,11 +29,12 @@ SECTIONS
. = 0x00000000;
.text : {
_text = .; /* Text and read-only data */
- *(.text.head)
- TEXT_TEXT
+ HEAD_TEXT
+ TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
+ IRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
} :text = 0x0700
@@ -40,9 +42,10 @@ SECTIONS
_etext = .; /* End of text section */
NOTES :text :note
- BUG_TABLE :text
- RODATA
+ .dummy : { *(.dummy) } :data
+
+ RO_DATA_SECTION(PAGE_SIZE)
#ifdef CONFIG_SHARED_KERNEL
. = ALIGN(0x100000); /* VM shared segments are 1MB aligned */
@@ -50,56 +53,20 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_eshared = .; /* End of shareable data */
+ _sdata = .; /* Start of data section */
- . = ALIGN(16); /* Exception table */
- __ex_table : {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- }
-
- .data : { /* Data */
- DATA_DATA
- CONSTRUCTORS
- }
-
- . = ALIGN(PAGE_SIZE);
- .data_nosave : {
- __nosave_begin = .;
- *(.data.nosave)
- }
- . = ALIGN(PAGE_SIZE);
- __nosave_end = .;
-
- . = ALIGN(PAGE_SIZE);
- .data.page_aligned : {
- *(.data.idt)
- }
+ EXCEPTION_TABLE(16) :data
- . = ALIGN(0x100);
- .data.cacheline_aligned : {
- *(.data.cacheline_aligned)
- }
+ RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
- . = ALIGN(0x100);
- .data.read_mostly : {
- *(.data.read_mostly)
- }
_edata = .; /* End of data section */
- . = ALIGN(2 * PAGE_SIZE); /* init_task */
- .data.init_task : {
- *(.data.init_task)
- }
-
/* will be freed after init */
. = ALIGN(PAGE_SIZE); /* Init code and data */
__init_begin = .;
- .init.text : {
- _sinittext = .;
- INIT_TEXT
- _einittext = .;
- }
+
+ INIT_TEXT_SECTION(PAGE_SIZE)
+
/*
* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
@@ -108,59 +75,26 @@ SECTIONS
EXIT_TEXT
}
- .init.data : {
- INIT_DATA
- }
- . = ALIGN(0x100);
- .init.setup : {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
- }
- .initcall.init : {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
+ .exit.data : {
+ EXIT_DATA
}
- .con_initcall.init : {
- __con_initcall_start = .;
- *(.con_initcall.init)
- __con_initcall_end = .;
- }
- SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
- . = ALIGN(0x100);
- .init.ramfs : {
- __initramfs_start = .;
- *(.init.ramfs)
- . = ALIGN(2);
- __initramfs_end = .;
- }
-#endif
+ /* early.c uses stsi, which requires page aligned data. */
+ . = ALIGN(PAGE_SIZE);
+ INIT_DATA_SECTION(0x100)
- PERCPU(PAGE_SIZE)
+ PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
- /* BSS */
- .bss : {
- __bss_start = .;
- *(.bss)
- . = ALIGN(2);
- __bss_stop = .;
- }
+ BSS_SECTION(0, 2, 0)
_end = . ;
- /* Sections to be discarded */
- /DISCARD/ : {
- EXIT_DATA
- *(.exitcall.exit)
- }
-
/* Debugging sections. */
STABS_DEBUG
DWARF_DEBUG
+
+ /* Sections to be discarded */
+ DISCARDS
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index ca90ee3f930..8c34363d6f1 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -1,186 +1,198 @@
/*
- * arch/s390/kernel/vtime.c
* Virtual cpu timer based timer functions.
*
- * S390 version
- * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 2004, 2012
* Author(s): Jan Glauber <jan.glauber@de.ibm.com>
*/
-#include <linux/module.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/export.h>
#include <linux/kernel.h>
+#include <linux/timex.h>
+#include <linux/types.h>
#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/init.h>
+#include <linux/cpu.h>
#include <linux/smp.h>
-#include <linux/types.h>
-#include <linux/timex.h>
-#include <linux/notifier.h>
-#include <linux/kernel_stat.h>
-#include <linux/rcupdate.h>
-#include <linux/posix-timers.h>
-#include <asm/s390_ext.h>
-#include <asm/timer.h>
#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/vtimer.h>
+#include <asm/vtime.h>
+#include <asm/irq.h>
+#include "entry.h"
-static ext_int_info_t ext_int_info_timer;
-static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
+static void virt_timer_expire(void);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
-void account_process_tick(struct task_struct *tsk, int user_tick)
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+static LIST_HEAD(virt_timer_list);
+static DEFINE_SPINLOCK(virt_timer_lock);
+static atomic64_t virt_timer_current;
+static atomic64_t virt_timer_elapsed;
+
+static inline u64 get_vtimer(void)
{
- cputime_t cputime;
- __u64 timer, clock;
- int rcu_user_flag;
+ u64 timer;
- timer = S390_lowcore.last_update_timer;
- clock = S390_lowcore.last_update_clock;
- asm volatile (" STPT %0\n" /* Store current cpu timer value */
- " STCK %1" /* Store current tod clock value */
- : "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock) );
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock;
-
- cputime = S390_lowcore.user_timer >> 12;
- rcu_user_flag = cputime != 0;
- S390_lowcore.user_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_user_time(tsk, cputime);
-
- cputime = S390_lowcore.system_timer >> 12;
- S390_lowcore.system_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_system_time(tsk, HARDIRQ_OFFSET, cputime);
-
- cputime = S390_lowcore.steal_clock;
- if ((__s64) cputime > 0) {
- cputime >>= 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_steal_time(tsk, cputime);
- }
+ asm volatile("stpt %0" : "=m" (timer));
+ return timer;
}
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
-void account_vtime(struct task_struct *tsk)
+static inline void set_vtimer(u64 expires)
{
- cputime_t cputime;
- __u64 timer;
+ u64 timer;
- timer = S390_lowcore.last_update_timer;
- asm volatile (" STPT %0" /* Store current cpu timer value */
- : "=m" (S390_lowcore.last_update_timer) );
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+ " spt %1" /* Set new value imm. afterwards */
+ : "=m" (timer) : "m" (expires));
+ S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+ S390_lowcore.last_update_timer = expires;
+}
- cputime = S390_lowcore.user_timer >> 12;
- S390_lowcore.user_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_user_time(tsk, cputime);
+static inline int virt_timer_forward(u64 elapsed)
+{
+ BUG_ON(!irqs_disabled());
- cputime = S390_lowcore.system_timer >> 12;
- S390_lowcore.system_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_system_time(tsk, 0, cputime);
+ if (list_empty(&virt_timer_list))
+ return 0;
+ elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
+ return elapsed >= atomic64_read(&virt_timer_current);
}
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void account_system_vtime(struct task_struct *tsk)
+static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
{
- cputime_t cputime;
- __u64 timer;
+ struct thread_info *ti = task_thread_info(tsk);
+ u64 timer, clock, user, system, steal;
timer = S390_lowcore.last_update_timer;
- asm volatile (" STPT %0" /* Store current cpu timer value */
- : "=m" (S390_lowcore.last_update_timer) );
+ clock = S390_lowcore.last_update_clock;
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+ " stck %1" /* Store current tod clock value */
+ : "=m" (S390_lowcore.last_update_timer),
+ "=m" (S390_lowcore.last_update_clock));
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+ S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
+
+ user = S390_lowcore.user_timer - ti->user_timer;
+ S390_lowcore.steal_timer -= user;
+ ti->user_timer = S390_lowcore.user_timer;
+ account_user_time(tsk, user, user);
+
+ system = S390_lowcore.system_timer - ti->system_timer;
+ S390_lowcore.steal_timer -= system;
+ ti->system_timer = S390_lowcore.system_timer;
+ account_system_time(tsk, hardirq_offset, system, system);
+
+ steal = S390_lowcore.steal_timer;
+ if ((s64) steal > 0) {
+ S390_lowcore.steal_timer = 0;
+ account_steal_time(steal);
+ }
- cputime = S390_lowcore.system_timer >> 12;
- S390_lowcore.system_timer -= cputime << 12;
- S390_lowcore.steal_clock -= cputime << 12;
- account_system_time(tsk, 0, cputime);
+ return virt_timer_forward(user + system);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
-
-static inline void set_vtimer(__u64 expires)
-{
- __u64 timer;
-
- asm volatile (" STPT %0\n" /* Store current cpu timer value */
- " SPT %1" /* Set new value immediatly afterwards */
- : "=m" (timer) : "m" (expires) );
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
- /* store expire time for this CPU timer */
- __get_cpu_var(virt_cpu_timer).to_expire = expires;
-}
-#else
-static inline void set_vtimer(__u64 expires)
+void vtime_task_switch(struct task_struct *prev)
{
- S390_lowcore.last_update_timer = expires;
- asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
-
- /* store expire time for this CPU timer */
- __get_cpu_var(virt_cpu_timer).to_expire = expires;
+ struct thread_info *ti;
+
+ do_account_vtime(prev, 0);
+ ti = task_thread_info(prev);
+ ti->user_timer = S390_lowcore.user_timer;
+ ti->system_timer = S390_lowcore.system_timer;
+ ti = task_thread_info(current);
+ S390_lowcore.user_timer = ti->user_timer;
+ S390_lowcore.system_timer = ti->system_timer;
}
-#endif
-static void start_cpu_timer(void)
+/*
+ * In s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time accounting.
+ */
+void vtime_account_user(struct task_struct *tsk)
{
- struct vtimer_queue *vt_list;
-
- vt_list = &__get_cpu_var(virt_cpu_timer);
-
- /* CPU timer interrupt is pending, don't reprogramm it */
- if (vt_list->idle & 1LL<<63)
- return;
-
- if (!list_empty(&vt_list->list))
- set_vtimer(vt_list->idle);
+ if (do_account_vtime(tsk, HARDIRQ_OFFSET))
+ virt_timer_expire();
}
-static void stop_cpu_timer(void)
+/*
+ * Update process times based on virtual cpu times stored by entry.S
+ * to the lowcore fields user_timer, system_timer & steal_timer.
+ */
+void vtime_account_irq_enter(struct task_struct *tsk)
{
- struct vtimer_queue *vt_list;
+ struct thread_info *ti = task_thread_info(tsk);
+ u64 timer, system;
- vt_list = &__get_cpu_var(virt_cpu_timer);
+ WARN_ON_ONCE(!irqs_disabled());
- /* nothing to do */
- if (list_empty(&vt_list->list)) {
- vt_list->idle = VTIMER_MAX_SLICE;
- goto fire;
- }
+ timer = S390_lowcore.last_update_timer;
+ S390_lowcore.last_update_timer = get_vtimer();
+ S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- /* store the actual expire value */
- asm volatile ("STPT %0" : "=m" (vt_list->idle));
+ system = S390_lowcore.system_timer - ti->system_timer;
+ S390_lowcore.steal_timer -= system;
+ ti->system_timer = S390_lowcore.system_timer;
+ account_system_time(tsk, 0, system, system);
- /*
- * If the CPU timer is negative we don't reprogramm
- * it because we will get instantly an interrupt.
- */
- if (vt_list->idle & 1LL<<63)
- return;
+ virt_timer_forward(system);
+}
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
- vt_list->offset += vt_list->to_expire - vt_list->idle;
+void vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account_irq_enter")));
+EXPORT_SYMBOL_GPL(vtime_account_system);
- /*
- * We cannot halt the CPU timer, we just write a value that
- * nearly never expires (only after 71 years) and re-write
- * the stored expire value if we continue the timer
- */
- fire:
- set_vtimer(VTIMER_MAX_SLICE);
+void __kprobes vtime_stop_cpu(void)
+{
+ struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+ unsigned long long idle_time;
+ unsigned long psw_mask;
+
+ trace_hardirqs_on();
+
+ /* Wait for external, I/O or machine check interrupt. */
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ idle->nohz_delay = 0;
+
+ /* Call the assembler magic in entry.S */
+ psw_idle(idle, psw_mask);
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ idle->sequence++;
+ smp_wmb();
+ idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
+ idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
+ idle->idle_time += idle_time;
+ idle->idle_count++;
+ account_idle_time(idle_time);
+ smp_wmb();
+ idle->sequence++;
+}
+
+cputime64_t s390_get_idle_time(int cpu)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+ unsigned long long now, idle_enter, idle_exit;
+ unsigned int sequence;
+
+ do {
+ now = get_tod_clock();
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
+ idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+ return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
}
/*
@@ -189,11 +201,11 @@ static void stop_cpu_timer(void)
*/
static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
{
- struct vtimer_list *event;
+ struct vtimer_list *tmp;
- list_for_each_entry(event, head, entry) {
- if (event->expires > timer->expires) {
- list_add_tail(&timer->entry, &event->entry);
+ list_for_each_entry(tmp, head, entry) {
+ if (tmp->expires > timer->expires) {
+ list_add_tail(&timer->entry, &tmp->entry);
return;
}
}
@@ -201,312 +213,158 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
}
/*
- * Do the callback functions of expired vtimer events.
- * Called from within the interrupt handler.
+ * Handler for expired virtual CPU timer.
*/
-static void do_callbacks(struct list_head *cb_list)
+static void virt_timer_expire(void)
{
- struct vtimer_queue *vt_list;
- struct vtimer_list *event, *tmp;
- void (*fn)(unsigned long);
- unsigned long data;
-
- if (list_empty(cb_list))
- return;
-
- vt_list = &__get_cpu_var(virt_cpu_timer);
-
- list_for_each_entry_safe(event, tmp, cb_list, entry) {
- fn = event->function;
- data = event->data;
- fn(data);
-
- if (!event->interval)
- /* delete one shot timer */
- list_del_init(&event->entry);
- else {
- /* move interval timer back to list */
- spin_lock(&vt_list->lock);
- list_del_init(&event->entry);
- list_add_sorted(event, &vt_list->list);
- spin_unlock(&vt_list->lock);
- }
+ struct vtimer_list *timer, *tmp;
+ unsigned long elapsed;
+ LIST_HEAD(cb_list);
+
+ /* walk timer list, fire all expired timers */
+ spin_lock(&virt_timer_lock);
+ elapsed = atomic64_read(&virt_timer_elapsed);
+ list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
+ if (timer->expires < elapsed)
+ /* move expired timer to the callback queue */
+ list_move_tail(&timer->entry, &cb_list);
+ else
+ timer->expires -= elapsed;
}
-}
-
-/*
- * Handler for the virtual CPU timer.
- */
-static void do_cpu_timer_interrupt(__u16 error_code)
-{
- __u64 next, delta;
- struct vtimer_queue *vt_list;
- struct vtimer_list *event, *tmp;
- struct list_head *ptr;
- /* the callback queue */
- struct list_head cb_list;
-
- INIT_LIST_HEAD(&cb_list);
- vt_list = &__get_cpu_var(virt_cpu_timer);
-
- /* walk timer list, fire all expired events */
- spin_lock(&vt_list->lock);
-
- if (vt_list->to_expire < VTIMER_MAX_SLICE)
- vt_list->offset += vt_list->to_expire;
-
- list_for_each_entry_safe(event, tmp, &vt_list->list, entry) {
- if (event->expires > vt_list->offset)
- /* found first unexpired event, leave */
- break;
-
- /* re-charge interval timer, we have to add the offset */
- if (event->interval)
- event->expires = event->interval + vt_list->offset;
-
- /* move expired timer to the callback queue */
- list_move_tail(&event->entry, &cb_list);
+ if (!list_empty(&virt_timer_list)) {
+ timer = list_first_entry(&virt_timer_list,
+ struct vtimer_list, entry);
+ atomic64_set(&virt_timer_current, timer->expires);
}
- spin_unlock(&vt_list->lock);
- do_callbacks(&cb_list);
-
- /* next event is first in list */
- spin_lock(&vt_list->lock);
- if (!list_empty(&vt_list->list)) {
- ptr = vt_list->list.next;
- event = list_entry(ptr, struct vtimer_list, entry);
- next = event->expires - vt_list->offset;
-
- /* add the expired time from this interrupt handler
- * and the callback functions
- */
- asm volatile ("STPT %0" : "=m" (delta));
- delta = 0xffffffffffffffffLL - delta + 1;
- vt_list->offset += delta;
- next -= delta;
- } else {
- vt_list->offset = 0;
- next = VTIMER_MAX_SLICE;
+ atomic64_sub(elapsed, &virt_timer_elapsed);
+ spin_unlock(&virt_timer_lock);
+
+ /* Do callbacks and recharge periodic timers */
+ list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
+ list_del_init(&timer->entry);
+ timer->function(timer->data);
+ if (timer->interval) {
+ /* Recharge interval timer */
+ timer->expires = timer->interval +
+ atomic64_read(&virt_timer_elapsed);
+ spin_lock(&virt_timer_lock);
+ list_add_sorted(timer, &virt_timer_list);
+ spin_unlock(&virt_timer_lock);
+ }
}
- spin_unlock(&vt_list->lock);
- set_vtimer(next);
}
void init_virt_timer(struct vtimer_list *timer)
{
timer->function = NULL;
INIT_LIST_HEAD(&timer->entry);
- spin_lock_init(&timer->lock);
}
EXPORT_SYMBOL(init_virt_timer);
static inline int vtimer_pending(struct vtimer_list *timer)
{
- return (!list_empty(&timer->entry));
+ return !list_empty(&timer->entry);
}
-/*
- * this function should only run on the specified CPU
- */
static void internal_add_vtimer(struct vtimer_list *timer)
{
- unsigned long flags;
- __u64 done;
- struct vtimer_list *event;
- struct vtimer_queue *vt_list;
-
- vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
- spin_lock_irqsave(&vt_list->lock, flags);
-
- if (timer->cpu != smp_processor_id())
- printk("internal_add_vtimer: BUG, running on wrong CPU");
-
- /* if list is empty we only have to set the timer */
- if (list_empty(&vt_list->list)) {
- /* reset the offset, this may happen if the last timer was
- * just deleted by mod_virt_timer and the interrupt
- * didn't happen until here
- */
- vt_list->offset = 0;
- goto fire;
+ if (list_empty(&virt_timer_list)) {
+ /* First timer, just program it. */
+ atomic64_set(&virt_timer_current, timer->expires);
+ atomic64_set(&virt_timer_elapsed, 0);
+ list_add(&timer->entry, &virt_timer_list);
+ } else {
+ /* Update timer against current base. */
+ timer->expires += atomic64_read(&virt_timer_elapsed);
+ if (likely((s64) timer->expires <
+ (s64) atomic64_read(&virt_timer_current)))
+ /* The new timer expires before the current timer. */
+ atomic64_set(&virt_timer_current, timer->expires);
+ /* Insert new timer into the list. */
+ list_add_sorted(timer, &virt_timer_list);
}
-
- /* save progress */
- asm volatile ("STPT %0" : "=m" (done));
-
- /* calculate completed work */
- done = vt_list->to_expire - done + vt_list->offset;
- vt_list->offset = 0;
-
- list_for_each_entry(event, &vt_list->list, entry)
- event->expires -= done;
-
- fire:
- list_add_sorted(timer, &vt_list->list);
-
- /* get first element, which is the next vtimer slice */
- event = list_entry(vt_list->list.next, struct vtimer_list, entry);
-
- set_vtimer(event->expires);
- spin_unlock_irqrestore(&vt_list->lock, flags);
- /* release CPU acquired in prepare_vtimer or mod_virt_timer() */
- put_cpu();
}
-static inline int prepare_vtimer(struct vtimer_list *timer)
+static void __add_vtimer(struct vtimer_list *timer, int periodic)
{
- if (!timer->function) {
- printk("add_virt_timer: uninitialized timer\n");
- return -EINVAL;
- }
-
- if (!timer->expires || timer->expires > VTIMER_MAX_SLICE) {
- printk("add_virt_timer: invalid timer expire value!\n");
- return -EINVAL;
- }
-
- if (vtimer_pending(timer)) {
- printk("add_virt_timer: timer pending\n");
- return -EBUSY;
- }
+ unsigned long flags;
- timer->cpu = get_cpu();
- return 0;
+ timer->interval = periodic ? timer->expires : 0;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ internal_add_vtimer(timer);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
}
/*
* add_virt_timer - add an oneshot virtual CPU timer
*/
-void add_virt_timer(void *new)
+void add_virt_timer(struct vtimer_list *timer)
{
- struct vtimer_list *timer;
-
- timer = (struct vtimer_list *)new;
-
- if (prepare_vtimer(timer) < 0)
- return;
-
- timer->interval = 0;
- internal_add_vtimer(timer);
+ __add_vtimer(timer, 0);
}
EXPORT_SYMBOL(add_virt_timer);
/*
* add_virt_timer_periodic - add an interval virtual CPU timer
*/
-void add_virt_timer_periodic(void *new)
+void add_virt_timer_periodic(struct vtimer_list *timer)
{
- struct vtimer_list *timer;
-
- timer = (struct vtimer_list *)new;
-
- if (prepare_vtimer(timer) < 0)
- return;
-
- timer->interval = timer->expires;
- internal_add_vtimer(timer);
+ __add_vtimer(timer, 1);
}
EXPORT_SYMBOL(add_virt_timer_periodic);
-/*
- * If we change a pending timer the function must be called on the CPU
- * where the timer is running on, e.g. by smp_call_function_single()
- *
- * The original mod_timer adds the timer if it is not pending. For compatibility
- * we do the same. The timer will be added on the current CPU as a oneshot timer.
- *
- * returns whether it has modified a pending timer (1) or not (0)
- */
-int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
+static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
{
- struct vtimer_queue *vt_list;
unsigned long flags;
- int cpu;
+ int rc;
- if (!timer->function) {
- printk("mod_virt_timer: uninitialized timer\n");
- return -EINVAL;
- }
-
- if (!expires || expires > VTIMER_MAX_SLICE) {
- printk("mod_virt_timer: invalid expire range\n");
- return -EINVAL;
- }
+ BUG_ON(!timer->function);
- /*
- * This is a common optimization triggered by the
- * networking code - if the timer is re-modified
- * to be the same thing then just return:
- */
if (timer->expires == expires && vtimer_pending(timer))
return 1;
-
- cpu = get_cpu();
- vt_list = &per_cpu(virt_cpu_timer, cpu);
-
- /* disable interrupts before test if timer is pending */
- spin_lock_irqsave(&vt_list->lock, flags);
-
- /* if timer isn't pending add it on the current CPU */
- if (!vtimer_pending(timer)) {
- spin_unlock_irqrestore(&vt_list->lock, flags);
- /* we do not activate an interval timer with mod_virt_timer */
- timer->interval = 0;
- timer->expires = expires;
- timer->cpu = cpu;
- internal_add_vtimer(timer);
- return 0;
- }
-
- /* check if we run on the right CPU */
- if (timer->cpu != cpu) {
- printk("mod_virt_timer: running on wrong CPU, check your code\n");
- spin_unlock_irqrestore(&vt_list->lock, flags);
- put_cpu();
- return -EINVAL;
- }
-
- list_del_init(&timer->entry);
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ rc = vtimer_pending(timer);
+ if (rc)
+ list_del_init(&timer->entry);
+ timer->interval = periodic ? expires : 0;
timer->expires = expires;
-
- /* also change the interval if we have an interval timer */
- if (timer->interval)
- timer->interval = expires;
-
- /* the timer can't expire anymore so we can release the lock */
- spin_unlock_irqrestore(&vt_list->lock, flags);
internal_add_vtimer(timer);
- return 1;
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+ return rc;
+}
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer(struct vtimer_list *timer, u64 expires)
+{
+ return __mod_vtimer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_virt_timer);
/*
- * delete a virtual timer
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
+{
+ return __mod_vtimer(timer, expires, 1);
+}
+EXPORT_SYMBOL(mod_virt_timer_periodic);
+
+/*
+ * Delete a virtual timer.
*
* returns whether the deleted timer was pending (1) or not (0)
*/
int del_virt_timer(struct vtimer_list *timer)
{
unsigned long flags;
- struct vtimer_queue *vt_list;
- /* check if timer is pending */
if (!vtimer_pending(timer))
return 0;
-
- vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
- spin_lock_irqsave(&vt_list->lock, flags);
-
- /* we don't interrupt a running timer, just let it expire! */
+ spin_lock_irqsave(&virt_timer_lock, flags);
list_del_init(&timer->entry);
-
- /* last timer removed */
- if (list_empty(&vt_list->list)) {
- vt_list->to_expire = 0;
- vt_list->offset = 0;
- }
-
- spin_unlock_irqrestore(&vt_list->lock, flags);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
return 1;
}
EXPORT_SYMBOL(del_virt_timer);
@@ -516,55 +374,29 @@ EXPORT_SYMBOL(del_virt_timer);
*/
void init_cpu_vtimer(void)
{
- struct vtimer_queue *vt_list;
-
- /* kick the virtual timer */
- S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
- S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
- asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
- asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
-
- /* enable cpu timer interrupts */
- __ctl_set_bit(0,10);
-
- vt_list = &__get_cpu_var(virt_cpu_timer);
- INIT_LIST_HEAD(&vt_list->list);
- spin_lock_init(&vt_list->lock);
- vt_list->to_expire = 0;
- vt_list->offset = 0;
- vt_list->idle = 0;
-
+ /* set initial cpu timer */
+ set_vtimer(VTIMER_MAX_SLICE);
}
-static int vtimer_idle_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
- switch (action) {
- case S390_CPU_IDLE:
- stop_cpu_timer();
- break;
- case S390_CPU_NOT_IDLE:
- start_cpu_timer();
+ struct s390_idle_data *idle;
+ long cpu = (long) hcpu;
+
+ idle = &per_cpu(s390_idle, cpu);
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DYING:
+ idle->nohz_delay = 0;
+ default:
break;
}
return NOTIFY_OK;
}
-static struct notifier_block vtimer_idle_nb = {
- .notifier_call = vtimer_idle_notify,
-};
-
void __init vtime_init(void)
{
- /* request the cpu timer external interrupt */
- if (register_early_external_interrupt(0x1005, do_cpu_timer_interrupt,
- &ext_int_info_timer) != 0)
- panic("Couldn't request external interrupt 0x1005");
-
- if (register_idle_notifier(&vtimer_idle_nb))
- panic("Couldn't register idle notifier");
-
/* Enable cpu timer interrupts on the boot cpu. */
init_cpu_vtimer();
+ cpu_notifier(s390_nohz_notify, 0);
}
-
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index e051cad1f1e..10d529ac982 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -1,12 +1,11 @@
#
# KVM configuration
#
-config HAVE_KVM
- bool
+source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
- bool "Virtualization"
- default y
+ def_bool y
+ prompt "KVM"
---help---
Say Y here to get to see options for using your Linux host to run other
operating systems inside virtual machines (guests).
@@ -17,11 +16,17 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
- tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HAVE_KVM && EXPERIMENTAL
+ def_tristate y
+ prompt "Kernel-based Virtual Machine (KVM) support"
+ depends on HAVE_KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
- select S390_SWITCH_AMODE
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_EVENTFD
+ select KVM_ASYNC_PF
+ select KVM_ASYNC_PF_SYNC
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
@@ -35,11 +40,17 @@ config KVM
If unsure, say N.
-config KVM_TRACE
- bool
+config KVM_S390_UCONTROL
+ bool "Userspace controlled virtual machines"
+ depends on KVM
+ ---help---
+ Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
+ controlled by userspace.
+
+ If unsure, say N.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
-source drivers/virtio/Kconfig
+source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index e5221ec0b8e..b3b55346965 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,9 +6,12 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
-EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index a0775e1f08d..0161675878a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,7 +1,7 @@
/*
- * diag.c - handling diagnose instructions
+ * handling diagnose instructions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2011
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -13,22 +13,156 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
+#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"
+#include "gaccess.h"
+
+static int diag_release_pages(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, end;
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
+
+ start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+ end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+
+ if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
+ || start < 2 * PAGE_SIZE)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+ vcpu->stat.diagnose_10++;
+
+ /* we checked for start > end above */
+ if (end < prefix || start >= prefix + 2 * PAGE_SIZE) {
+ gmap_discard(start, end, vcpu->arch.gmap);
+ } else {
+ if (start < prefix)
+ gmap_discard(start, prefix, vcpu->arch.gmap);
+ if (end >= prefix)
+ gmap_discard(prefix + 2 * PAGE_SIZE,
+ end, vcpu->arch.gmap);
+ }
+ return 0;
+}
+
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+ struct prs_parm {
+ u16 code;
+ u16 subcode;
+ u16 parm_len;
+ u16 parm_version;
+ u64 token_addr;
+ u64 select_mask;
+ u64 compare_mask;
+ u64 zarch;
+ };
+ struct prs_parm parm;
+ int rc;
+ u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+ u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+
+ if (vcpu->run->s.regs.gprs[rx] & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (parm.subcode) {
+ case 0: /* TOKEN */
+ if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+ /*
+ * If the pagefault handshake is already activated,
+ * the token must not be changed. We have to return
+ * decimal 8 instead, as mandated in SC24-6084.
+ */
+ vcpu->run->s.regs.gprs[ry] = 8;
+ return 0;
+ }
+
+ if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+ parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ vcpu->arch.pfault_token = parm.token_addr;
+ vcpu->arch.pfault_select = parm.select_mask;
+ vcpu->arch.pfault_compare = parm.compare_mask;
+ vcpu->run->s.regs.gprs[ry] = 0;
+ rc = 0;
+ break;
+ case 1: /*
+ * CANCEL
+ * The specification allows already pending tokens to survive
+ * the cancel; therefore, to reduce code complexity, we assume
+ * all outstanding tokens are already pending.
+ */
+ if (parm.token_addr || parm.select_mask ||
+ parm.compare_mask || parm.zarch)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ vcpu->run->s.regs.gprs[ry] = 0;
+ /*
+ * If pfault handling was not established or is already
+ * canceled, SC24-6084 requires us to return decimal 4.
+ */
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ vcpu->run->s.regs.gprs[ry] = 4;
+ else
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+
+ return rc;
+}
static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
vcpu->stat.diagnose_44++;
- vcpu_put(vcpu);
- yield();
- vcpu_load(vcpu);
+ kvm_vcpu_on_spin(vcpu);
+ return 0;
+}
+
+static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tcpu;
+ int tid;
+ int i;
+
+ tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+ vcpu->stat.diagnose_9c++;
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+
+ if (tid == vcpu->vcpu_id)
+ return 0;
+
+ kvm_for_each_vcpu(i, tcpu, kvm)
+ if (tcpu->vcpu_id == tid) {
+ kvm_vcpu_yield_to(tcpu);
+ break;
+ }
+
return 0;
}
static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
{
unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
- unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
+ unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
switch (subcode) {
@@ -39,29 +173,72 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
vcpu->run->s390_reset_flags = 0;
break;
default:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
- atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
- VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
+ VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
vcpu->run->s390_reset_flags);
+ trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags);
return -EREMOTE;
}
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ /* No virtio-ccw notification? Get out quickly. */
+ if (!vcpu->kvm->arch.css_support ||
+ (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
+ return -EOPNOTSUPP;
+
+ /*
+ * The layout is as follows:
+ * - gpr 2 contains the subchannel id (passed as addr)
+ * - gpr 3 contains the virtqueue index (passed as datamatch)
+ * - gpr 4 contains the index on the bus (optionally)
+ */
+ ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ vcpu->run->s.regs.gprs[2] & 0xffffffff,
+ 8, &vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
+ /*
+ * Return cookie in gpr 2, but don't overwrite the register if the
+ * diagnose will be handled by userspace.
+ */
+ if (ret != -EOPNOTSUPP)
+ vcpu->run->s.regs.gprs[2] = ret;
+ /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
+ return ret < 0 ? ret : 0;
+}
+
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
- int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+ int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ trace_kvm_s390_handle_diag(vcpu, code);
switch (code) {
+ case 0x10:
+ return diag_release_pages(vcpu);
case 0x44:
return __diag_time_slice_end(vcpu);
+ case 0x9c:
+ return __diag_time_slice_end_directed(vcpu);
+ case 0x258:
+ return __diag_page_ref_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
+ case 0x500:
+ return __diag_virtio_hypercall(vcpu);
default:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 00000000000..4653ac6e182
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,726 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+union asce { /* ASCE word as returned by get_vcpu_asce(); val is the raw form */
+ unsigned long val;
+ struct {
+ unsigned long origin : 52; /* Region- or Segment-Table Origin; guest_translate() scales it by 4096 */
+ unsigned long : 2;
+ unsigned long g : 1; /* Subspace Group Control */
+ unsigned long p : 1; /* Private Space Control */
+ unsigned long s : 1; /* Storage-Alteration-Event Control */
+ unsigned long x : 1; /* Space-Switch-Event Control */
+ unsigned long r : 1; /* Real-Space Control; if set guest_translate() skips the table walk */
+ unsigned long : 1;
+ unsigned long dt : 2; /* Designation-Type Control; one of the ASCE_TYPE_* values */
+ unsigned long tl : 2; /* Region- or Segment-Table Length */
+ };
+};
+
+enum { /* values of asce.dt: the table level at which guest_translate() starts its walk */
+ ASCE_TYPE_SEGMENT = 0,
+ ASCE_TYPE_REGION3 = 1,
+ ASCE_TYPE_REGION2 = 2,
+ ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry { /* region-first-table entry; val is the raw word read by deref_table() */
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Second-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits; expected to be TABLE_TYPE_REGION1 */
+ unsigned long tl : 2; /* Region-Second-Table Length */
+ };
+};
+
+union region2_table_entry { /* region-second-table entry; val is the raw word read by deref_table() */
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Third-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits; expected to be TABLE_TYPE_REGION2 */
+ unsigned long tl : 2; /* Region-Third-Table Length */
+ };
+};
+
+struct region3_table_entry_fc0 { /* region-third-table entry layout used when fc is 0: points to a segment table */
+ unsigned long sto: 52;/* Segment-Table Origin */
+ unsigned long : 1;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Segment-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 { /* region-third-table entry layout used when fc is 1: maps a region frame directly */
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long : 14;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 2;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union region3_table_entry { /* region-third-table entry: raw word plus both format views */
+ unsigned long val;
+ struct region3_table_entry_fc0 fc0;
+ struct region3_table_entry_fc1 fc1;
+ struct { /* fields shared by both formats, usable before fc has been inspected */
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+struct segment_entry_fc0 { /* segment-table entry layout used when fc is 0: points to a page table */
+ unsigned long pto: 53;/* Page-Table Origin; guest_translate() scales it by 2048 */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 3;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+struct segment_entry_fc1 { /* segment-table entry layout used when fc is 1: maps a segment frame directly */
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long : 3;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 2;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union segment_table_entry { /* segment-table entry: raw word plus both format views */
+ unsigned long val;
+ struct segment_entry_fc0 fc0;
+ struct segment_entry_fc1 fc1;
+ struct { /* fields shared by both formats, usable before fc has been inspected */
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+enum { /* expected values of the tt field of a table entry at each level of the walk */
+ TABLE_TYPE_SEGMENT = 0,
+ TABLE_TYPE_REGION3 = 1,
+ TABLE_TYPE_REGION2 = 2,
+ TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry { /* page-table entry; val is the raw word read by deref_table() */
+ unsigned long val;
+ struct {
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+ unsigned long z : 1; /* Zero Bit; guest_translate() reports PGM_TRANSLATION_SPEC if set */
+ unsigned long i : 1; /* Page-Invalid Bit */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override; only accepted when edat1 is in effect */
+ unsigned long : 8;
+ };
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ *
+ * The first anonymous struct splits the address into the per-level table
+ * indexes; the second one exposes only the two leading bits of each
+ * 11-bit index, which guest_translate() compares against the table
+ * offset (tf) and table length (tl) fields.
+ */
+union vaddress {
+ unsigned long addr;
+ struct {
+ unsigned long rfx : 11; /* region-first index */
+ unsigned long rsx : 11; /* region-second index */
+ unsigned long rtx : 11; /* region-third index */
+ unsigned long sx : 11; /* segment index */
+ unsigned long px : 8; /* page index */
+ unsigned long bx : 12; /* byte index within the page */
+ };
+ struct {
+ unsigned long rfx01 : 2;
+ unsigned long : 9;
+ unsigned long rsx01 : 2;
+ unsigned long : 9;
+ unsigned long rtx01 : 2;
+ unsigned long : 9;
+ unsigned long sx01 : 2;
+ unsigned long : 29;
+ };
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ *
+ * The bit-fields are direct union members, so each one overlays the same
+ * storage as addr; assigning one replaces only that many bits and leaves
+ * the rest of addr (initialised from the virtual address) untouched.
+ * NOTE(review): this relies on the bit-fields being allocated from the
+ * most significant end of addr - confirm for the target ABI.
+ */
+union raddress {
+ unsigned long addr;
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+static int ipte_lock_count; /* nesting count for ipte_lock_simple()/ipte_unlock_simple(); modified under ipte_mutex */
+static DEFINE_MUTEX(ipte_mutex); /* serializes the simple lock/unlock paths */
+
+/*
+ * Report whether the IPTE lock is currently taken.
+ *
+ * With bit 0 of the SIE block's eca set, the answer is whether the kh
+ * field of the SCA's ipte_control word is nonzero; otherwise it is
+ * whether ipte_lock_count is nonzero.
+ *
+ * Returns 1 if held, 0 otherwise.
+ */
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+ int siif = vcpu->arch.sie_block->eca & 1;
+
+ if (!siif)
+ return !!ipte_lock_count;
+ return !!vcpu->kvm->arch.sca->ipte_control.kh;
+}
+
+/*
+ * Take the IPTE lock on the path used when bit 0 of eca is clear.
+ *
+ * Nested acquisitions are counted in ipte_lock_count under ipte_mutex;
+ * only the first one sets the k bit in the SCA's ipte_control word,
+ * yielding with cond_resched() for as long as k is already set.
+ *
+ * ipte_control is a union and ACCESS_ONCE() is only reliable for scalar
+ * types: gcc may drop the volatile qualifier on an aggregate and reload
+ * the value. The snapshot is therefore read through the scalar val
+ * member, which is also the word cmpxchg() operates on.
+ */
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&ipte_mutex);
+ ipte_lock_count++;
+ if (ipte_lock_count > 1)
+ goto out;
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old.val = ACCESS_ONCE(ic->val);
+ while (old.k) {
+ cond_resched();
+ old.val = ACCESS_ONCE(ic->val);
+ }
+ new = old;
+ new.k = 1;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+ mutex_unlock(&ipte_mutex);
+}
+
+/*
+ * Drop the IPTE lock on the path used when bit 0 of eca is clear.
+ *
+ * Decrements ipte_lock_count under ipte_mutex; when the count reaches
+ * zero the k bit in the SCA's ipte_control word is cleared and waiters
+ * on kvm->arch.ipte_wq are woken.
+ *
+ * The control word is snapshotted through its scalar val member because
+ * ACCESS_ONCE() is not reliable on a union (gcc may drop the volatile
+ * qualifier on aggregates).
+ */
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&ipte_mutex);
+ ipte_lock_count--;
+ if (ipte_lock_count)
+ goto out;
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old.val = ACCESS_ONCE(ic->val);
+ new = old;
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ /* ipte_lock_count is zero here (see the goto above) and the mutex is held. */
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+ mutex_unlock(&ipte_mutex);
+}
+
+/*
+ * Take the IPTE lock on the path used when bit 0 of eca is set.
+ *
+ * Waits, yielding with cond_resched(), while the kg field of the SCA's
+ * ipte_control word is nonzero, then atomically sets k and increments
+ * kh with cmpxchg().
+ *
+ * The control word is snapshotted through its scalar val member because
+ * ACCESS_ONCE() is not reliable on a union (gcc may drop the volatile
+ * qualifier on aggregates).
+ */
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old.val = ACCESS_ONCE(ic->val);
+ while (old.kg) {
+ cond_resched();
+ old.val = ACCESS_ONCE(ic->val);
+ }
+ new = old;
+ new.k = 1;
+ new.kh++;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+/*
+ * Drop the IPTE lock on the path used when bit 0 of eca is set.
+ *
+ * Atomically decrements kh in the SCA's ipte_control word and clears k
+ * once kh reaches zero; in that case waiters on kvm->arch.ipte_wq are
+ * woken.
+ *
+ * The control word is snapshotted through its scalar val member because
+ * ACCESS_ONCE() is not reliable on a union (gcc may drop the volatile
+ * qualifier on aggregates). A reload between computing new and the
+ * cmpxchg() would let the two get out of sync.
+ */
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old.val = ACCESS_ONCE(ic->val);
+ new = old;
+ new.kh--;
+ if (!new.kh)
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ if (!new.kh)
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+/*
+ * Take the IPTE lock for @vcpu, choosing the implementation by bit 0 of
+ * the SIE block's eca field: set selects ipte_lock_siif(), clear selects
+ * ipte_lock_simple().
+ */
+void ipte_lock(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.sie_block->eca & 1)) {
+ ipte_lock_simple(vcpu);
+ return;
+ }
+ ipte_lock_siif(vcpu);
+}
+
+/*
+ * Release the IPTE lock for @vcpu, choosing the implementation by bit 0
+ * of the SIE block's eca field: set selects ipte_unlock_siif(), clear
+ * selects ipte_unlock_simple().
+ */
+void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.sie_block->eca & 1)) {
+ ipte_unlock_simple(vcpu);
+ return;
+ }
+ ipte_unlock_siif(vcpu);
+}
+
+/*
+ * Return the guest control register that holds the ASCE for the address
+ * space selected by the guest PSW: gcr[1] for primary, gcr[7] for
+ * secondary and gcr[13] for home space. Any other mode yields 0.
+ */
+static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+{
+ int as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+ if (as == PSW_AS_PRIMARY)
+ return vcpu->arch.sie_block->gcr[1];
+ if (as == PSW_AS_SECONDARY)
+ return vcpu->arch.sie_block->gcr[7];
+ if (as == PSW_AS_HOME)
+ return vcpu->arch.sie_block->gcr[13];
+ return 0;
+}
+
+/*
+ * Read one translation-table entry (a single unsigned long) from guest
+ * address @gpa into @val. Returns whatever kvm_read_guest() returns.
+ */
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+ int rc;
+
+ rc = kvm_read_guest(kvm, gpa, val, sizeof(unsigned long));
+ return rc;
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ * - a negative value if guest access failed due to e.g. broken
+ * guest mapping
+ * - a positive value if an access exception happened. In this case
+ * the returned value is the program interruption code as defined
+ * by the architecture
+ *
+ * NOTE(review): the function is declared to return unsigned long although
+ * it returns -EFAULT; the callers in this file store the result in an int
+ * before testing its sign.
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ union vaddress vaddr = {.addr = gva};
+ union raddress raddr = {.addr = gva}; /* low-order bits of gva survive the frame-address assignments below */
+ union page_table_entry pte;
+ int dat_protection = 0; /* accumulates the p bits seen during the walk */
+ union ctlreg0 ctlreg0;
+ unsigned long ptr; /* guest address of the table entry to read next */
+ int edat1, edat2;
+ union asce asce;
+
+ ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+ edat1 = ctlreg0.edat && test_vfacility(8);
+ edat2 = edat1 && test_vfacility(78);
+ asce.val = get_vcpu_asce(vcpu);
+ if (asce.r)
+ goto real_address; /* real-space ASCE: gva is treated as a real address */
+ ptr = asce.origin * 4096; /* origin is scaled by 4096 to get the table address */
+ switch (asce.dt) { /* first pass: validate gva against the top-level table and index into it */
+ case ASCE_TYPE_REGION1:
+ if (vaddr.rfx01 > asce.tl)
+ return PGM_REGION_FIRST_TRANS;
+ ptr += vaddr.rfx * 8; /* table entries are 8 bytes each */
+ break;
+ case ASCE_TYPE_REGION2:
+ if (vaddr.rfx) /* index bits above the starting level must be zero */
+ return PGM_ASCE_TYPE;
+ if (vaddr.rsx01 > asce.tl)
+ return PGM_REGION_SECOND_TRANS;
+ ptr += vaddr.rsx * 8;
+ break;
+ case ASCE_TYPE_REGION3:
+ if (vaddr.rfx || vaddr.rsx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rtx01 > asce.tl)
+ return PGM_REGION_THIRD_TRANS;
+ ptr += vaddr.rtx * 8;
+ break;
+ case ASCE_TYPE_SEGMENT:
+ if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.sx01 > asce.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ ptr += vaddr.sx * 8;
+ break;
+ }
+ switch (asce.dt) { /* second pass: walk down from the starting level, falling through each lower level */
+ case ASCE_TYPE_REGION1: {
+ union region1_table_entry rfte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rfte.val))
+ return -EFAULT;
+ if (rfte.i)
+ return PGM_REGION_FIRST_TRANS;
+ if (rfte.tt != TABLE_TYPE_REGION1)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+ return PGM_REGION_SECOND_TRANS;
+ if (edat1)
+ dat_protection |= rfte.p;
+ ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION2: {
+ union region2_table_entry rste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rste.val))
+ return -EFAULT;
+ if (rste.i)
+ return PGM_REGION_SECOND_TRANS;
+ if (rste.tt != TABLE_TYPE_REGION2)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+ return PGM_REGION_THIRD_TRANS;
+ if (edat1)
+ dat_protection |= rste.p;
+ ptr = rste.rto * 4096 + vaddr.rtx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION3: {
+ union region3_table_entry rtte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rtte.val))
+ return -EFAULT;
+ if (rtte.i)
+ return PGM_REGION_THIRD_TRANS;
+ if (rtte.tt != TABLE_TYPE_REGION3)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.cr && asce.p && edat2)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.fc && edat2) { /* format-1 entry: the walk ends here with a region frame */
+ dat_protection |= rtte.fc1.p;
+ raddr.rfaa = rtte.fc1.rfaa;
+ goto absolute_address;
+ }
+ if (vaddr.sx01 < rtte.fc0.tf)
+ return PGM_SEGMENT_TRANSLATION;
+ if (vaddr.sx01 > rtte.fc0.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ if (edat1)
+ dat_protection |= rtte.fc0.p;
+ ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_SEGMENT: {
+ union segment_table_entry ste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &ste.val))
+ return -EFAULT;
+ if (ste.i)
+ return PGM_SEGMENT_TRANSLATION;
+ if (ste.tt != TABLE_TYPE_SEGMENT)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.cs && asce.p)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.fc && edat1) { /* format-1 entry: the walk ends here with a segment frame */
+ dat_protection |= ste.fc1.p;
+ raddr.sfaa = ste.fc1.sfaa;
+ goto absolute_address;
+ }
+ dat_protection |= ste.fc0.p;
+ ptr = ste.fc0.pto * 2048 + vaddr.px * 8; /* page-table origin is scaled by 2048 */
+ }
+ }
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &pte.val))
+ return -EFAULT;
+ if (pte.i)
+ return PGM_PAGE_TRANSLATION;
+ if (pte.z)
+ return PGM_TRANSLATION_SPEC;
+ if (pte.co && !edat1)
+ return PGM_TRANSLATION_SPEC;
+ dat_protection |= pte.p;
+ raddr.pfra = pte.pfra;
+real_address: /* raddr holds a real address here; prefixing is still to be applied */
+ raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address: /* raddr holds an absolute address; the format-1 paths jump here and skip prefixing */
+ if (write && dat_protection)
+ return PGM_PROTECTION;
+ if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+ return PGM_ADDRESSING;
+ *gpa = raddr.addr;
+ return 0;
+}
+
+/*
+ * Return 1 if @ga lies in one of the ranges 0..511 or 4096..4607, else 0.
+ * Both ranges are exactly the addresses that have no bit set outside the
+ * mask 0x11ff.
+ */
+static inline int is_low_address(unsigned long ga)
+{
+ unsigned long outside = ga & ~0x11fful;
+
+ return !outside;
+}
+
+/*
+ * Tell whether low-address protection applies to the vcpu right now.
+ *
+ * Returns 0 if the lap bit in guest control register 0 is clear, or if
+ * the PSW t bit is set and the current ASCE has its p bit set; returns 1
+ * otherwise.
+ */
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+{
+ union ctlreg0 cr0;
+ union asce cur;
+
+ cr0.val = vcpu->arch.sie_block->gcr[0];
+ if (!cr0.lap)
+ return 0;
+ cur.val = get_vcpu_asce(vcpu);
+ return !(psw_bits(vcpu->arch.sie_block->gpsw).t && cur.p);
+}
+
+struct trans_exc_code_bits { /* bit-field view laid over pgm->trans_exc_code by the functions below */
+ unsigned long addr : 52; /* Translation-exception Address */
+ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
+ unsigned long : 7;
+ unsigned long b61 : 1; /* set by the callers when the exception is PGM_PROTECTION from guest_translate() */
+ unsigned long as : 2; /* ASCE Identifier */
+};
+
+enum { /* values for trans_exc_code_bits.fsi */
+ FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+ FSI_STORE = 1, /* Exception was due to store operation */
+ FSI_FETCH = 2 /* Exception was due to fetch operation */
+};
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, /* fills pages[] with one absolute page address per guest page */
+ unsigned long *pages, unsigned long nr_pages,
+ int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec_bits;
+ int lap_enabled, rc;
+
+ memset(pgm, 0, sizeof(*pgm)); /* pgm is rebuilt from scratch on every call */
+ tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw_bits(*psw).as;
+ lap_enabled = low_address_protection_enabled(vcpu);
+ while (nr_pages) {
+ ga = kvm_s390_logical_to_effective(vcpu, ga); /* re-applied each iteration so address wrap-around is honoured */
+ tec_bits->addr = ga >> PAGE_SHIFT;
+ if (write && lap_enabled && is_low_address(ga)) {
+ pgm->code = PGM_PROTECTION;
+ return pgm->code;
+ }
+ ga &= PAGE_MASK; /* from here on only the page address matters */
+ if (psw_bits(*psw).t) {
+ rc = guest_translate(vcpu, ga, pages, write);
+ if (rc < 0) /* negative: host-side failure, not a guest exception */
+ return rc;
+ if (rc == PGM_PROTECTION)
+ tec_bits->b61 = 1;
+ if (rc)
+ pgm->code = rc;
+ } else {
+ *pages = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ pgm->code = PGM_ADDRESSING;
+ }
+ if (pgm->code) /* positive return: program interruption code, details left in pgm */
+ return pgm->code;
+ ga += PAGE_SIZE;
+ pages++;
+ nr_pages--;
+ }
+ return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, /* copies len bytes between data and guest address ga */
+ unsigned long len, int write)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long _len, nr_pages, gpa, idx;
+ unsigned long pages_array[2]; /* avoids an allocation when at most two pages are touched */
+ unsigned long *pages;
+ int need_ipte_lock;
+ union asce asce;
+ int rc;
+
+ if (!len)
+ return 0;
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+ nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; /* number of guest pages spanned by [ga, ga + len) */
+ pages = pages_array;
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ pages = vmalloc(nr_pages * sizeof(unsigned long));
+ if (!pages)
+ return -ENOMEM;
+ asce.val = get_vcpu_asce(vcpu);
+ need_ipte_lock = psw_bits(*psw).t && !asce.r; /* lock only when translation tables will actually be walked */
+ if (need_ipte_lock)
+ ipte_lock(vcpu);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, write); /* all pages are resolved before any data is copied */
+ for (idx = 0; idx < nr_pages && !rc; idx++) {
+ gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); /* never cross a page boundary in one copy */
+ if (write)
+ rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+ else
+ rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+ len -= _len;
+ ga += _len;
+ data += _len;
+ }
+ if (need_ipte_lock)
+ ipte_unlock(vcpu);
+ if (nr_pages > ARRAY_SIZE(pages_array)) /* free only what was vmalloc'ed above */
+ vfree(pages);
+ return rc;
+}
+
+/*
+ * Copy @len bytes between @data (kernel space) and guest real address
+ * @gra, direction selected by @write. Each chunk is converted from real
+ * to absolute with kvm_s390_real_to_abs() and never crosses a page
+ * boundary. Copying stops at the first non-zero return code from
+ * write_guest_abs()/read_guest_abs(), which is then returned; zero is
+ * returned when everything was copied.
+ */
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, int write)
+{
+ unsigned long chunk, abs_addr;
+ int rc;
+
+ for (rc = 0; len && !rc; len -= chunk, gra += chunk, data += chunk) {
+ abs_addr = kvm_s390_real_to_abs(vcpu, gra);
+ chunk = min(PAGE_SIZE - (abs_addr & ~PAGE_MASK), len);
+ rc = write ? write_guest_abs(vcpu, abs_addr, data, chunk)
+ : read_guest_abs(vcpu, abs_addr, data, chunk);
+ }
+ return rc;
+}
+
+/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * On a positive return value vcpu->arch.pgm has been filled in with the
+ * program interruption code and the translation-exception code.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec;
+ union asce asce;
+ int rc;
+
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+
+ gva = kvm_s390_logical_to_effective(vcpu, gva);
+ memset(pgm, 0, sizeof(*pgm)); /* pgm is rebuilt from scratch on every call */
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec->as = psw_bits(*psw).as;
+ tec->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec->addr = gva >> PAGE_SHIFT;
+ if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+ if (write) { /* low-address protection only affects stores */
+ rc = pgm->code = PGM_PROTECTION;
+ return rc;
+ }
+ }
+
+ asce.val = get_vcpu_asce(vcpu);
+ if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
+ rc = guest_translate(vcpu, gva, gpa, write);
+ if (rc > 0) { /* positive: program interruption code; negative values pass through untouched */
+ if (rc == PGM_PROTECTION)
+ tec->b61 = 1;
+ pgm->code = rc;
+ }
+ } else {
+ rc = 0;
+ *gpa = kvm_s390_real_to_abs(vcpu, gva); /* NOTE: *gpa is written even if the check below fails */
+ if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+ rc = pgm->code = PGM_ADDRESSING;
+ }
+
+ return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_protection - check for low-address protection
+ * @vcpu: virtual cpu
+ * @ga: Guest address
+ *
+ * Checks whether @ga falls into a low-address range while low-address
+ * protection is in effect for @vcpu. If so, vcpu->arch.pgm is cleared and
+ * set up as a store-type protection exception for @ga; otherwise it is
+ * left untouched.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+{
+ struct kvm_s390_pgm_info *info = &vcpu->arch.pgm;
+ struct trans_exc_code_bits *tec;
+
+ if (!is_low_address(ga))
+ return 0;
+ if (!low_address_protection_enabled(vcpu))
+ return 0;
+
+ memset(info, 0, sizeof(*info));
+ tec = (struct trans_exc_code_bits *)&info->trans_exc_code;
+ tec->fsi = FSI_STORE;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ tec->addr = ga >> PAGE_SHIFT;
+ info->code = PGM_PROTECTION;
+
+ return info->code;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 4e0633c413f..0149cf15058 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,7 +1,7 @@
/*
- * gaccess.h - access guest memory
+ * access guest memory
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -15,260 +15,321 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
-#include <asm/uaccess.h>
-
-static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
- u64 guestaddr)
-{
- u64 prefix = vcpu->arch.sie_block->prefix;
- u64 origin = vcpu->kvm->arch.guest_origin;
- u64 memsize = vcpu->kvm->arch.guest_memsize;
-
- if (guestaddr < 2 * PAGE_SIZE)
- guestaddr += prefix;
- else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
- guestaddr -= prefix;
-
- if (guestaddr > memsize)
- return (void __user __force *) ERR_PTR(-EFAULT);
-
- guestaddr += origin;
-
- return (void __user *) guestaddr;
-}
-
-static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
- u64 *result)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 7);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return get_user(*result, (u64 __user *) uptr);
-}
-
-static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
- u32 *result)
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include "kvm-s390.h"
+
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu: guest virtual cpu
+ * @gra: guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+ unsigned long gra)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 3);
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return get_user(*result, (u32 __user *) uptr);
+ if (gra < 2 * PAGE_SIZE)
+ gra += prefix;
+ else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+ gra -= prefix;
+ return gra;
}
-static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
- u16 *result)
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+ unsigned long ga)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 1);
-
- if (IS_ERR(uptr))
- return PTR_ERR(uptr);
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
- return get_user(*result, (u16 __user *) uptr);
+ if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+ return ga;
+ if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+ return ga & ((1UL << 31) - 1);
+ return ga & ((1UL << 24) - 1);
}
-static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
- u8 *result)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return get_user(*result, (u8 __user *) uptr);
-}
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
-static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
- u64 value)
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpu); \
+ __typeof__(*(gra)) __x = (x); \
+ unsigned long __gpa; \
+ \
+ __gpa = (unsigned long)(gra); \
+ __gpa += kvm_s390_get_prefix(__vcpu); \
+ kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
+})
+
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's destination guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 7);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
+ unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
- return put_user(value, (u64 __user *) uptr);
+ return kvm_write_guest(vcpu->kvm, gpa, data, len);
}
-static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
- u32 value)
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 3);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
+ unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
- return put_user(value, (u32 __user *) uptr);
+ return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
-static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
- u16 value)
-{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- BUG_ON(guestaddr & 1);
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write);
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len, int write);
- return put_user(value, (u16 __user *) uptr);
-}
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, int write);
-static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
- u8 value)
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary or home space (access register mode is currently not
+ * implemented).
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the to be copied data crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns: - zero on success
+ * - a negative value if e.g. the guest mapping is broken or in
+ * case of out-of-memory. In this case the contents of pgm are
+ * undefined. Also parts of @data may have been copied to guest
+ * space.
+ * - a positive value if an access exception happened. In this case
+ * the returned value is the program interruption code and the
+ * contents of pgm may be used to inject an exception into the
+ * guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ * guest space (this is also true, if the to be copied data would cross
+ * one or more page boundaries in guest space).
+ * Therefore this function may be used for nullifying and suppressing
+ * instruction emulation.
+ * It may also be used for terminating instructions, if it is undefined
+ * if data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len)
{
- void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
-
- if (IS_ERR((void __force *) uptr))
- return PTR_ERR((void __force *) uptr);
-
- return put_user(value, (u8 __user *) uptr);
+ return access_guest(vcpu, ga, data, len, 1);
}
-
-static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
- const void *from, unsigned long n)
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len)
{
- int rc;
- unsigned long i;
- const u8 *data = from;
-
- for (i = 0; i < n; i++) {
- rc = put_guest_u8(vcpu, guestdest++, *(data++));
- if (rc < 0)
- return rc;
- }
- return 0;
+ return access_guest(vcpu, ga, data, len, 0);
}
-static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
- const void *from, unsigned long n)
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+ unsigned long len)
{
- u64 prefix = vcpu->arch.sie_block->prefix;
- u64 origin = vcpu->kvm->arch.guest_origin;
- u64 memsize = vcpu->kvm->arch.guest_memsize;
-
- if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
- goto slowpath;
-
- if ((guestdest < prefix) && (guestdest + n > prefix))
- goto slowpath;
-
- if ((guestdest < prefix + 2 * PAGE_SIZE)
- && (guestdest + n > prefix + 2 * PAGE_SIZE))
- goto slowpath;
-
- if (guestdest < 2 * PAGE_SIZE)
- guestdest += prefix;
- else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
- guestdest -= prefix;
-
- if (guestdest + n > memsize)
- return -EFAULT;
-
- if (guestdest + n < guestdest)
- return -EFAULT;
-
- guestdest += origin;
-
- return copy_to_user((void __user *) guestdest, from, n);
-slowpath:
- return __copy_to_guest_slow(vcpu, guestdest, from, n);
+ return kvm_write_guest(vcpu->kvm, gpa, data, len);
}
-static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
- u64 guestsrc, unsigned long n)
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+ unsigned long len)
{
- int rc;
- unsigned long i;
- u8 *data = to;
-
- for (i = 0; i < n; i++) {
- rc = get_guest_u8(vcpu, guestsrc++, data++);
- if (rc < 0)
- return rc;
- }
- return 0;
+ return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
-static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
- u64 guestsrc, unsigned long n)
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- u64 prefix = vcpu->arch.sie_block->prefix;
- u64 origin = vcpu->kvm->arch.guest_origin;
- u64 memsize = vcpu->kvm->arch.guest_memsize;
-
- if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
- goto slowpath;
-
- if ((guestsrc < prefix) && (guestsrc + n > prefix))
- goto slowpath;
-
- if ((guestsrc < prefix + 2 * PAGE_SIZE)
- && (guestsrc + n > prefix + 2 * PAGE_SIZE))
- goto slowpath;
-
- if (guestsrc < 2 * PAGE_SIZE)
- guestsrc += prefix;
- else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
- guestsrc -= prefix;
-
- if (guestsrc + n > memsize)
- return -EFAULT;
-
- if (guestsrc + n < guestsrc)
- return -EFAULT;
-
- guestsrc += origin;
-
- return copy_from_user(to, (void __user *) guestsrc, n);
-slowpath:
- return __copy_from_guest_slow(vcpu, to, guestsrc, n);
+ return access_guest_real(vcpu, gra, data, len, 1);
}
-static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
- const void *from, unsigned long n)
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
{
- u64 origin = vcpu->kvm->arch.guest_origin;
- u64 memsize = vcpu->kvm->arch.guest_memsize;
-
- if (guestdest + n > memsize)
- return -EFAULT;
-
- if (guestdest + n < guestdest)
- return -EFAULT;
-
- guestdest += origin;
-
- return copy_to_user((void __user *) guestdest, from, n);
+ return access_guest_real(vcpu, gra, data, len, 0);
}
-static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
- u64 guestsrc, unsigned long n)
-{
- u64 origin = vcpu->kvm->arch.guest_origin;
- u64 memsize = vcpu->kvm->arch.guest_memsize;
-
- if (guestsrc + n > memsize)
- return -EFAULT;
+void ipte_lock(struct kvm_vcpu *vcpu);
+void ipte_unlock(struct kvm_vcpu *vcpu);
+int ipte_lock_held(struct kvm_vcpu *vcpu);
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
- if (guestsrc + n < guestsrc)
- return -EFAULT;
-
- guestsrc += origin;
-
- return copy_from_user(to, (void __user *) guestsrc, n);
-}
-#endif
+#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 00000000000..3e8d4092ce3
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,482 @@
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting with estart and the length len. Takes care of overflowing
+ * intervals and tries to minimize the overall interval size.
+ *
+ * A range with *start == 0 and *stop == 0 is treated as "not set" and is
+ * simply replaced by the new range. A range with *start > *stop is treated
+ * as an "overflowing" interval. Both bounds are inclusive; a len <= 0 is
+ * handled like a len of 1.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+ u64 estop;
+
+ /* turn the length into the offset of the last byte of the new range */
+ if (len > 0)
+ len--;
+ else
+ len = 0;
+
+ estop = estart + len;
+
+ /* 0-0 range represents "not set" */
+ if ((*start == 0) && (*stop == 0)) {
+ *start = estart;
+ *stop = estop;
+ } else if (*start <= *stop) {
+ /* increase the existing range */
+ if (estart < *start)
+ *start = estart;
+ if (estop > *stop)
+ *stop = estop;
+ } else {
+ /* "overflowing" interval, whereby *start > *stop */
+ if (estart <= *stop) {
+ /* new range begins within the part that ends at *stop */
+ if (estop > *stop)
+ *stop = estop;
+ } else if (estop > *start) {
+ /* new range ends within the part that begins at *start */
+ if (estart < *start)
+ *start = estart;
+ }
+ /*
+ * minimize the range: the new range lies between *stop and
+ * *start, so move whichever bound needs the smaller extension
+ */
+ else if ((estop - *stop) < (*start - estart))
+ *stop = estop;
+ else
+ *start = estart;
+ }
+}
+
+#define MAX_INST_SIZE 6
+
+/*
+ * Set up the PER control registers in the SIE block (gcr 9, 10 and 11) for
+ * all hardware breakpoints stored in vcpu->arch.guestdbg.hw_bp_info.
+ *
+ * Instruction-fetching and branching events are switched on in cr9 and the
+ * address range in cr10/cr11 is extended to cover every breakpoint, widened
+ * by MAX_INST_SIZE bytes in front of its start address. Does nothing if no
+ * breakpoints are registered.
+ */
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, len;
+ u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+ u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+ u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+ vcpu->arch.guestdbg.hw_bp_info == NULL)
+ return;
+
+ /*
+ * If the guest is not interested in branching events, we can safely
+ * limit them to the PER address range.
+ */
+ if (!(*cr9 & PER_EVENT_BRANCH))
+ *cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+ *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+ start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+ len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+ /*
+ * The instruction in front of the desired bp has to
+ * report instruction-fetching events
+ */
+ if (start < MAX_INST_SIZE) {
+ /* cannot move start below 0: begin the range at address 0 */
+ len += start;
+ start = 0;
+ } else {
+ start -= MAX_INST_SIZE;
+ len += MAX_INST_SIZE;
+ }
+
+ extend_address_range(cr10, cr11, start, len);
+ }
+}
+
+/*
+ * Set up the PER control registers in the SIE block (gcr 9, 10 and 11) for
+ * all hardware watchpoints stored in vcpu->arch.guestdbg.hw_wp_info.
+ *
+ * Either the whole address space is covered (cr10 = 0, cr11 = PSW_ADDR_INSN)
+ * or store events are switched on and the cr10/cr11 range is extended to
+ * cover every watchpoint. In both cases PER_CONTROL_ALTERATION is cleared.
+ * Does nothing if no watchpoints are registered.
+ */
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, len;
+ u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+ u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+ u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+ vcpu->arch.guestdbg.hw_wp_info == NULL)
+ return;
+
+ /* if host uses storage alteration for special address
+ * spaces, enable all events and give all to the guest */
+ if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+ *cr9 &= ~PER_CONTROL_ALTERATION;
+ *cr10 = 0;
+ *cr11 = PSW_ADDR_INSN;
+ } else {
+ *cr9 &= ~PER_CONTROL_ALTERATION;
+ *cr9 |= PER_EVENT_STORE;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+ len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+ extend_address_range(cr10, cr11, start, len);
+ }
+ }
+}
+
+/*
+ * Save guest control registers 0, 9, 10 and 11 from the SIE block into
+ * vcpu->arch.guestdbg so that kvm_s390_restore_guest_per_regs() can put
+ * them back later.
+ */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+ vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+ vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+ vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+/*
+ * Write the copies of control registers 0, 9, 10 and 11 kept in
+ * vcpu->arch.guestdbg back into the SIE block (counterpart of
+ * kvm_s390_backup_guest_per_regs()).
+ */
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+ vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+ vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+ vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+/*
+ * Modify the guest control registers in the SIE block according to the
+ * enabled debug features: single stepping enables instruction-fetching
+ * events for the whole address range, hardware breakpoints/watchpoints are
+ * added via enable_all_hw_bp()/enable_all_hw_wp(). This function overwrites
+ * gcr 0, 9, 10 and 11 in place and does not save the previous values.
+ */
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * TODO: if guest psw has per enabled, otherwise 0s!
+ * This reduces the amount of reported events.
+ * Need to intercept all psw changes!
+ */
+
+ if (guestdbg_sstep_enabled(vcpu)) {
+ /* disable timer (clock-comparator) interrupts */
+ vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
+ vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+ /* PER range 0 .. PSW_ADDR_INSN */
+ vcpu->arch.sie_block->gcr[10] = 0;
+ vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+ }
+
+ if (guestdbg_hw_bp_enabled(vcpu)) {
+ enable_all_hw_bp(vcpu);
+ enable_all_hw_wp(vcpu);
+ }
+
+ /* TODO: Instruction-fetching-nullification not allowed for now */
+ if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+ vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+/*
+ * Fill one watchpoint entry from a userspace breakpoint description and
+ * take a snapshot of the watched guest memory.
+ *
+ * @vcpu:    virtual cpu whose guest memory is read
+ * @bp_data: breakpoint description (already copied to kernel space)
+ * @wp_info: watchpoint entry to initialize
+ *
+ * On success wp_info->old_data points to a kmalloc'ed copy of the watched
+ * memory that the caller has to kfree() eventually. On failure
+ * wp_info->old_data is NULL and nothing is left allocated.
+ *
+ * Returns 0 on success, -EINVAL for a length outside 0..MAX_WP_SIZE,
+ * -ENOMEM if the snapshot buffer cannot be allocated, or the error returned
+ * by read_guest_abs().
+ */
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+			    struct kvm_hw_breakpoint *bp_data,
+			    struct kvm_hw_wp_info_arch *wp_info)
+{
+	int ret = 0;
+	wp_info->len = bp_data->len;
+	wp_info->addr = bp_data->addr;
+	wp_info->phys_addr = bp_data->phys_addr;
+	wp_info->old_data = NULL;
+
+	if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+		return -EINVAL;
+
+	wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+	if (!wp_info->old_data)
+		return -ENOMEM;
+	/*
+	 * try to backup the original value; phys_addr is a guest physical
+	 * (absolute) address, so it must not go through read_guest(), which
+	 * expects a logical address and applies DAT and protection checks
+	 */
+	ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+			     wp_info->len);
+	if (ret) {
+		kfree(wp_info->old_data);
+		wp_info->old_data = NULL;
+	}
+
+	return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+/*
+ * Import the hardware breakpoint/watchpoint list passed by userspace in
+ * @dbg and store it in vcpu->arch.guestdbg.
+ *
+ * The userspace array is copied into a temporary buffer, split into
+ * watchpoints (KVM_HW_WP_WRITE) and breakpoints (KVM_HW_BP) and converted
+ * into two freshly allocated arrays; entries of any other type are ignored.
+ * The temporary buffer is released on every path. On failure all memory
+ * allocated here, including the snapshots of already imported watchpoints,
+ * is released and vcpu->arch.guestdbg is left untouched.
+ *
+ * Returns 0 on success (also when no breakpoints were passed), -EINVAL if
+ * more than MAX_BP_COUNT entries were passed, -ENOMEM or -EFAULT on
+ * allocation/copy failures, or the error returned by __import_wp_info().
+ */
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg)
+{
+	int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+	/* number of wp_info entries that own an old_data buffer */
+	int nr_wp_imported = 0;
+	struct kvm_hw_breakpoint *bp_data = NULL;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+	if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+		return 0;
+	else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+		return -EINVAL;
+
+	size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
+	bp_data = kmalloc(size, GFP_KERNEL);
+	if (!bp_data) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	/* first pass: count the entries of each kind to size the arrays */
+	for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			nr_bp++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
+	if (size > 0) {
+		wp_info = kmalloc(size, GFP_KERNEL);
+		if (!wp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+	size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
+	if (size > 0) {
+		bp_info = kmalloc(size, GFP_KERNEL);
+		if (!bp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+
+	/* second pass: fill the arrays, reusing nr_wp/nr_bp as indices */
+	for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			ret = __import_wp_info(vcpu, &bp_data[i],
+					       &wp_info[nr_wp]);
+			if (ret)
+				goto error;
+			nr_wp++;
+			nr_wp_imported = nr_wp;
+			break;
+		case KVM_HW_BP:
+			bp_info[nr_bp].len = bp_data[i].len;
+			bp_info[nr_bp].addr = bp_data[i].addr;
+			nr_bp++;
+			break;
+		}
+	}
+
+	/* everything was copied out of the temporary buffer */
+	kfree(bp_data);
+
+	vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+	vcpu->arch.guestdbg.hw_bp_info = bp_info;
+	vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+	vcpu->arch.guestdbg.hw_wp_info = wp_info;
+	return 0;
+error:
+	/*
+	 * Only the entries imported so far own a snapshot buffer; the rest of
+	 * wp_info is uninitialized and a failed __import_wp_info() leaves no
+	 * buffer behind.
+	 */
+	for (i = 0; i < nr_wp_imported; i++)
+		kfree(wp_info[i].old_data);
+	kfree(bp_data);
+	kfree(wp_info);
+	kfree(bp_info);
+	return ret;
+}
+
+/*
+ * Release all hardware breakpoint and watchpoint data stored in
+ * vcpu->arch.guestdbg: the old_data buffer of every watchpoint, the
+ * watchpoint array and the breakpoint array. Afterwards both array pointers
+ * are NULL and both counters are 0.
+ */
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+ /* free the per-watchpoint buffers before the array that holds them */
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+ kfree(hw_wp_info->old_data);
+ hw_wp_info->old_data = NULL;
+ }
+ kfree(vcpu->arch.guestdbg.hw_wp_info);
+ vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+ kfree(vcpu->arch.guestdbg.hw_bp_info);
+ vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+ vcpu->arch.guestdbg.nr_hw_wp = 0;
+ vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+/*
+ * Check whether addr lies within the inclusive address range a..b.
+ *
+ * If a > b the range is an "overflowing" interval: it runs from a up to the
+ * highest address and continues from address 0 up to b. addr is then inside
+ * the range if it lies in either of these two parts.
+ *
+ * Returns nonzero if addr is inside the range, 0 otherwise.
+ */
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+	if (a <= b)
+		return (addr >= a) && (addr <= b);
+	else
+		/* "overflowing" interval */
+		return (addr >= a) || (addr <= b);
+}
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+/*
+ * Look up the hardware breakpoint that matches addr.
+ *
+ * A breakpoint matches if addr equals its start address or, for breakpoints
+ * with len > 0, if addr lies within the breakpoint's address range.
+ *
+ * Returns the first matching entry of vcpu->arch.guestdbg.hw_bp_info or NULL
+ * if there is none.
+ */
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+ unsigned long addr)
+{
+ struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+ return NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+ /* addr is directly the start or in the range of a bp */
+ if (addr == bp_info->addr)
+ goto found;
+ if (bp_info->len > 0 &&
+ in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+ goto found;
+
+ /* bp_info walks the array in step with i */
+ bp_info++;
+ }
+
+ return NULL;
+found:
+ return bp_info;
+}
+
+/*
+ * Check whether the guest memory watched by any hardware watchpoint differs
+ * from the snapshot stored in its old_data buffer.
+ *
+ * Watchpoints without a snapshot or with len <= 0 are skipped, as are
+ * watchpoints for which the temporary buffer cannot be allocated or the
+ * guest memory cannot be read. The snapshot itself is not updated here.
+ *
+ * Returns the first watchpoint whose memory content changed, or NULL if no
+ * change was detected.
+ */
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	void *temp = NULL;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+			continue;
+
+		temp = kmalloc(wp_info->len, GFP_KERNEL);
+		if (!temp)
+			continue;
+
+		/*
+		 * refetch the wp data and compare it to the old value;
+		 * phys_addr is a guest physical (absolute) address, so it
+		 * must not go through read_guest(), which expects a logical
+		 * address and applies DAT and protection checks
+		 */
+		if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+				    wp_info->len)) {
+			if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+				kfree(temp);
+				return wp_info;
+			}
+		}
+		kfree(temp);
+		temp = NULL;
+	}
+
+	return NULL;
+}
+
+/*
+ * Set the exit reason of the vcpu's run structure to KVM_EXIT_DEBUG and
+ * clear the KVM_GUESTDBG_EXIT_PENDING flag in vcpu->guest_debug.
+ */
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define per_bp_event(code) \
+ (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+#define per_write_wp_event(code) \
+ (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+
+/*
+ * Decide whether the PER event recorded in the SIE block has to be reported
+ * as a debug exit.
+ *
+ * Checks, in this order, for a changed hardware watchpoint, a hit hardware
+ * breakpoint (at the current PSW address or at the PER address) and an
+ * active single step. On a match the address and type of the event are
+ * stored in vcpu->run->debug.arch.
+ *
+ * Returns 1 if a debug exit is required, 0 otherwise.
+ */
+static int debug_exit_required(struct kvm_vcpu *vcpu)
+{
+ /* shift the PER code so that it can be tested with the PER_EVENT_* masks */
+ u32 perc = (vcpu->arch.sie_block->perc << 24);
+ struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ struct kvm_hw_bp_info_arch *bp_info = NULL;
+ unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+ unsigned long peraddr = vcpu->arch.sie_block->peraddr;
+
+ if (guestdbg_hw_bp_enabled(vcpu)) {
+ if (per_write_wp_event(perc) &&
+ vcpu->arch.guestdbg.nr_hw_wp > 0) {
+ wp_info = any_wp_changed(vcpu);
+ if (wp_info) {
+ debug_exit->addr = wp_info->addr;
+ debug_exit->type = KVM_HW_WP_WRITE;
+ goto exit_required;
+ }
+ }
+ if (per_bp_event(perc) &&
+ vcpu->arch.guestdbg.nr_hw_bp > 0) {
+ bp_info = find_hw_bp(vcpu, addr);
+ /* remove duplicate events if PC==PER address */
+ if (bp_info && (addr != peraddr)) {
+ debug_exit->addr = addr;
+ debug_exit->type = KVM_HW_BP;
+ /* remembered for the "breakpoint missed" check below */
+ vcpu->arch.guestdbg.last_bp = addr;
+ goto exit_required;
+ }
+ /* breakpoint missed */
+ bp_info = find_hw_bp(vcpu, peraddr);
+ if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+ debug_exit->addr = peraddr;
+ debug_exit->type = KVM_HW_BP;
+ goto exit_required;
+ }
+ }
+ }
+ if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+ debug_exit->addr = addr;
+ debug_exit->type = KVM_SINGLESTEP;
+ goto exit_required;
+ }
+
+ return 0;
+exit_required:
+ return 1;
+}
+
+#define guest_per_enabled(vcpu) \
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+/*
+ * Reduce the PER event recorded in the SIE block to the events selected by
+ * the control registers currently held in the SIE block (gcr 9, 10, 11).
+ *
+ * All events are dropped if PER is not enabled in the guest PSW. Branching
+ * and instruction-fetching events are additionally dropped if the relevant
+ * address lies outside the cr10/cr11 range. The filtered PER code is written
+ * back to the SIE block; if nothing is left, the PGM_PER bit is removed from
+ * the program interruption code (iprcc).
+ */
+static void filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+ /* shift the PER code so that it can be tested with the PER_EVENT_* masks */
+ u32 perc = vcpu->arch.sie_block->perc << 24;
+ u64 peraddr = vcpu->arch.sie_block->peraddr;
+ u64 addr = vcpu->arch.sie_block->gpsw.addr;
+ u64 cr9 = vcpu->arch.sie_block->gcr[9];
+ u64 cr10 = vcpu->arch.sie_block->gcr[10];
+ u64 cr11 = vcpu->arch.sie_block->gcr[11];
+ /* filter all events, demanded by the guest */
+ u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+
+ if (!guest_per_enabled(vcpu))
+ guest_perc = 0;
+
+ /* filter "successful-branching" events */
+ if (guest_perc & PER_EVENT_BRANCH &&
+ cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+ !in_addr_range(addr, cr10, cr11))
+ guest_perc &= ~PER_EVENT_BRANCH;
+
+ /* filter "instruction-fetching" events */
+ if (guest_perc & PER_EVENT_IFETCH &&
+ !in_addr_range(peraddr, cr10, cr11))
+ guest_perc &= ~PER_EVENT_IFETCH;
+
+ /* All other PER events will be given to the guest */
+ /* TODO: Check altered address/address space */
+
+ /* undo the shift applied when reading perc above */
+ vcpu->arch.sie_block->perc = guest_perc >> 24;
+
+ if (!guest_perc)
+ vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+}
+
+/*
+ * Handle a PER event: mark a debug exit as pending in vcpu->guest_debug if
+ * debug_exit_required() reports one, then filter the PER event in the SIE
+ * block via filter_guest_per_event().
+ */
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+ if (debug_exit_required(vcpu))
+ vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+ filter_guest_per_event(vcpu);
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 47a0b642174..a0b586c1913 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,7 +1,7 @@
/*
- * intercept.c - in-kernel handling for sie intercepts
+ * in-kernel handling for sie intercepts
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -16,90 +16,26 @@
#include <linux/pagemap.h>
#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/irq.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"
-static int handle_lctg(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
- ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
- u64 useraddr;
- int reg, rc;
-
- vcpu->stat.instruction_lctg++;
- if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
- return -ENOTSUPP;
-
- useraddr = disp2;
- if (base2)
- useraddr += vcpu->arch.guest_gprs[base2];
-
- reg = reg1;
-
- VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
- disp2);
-
- do {
- rc = get_guest_u64(vcpu, useraddr,
- &vcpu->arch.sie_block->gcr[reg]);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- break;
- }
- useraddr += 8;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-
-static int handle_lctl(struct kvm_vcpu *vcpu)
-{
- int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
- int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
- u64 useraddr;
- u32 val = 0;
- int reg, rc;
-
- vcpu->stat.instruction_lctl++;
-
- useraddr = disp2;
- if (base2)
- useraddr += vcpu->arch.guest_gprs[base2];
-
- VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
- disp2);
-
- reg = reg1;
- do {
- rc = get_guest_u32(vcpu, useraddr, &val);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- break;
- }
- vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
- vcpu->arch.sie_block->gcr[reg] |= val;
- useraddr += 4;
- if (reg == reg3)
- break;
- reg = (reg + 1) % 16;
- } while (1);
- return 0;
-}
-static intercept_handler_t instruction_handlers[256] = {
+static const intercept_handler_t instruction_handlers[256] = {
+ [0x01] = kvm_s390_handle_01,
+ [0x82] = kvm_s390_handle_lpsw,
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
- [0xb2] = kvm_s390_handle_priv,
- [0xb7] = handle_lctl,
- [0xeb] = handle_lctg,
+ [0xb2] = kvm_s390_handle_b2,
+ [0xb6] = kvm_s390_handle_stctl,
+ [0xb7] = kvm_s390_handle_lctl,
+ [0xb9] = kvm_s390_handle_b9,
+ [0xe5] = kvm_s390_handle_e5,
+ [0xeb] = kvm_s390_handle_eb,
};
static int handle_noop(struct kvm_vcpu *vcpu)
@@ -111,9 +47,6 @@ static int handle_noop(struct kvm_vcpu *vcpu)
case 0x10:
vcpu->stat.exit_external_request++;
break;
- case 0x14:
- vcpu->stat.exit_external_interrupt++;
- break;
default:
break; /* nothing */
}
@@ -122,43 +55,43 @@ static int handle_noop(struct kvm_vcpu *vcpu)
static int handle_stop(struct kvm_vcpu *vcpu)
{
- int rc;
+ int rc = 0;
vcpu->stat.exit_stop_request++;
- atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
spin_lock_bh(&vcpu->arch.local_int.lock);
- if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
- vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
- rc = __kvm_s390_vcpu_store_status(vcpu,
- KVM_S390_STORE_STATUS_NOADDR);
- if (rc >= 0)
- rc = -ENOTSUPP;
- }
+
+ trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+ kvm_s390_vcpu_stop(vcpu);
vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
- rc = -ENOTSUPP;
+ rc = -EOPNOTSUPP;
+ }
+
+ if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+ vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+ /* store status must be called unlocked. Since local_int.lock
+ * only protects local_int.* and not guest memory we can give
+ * up the lock here */
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+ rc = kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ if (rc >= 0)
+ rc = -EOPNOTSUPP;
} else
- rc = 0;
- spin_unlock_bh(&vcpu->arch.local_int.lock);
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
return rc;
}
static int handle_validity(struct kvm_vcpu *vcpu)
{
int viwhy = vcpu->arch.sie_block->ipb >> 16;
+
vcpu->stat.exit_validity++;
- if (viwhy == 0x37) {
- fault_in_pages_writeable((char __user *)
- vcpu->kvm->arch.guest_origin +
- vcpu->arch.sie_block->prefix,
- PAGE_SIZE);
- return 0;
- }
- VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
- viwhy);
- return -ENOTSUPP;
+ trace_kvm_s390_intercept_validity(vcpu, viwhy);
+ WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+ return -EOPNOTSUPP;
}
static int handle_instruction(struct kvm_vcpu *vcpu)
@@ -166,16 +99,129 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
intercept_handler_t handler;
vcpu->stat.exit_instruction++;
+ trace_kvm_s390_intercept_instruction(vcpu,
+ vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
if (handler)
return handler(vcpu);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
+/*
+ * Fill @pgm_info from the program interruption data in the vcpu's SIE block.
+ *
+ * @pgm_info is zeroed first and its code is set to the interruption code
+ * (iprcc). Depending on that code (ignoring the PGM_PER bit) the additional
+ * fields belonging to the exception are copied from the SIE block. If the
+ * PGM_PER bit is set, the PER related fields are copied as well.
+ */
+static void __extract_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
+ memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
+ pgm_info->code = vcpu->arch.sie_block->iprcc;
+
+ switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+ /* exceptions that only carry a translation-exception code */
+ case PGM_AFX_TRANSLATION:
+ case PGM_ASX_TRANSLATION:
+ case PGM_EX_TRANSLATION:
+ case PGM_LFX_TRANSLATION:
+ case PGM_LSTE_SEQUENCE:
+ case PGM_LSX_TRANSLATION:
+ case PGM_LX_TRANSLATION:
+ case PGM_PRIMARY_AUTHORITY:
+ case PGM_SECONDARY_AUTHORITY:
+ case PGM_SPACE_SWITCH:
+ pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ break;
+ /* exceptions that only carry an exception access id */
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_INSTANCE:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_EXTENDED_AUTHORITY:
+ pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ break;
+ /* exceptions that carry code, exception access id and operand access id */
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info->op_access_id = vcpu->arch.sie_block->oai;
+ break;
+ case PGM_MONITOR:
+ pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
+ /* the monitor code is taken from the same SIE field (tecmc) */
+ pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+ break;
+ case PGM_DATA:
+ pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+ break;
+ case PGM_PROTECTION:
+ pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ break;
+ default:
+ break;
+ }
+
+ if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+ pgm_info->per_code = vcpu->arch.sie_block->perc;
+ pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
+ pgm_info->per_address = vcpu->arch.sie_block->peraddr;
+ pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+ }
+}
+
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+ *
+ * Does nothing if IS_TE_ENABLED() or IS_ITDB_VALID() is false for the vcpu,
+ * or if the current thread has PER_FLAG_NO_TE set. Otherwise the ITDB
+ * referenced by the SIE block is written to the guest lowcore at
+ * __LC_PGM_TDB and cleared afterwards.
+ *
+ * Returns 0 on success or if nothing had to be done, otherwise the return
+ * value of write_guest_lc().
+*/
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_itdb *itdb;
+ int rc;
+
+ if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+ return 0;
+ if (current->thread.per_flags & PER_FLAG_NO_TE)
+ return 0;
+ itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+ rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+ if (rc)
+ return rc;
+ /* the ITDB is only cleared once it was written to the guest */
+ memset(itdb, 0, sizeof(*itdb));
+
+ return 0;
+}
+
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
static int handle_prog(struct kvm_vcpu *vcpu)
{
+ struct kvm_s390_pgm_info pgm_info;
+ psw_t psw;
+ int rc;
+
vcpu->stat.exit_program_interruption++;
- return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+
+ if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+ kvm_s390_handle_per_event(vcpu);
+ /* the interrupt might have been filtered out completely */
+ if (vcpu->arch.sie_block->iprcc == 0)
+ return 0;
+ }
+
+ trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
+ if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+ rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /* Avoid endless loops of specification exceptions */
+ if (!is_valid_psw(&psw))
+ return -EOPNOTSUPP;
+ }
+ rc = handle_itdb(vcpu);
+ if (rc)
+ return rc;
+
+ __extract_prog_irq(vcpu, &pgm_info);
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
@@ -186,23 +232,117 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
rc = handle_instruction(vcpu);
rc2 = handle_prog(vcpu);
- if (rc == -ENOTSUPP)
+ if (rc == -EOPNOTSUPP)
vcpu->arch.sie_block->icptcode = 0x04;
if (rc)
return rc;
return rc2;
}
-static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ *
+ * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+ * the new PSW does not have external interrupts disabled. In the first case,
+ * we've got to deliver the interrupt manually, and in the second case, we
+ * drop to userspace to handle the situation there.
+ *
+ * Returns -EOPNOTSUPP for interruption codes that are not handled here and
+ * for clock comparator / cpu timer interrupts whose new PSW has external
+ * interrupts enabled, 0 if an external call is already pending, a non-zero
+ * value from read_guest_lc() if the new PSW cannot be read, and otherwise
+ * the return value of kvm_s390_inject_vcpu().
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+ u16 eic = vcpu->arch.sie_block->eic;
+ /*
+ * NOTE(review): irq is not initialized; only irq.type (and irq.parm for
+ * external calls) is assigned before it is passed on - confirm that
+ * kvm_s390_inject_vcpu() does not read any other field.
+ */
+ struct kvm_s390_interrupt irq;
+ psw_t newpsw;
+ int rc;
+
+ vcpu->stat.exit_external_interrupt++;
+
+ rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /* We can not handle clock comparator or timer interrupt with bad PSW */
+ if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+ (newpsw.mask & PSW_MASK_EXT))
+ return -EOPNOTSUPP;
+
+ /* translate the external interruption code into a KVM interrupt type */
+ switch (eic) {
+ case EXT_IRQ_CLK_COMP:
+ irq.type = KVM_S390_INT_CLOCK_COMP;
+ break;
+ case EXT_IRQ_CPU_TIMER:
+ irq.type = KVM_S390_INT_CPU_TIMER;
+ break;
+ case EXT_IRQ_EXTERNAL_CALL:
+ if (kvm_s390_si_ext_call_pending(vcpu))
+ return 0;
+ irq.type = KVM_S390_INT_EXTERNAL_CALL;
+ irq.parm = vcpu->arch.sie_block->extcpuaddr;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * Handle MOVE PAGE partial execution interception.
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ *
+ * Returns 0 after both pages were faulted in and the PSW was rewound,
+ * the return value of kvm_s390_inject_program_int() if an addressing
+ * exception was injected, or the non-zero return value of
+ * kvm_arch_fault_in_page().
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long srcaddr, dstaddr;
+ int reg1, reg2, rc;
+
+ /* reg1 holds the destination address, reg2 the source address */
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ /* Make sure that the source is paged-in */
+ srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]);
+ if (kvm_is_error_gpa(vcpu->kvm, srcaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+ if (rc != 0)
+ return rc;
+
+ /* Make sure that the destination is paged-in */
+ dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]);
+ if (kvm_is_error_gpa(vcpu->kvm, dstaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+ if (rc != 0)
+ return rc;
+
+ /* presumably so that the guest executes the 4 byte MVPG again - confirm */
+ psw->addr = __rewind_psw(*psw, 4);
+
+ return 0;
+}
+
+/*
+ * Dispatch a partial execution interception based on the intercepted
+ * instruction (ipa): MVPG and SIGP are forwarded to their handlers, anything
+ * else yields -EOPNOTSUPP.
+ */
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */
+ return handle_mvpg_pei(vcpu);
+ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */
+ return kvm_s390_handle_sigp_pei(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
+static const intercept_handler_t intercept_funcs[] = {
[0x00 >> 2] = handle_noop,
[0x04 >> 2] = handle_instruction,
[0x08 >> 2] = handle_prog,
[0x0C >> 2] = handle_instruction_and_prog,
[0x10 >> 2] = handle_noop,
- [0x14 >> 2] = handle_noop,
+ [0x14 >> 2] = handle_external_interrupt,
+ [0x18 >> 2] = handle_noop,
[0x1C >> 2] = kvm_s390_handle_wait,
[0x20 >> 2] = handle_validity,
[0x28 >> 2] = handle_stop,
+ [0x38 >> 2] = handle_partial_execution,
};
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
@@ -210,10 +350,10 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
intercept_handler_t func;
u8 code = vcpu->arch.sie_block->icptcode;
- if (code & 3 || code > 0x48)
- return -ENOTSUPP;
+ if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
+ return -EOPNOTSUPP;
func = intercept_funcs[code >> 2];
if (func)
return func(vcpu);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 84a7fed4cd4..90c8de22a2a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
/*
- * interrupt.c - handling kvm guest interrupts
+ * handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -10,17 +10,45 @@
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
-#include <asm/lowcore.h>
-#include <asm/uaccess.h>
+#include <linux/interrupt.h>
#include <linux/kvm_host.h>
+#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <asm/asm-offsets.h>
+#include <asm/uaccess.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "trace-s390.h"
+
+#define IOINT_SCHID_MASK 0x0000ffff /* bits of an I/O interrupt type logged as "schid" by kvm_s390_inject_vm() */
+#define IOINT_SSID_MASK 0x00030000 /* bits of an I/O interrupt type logged as "ss" */
+#define IOINT_CSSID_MASK 0x03fc0000 /* bits of an I/O interrupt type logged as "css" */
+#define IOINT_AI_MASK 0x04000000 /* when set in the type, the injection is logged as "I/O (AI)" - presumably an adapter interrupt, TODO confirm */
+
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
-static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+/*
+ * Returns non-zero for an I/O interrupt type. A type counts as I/O
+ * unless all bits of 0xfffe0000 are set in it.
+ */
+static int is_ioint(u64 type)
+{
+ const u64 non_io_bits = 0xfffe0000u;
+
+ return (type & non_io_bits) != non_io_bits;
+}
+
+int psw_extint_disabled(struct kvm_vcpu *vcpu) /* no longer static with this change */
{
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); /* non-zero when PSW_MASK_EXT is clear in the guest PSW */
}
+/* Returns non-zero when PSW_MASK_IO is clear in the guest PSW. */
+static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
+{
+ const psw_t *guest_psw = &vcpu->arch.sie_block->gpsw;
+
+ return (guest_psw->mask & PSW_MASK_IO) == 0;
+}
+
+/* Returns non-zero when PSW_MASK_MCHECK is clear in the guest PSW. */
+static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
+{
+ const psw_t *guest_psw = &vcpu->arch.sie_block->gpsw;
+
+ return (guest_psw->mask & PSW_MASK_MCHECK) == 0;
+}
+
static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
@@ -30,22 +58,50 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
return 1;
}
+/*
+ * Returns 1 when a clock comparator interrupt may be taken by the guest:
+ * external interrupts are enabled in the PSW, bit 0x800 is set in guest
+ * CR0 and the guest is not being single stepped. Returns 0 otherwise.
+ */
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+ if (psw_extint_disabled(vcpu))
+ return 0;
+
+ if ((vcpu->arch.sie_block->gcr[0] & 0x800ul) == 0)
+ return 0;
+
+ /* No timer interrupts when single stepping */
+ if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Convert the interruption subclass (isc) held in bits 0x38000000 of an
+ * I/O interruption word into a single-bit mask: isc 0 yields 0x80000000,
+ * isc 7 yields 0x01000000. Callers AND the result with a CR6 value.
+ */
+static u64 int_word_to_isc_bits(u32 int_word)
+{
+ u8 isc = (int_word & 0x38000000) >> 27;
+
+ /*
+ * Widen before shifting left: for isc == 0 the int expression
+ * 0x80 << 24 does not fit into a signed int and would end up
+ * sign-extended to 0xffffffff80000000 in the u64 result.
+ */
+ return (u64)(0x80 >> isc) << 24;
+}
+
+/*
+ * Decide whether @inti can be delivered to @vcpu right now, based on the
+ * guest PSW interrupt masks and the per-type control register bits
+ * tested below. Returns 1 if deliverable and 0 otherwise; an unknown
+ * type is logged and hits BUG().
+ */
static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
- struct interrupt_info *inti)
+ struct kvm_s390_interrupt_info *inti)
{
switch (inti->type) {
+ case KVM_S390_INT_EXTERNAL_CALL:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
+ return 1;
+ /*
+ * Do not fall through: an external call is only judged against
+ * CR0 bit 0x2000, never against the emergency bit 0x4000 below.
+ */
+ return 0;
case KVM_S390_INT_EMERGENCY:
if (psw_extint_disabled(vcpu))
return 0;
if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
return 1;
return 0;
- case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_CLOCK_COMP:
+ return ckc_interrupts_enabled(vcpu);
+ case KVM_S390_INT_CPU_TIMER:
if (psw_extint_disabled(vcpu))
return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
return 1;
return 0;
+ case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
if (psw_extint_disabled(vcpu))
return 0;
@@ -57,7 +113,22 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
case KVM_S390_SIGP_SET_PREFIX:
case KVM_S390_RESTART:
return 1;
+ case KVM_S390_MCHK:
+ /* machine checks need the PSW mask bit and a matching CR14 bit */
+ if (psw_mchk_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
+ return 1;
+ return 0;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ /* I/O interrupts need the PSW mask bit and the isc bit in CR6 */
+ if (psw_ioint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[6] &
+ int_word_to_isc_bits(inti->io.io_int_word))
+ return 1;
+ return 0;
default:
+ printk(KERN_WARNING "illegal interrupt type %llx\n",
+ inti->type);
BUG();
}
return 0;
@@ -65,24 +136,28 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); /* flag the wait state in the SIE block */
set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); /* idle_mask is what __inject_vm() scans with find_first_bit() */
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); /* undo __set_cpu_idle(): clear the wait flag ... */
clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); /* ... and this vcpu's bit in the floating-interrupt idle mask */
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
{
- atomic_clear_mask(CPUSTAT_ECALL_PEND |
- CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
- &vcpu->arch.sie_block->cpuflags);
+ atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, /* CPUSTAT_ECALL_PEND is no longer cleared here */
+ &vcpu->arch.sie_block->cpuflags);
vcpu->arch.sie_block->lctl = 0x0000; /* drop all control register load intercepts */
+ vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+ if (guestdbg_enabled(vcpu)) { /* with guest debugging enabled these intercepts are set again right away */
+ vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+ LCTL_CR10 | LCTL_CR11);
+ vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+ }
}
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -91,12 +166,17 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
}
static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
- struct interrupt_info *inti)
+ struct kvm_s390_interrupt_info *inti)
{
switch (inti->type) {
+ case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_CLOCK_COMP:
+ case KVM_S390_INT_CPU_TIMER:
if (psw_extint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
else
@@ -105,94 +185,240 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
case KVM_S390_SIGP_STOP:
__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
break;
+ case KVM_S390_MCHK:
+ if (psw_mchk_disabled(vcpu))
+ vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR14;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ if (psw_ioint_disabled(vcpu))
+ __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR6;
+ break;
default:
BUG();
}
}
+static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
+ const unsigned short table[] = { 2, 4, 4, 6 }; /* value stored to __LC_PGM_ILC, indexed by ipa >> 14 */
+ int rc = 0; /* OR of all guest lowcore access results */
+
+ switch (pgm_info->code & ~PGM_PER) { /* store the fields that depend on the program interruption code */
+ case PGM_AFX_TRANSLATION:
+ case PGM_ASX_TRANSLATION:
+ case PGM_EX_TRANSLATION:
+ case PGM_LFX_TRANSLATION:
+ case PGM_LSTE_SEQUENCE:
+ case PGM_LSX_TRANSLATION:
+ case PGM_LX_TRANSLATION:
+ case PGM_PRIMARY_AUTHORITY:
+ case PGM_SECONDARY_AUTHORITY:
+ case PGM_SPACE_SWITCH:
+ rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ break;
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_INSTANCE:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_EXTENDED_AUTHORITY:
+ rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ break;
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+ (u8 *)__LC_OP_ACCESS_ID);
+ break;
+ case PGM_MONITOR:
+ rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
+ (u64 *)__LC_MON_CLASS_NR);
+ rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+ (u64 *)__LC_MON_CODE);
+ break;
+ case PGM_DATA:
+ rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+ (u32 *)__LC_DATA_EXC_CODE);
+ break;
+ case PGM_PROTECTION:
+ rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ break;
+ } /* codes not listed above store no extra field */
+
+ if (pgm_info->code & PGM_PER) { /* PER information is stored in addition to the above */
+ rc |= put_guest_lc(vcpu, pgm_info->per_code,
+ (u8 *) __LC_PER_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+ (u8 *)__LC_PER_ATMID);
+ rc |= put_guest_lc(vcpu, pgm_info->per_address,
+ (u64 *) __LC_PER_ADDRESS);
+ rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+ (u8 *) __LC_PER_ACCESS_ID);
+ }
+
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* last instruction only stored for these icptcodes, so only here the ILC may be derived from ipa */
+ rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
+ (u16 *) __LC_PGM_ILC);
+ break;
+ case ICPT_PROGI:
+ rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
+ (u16 *) __LC_PGM_ILC);
+ break;
+ default:
+ rc |= put_guest_lc(vcpu, 0,
+ (u16 *) __LC_PGM_ILC);
+ }
+
+ rc |= put_guest_lc(vcpu, pgm_info->code,
+ (u16 *)__LC_PGM_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); /* current guest PSW goes to the program old PSW slot */
+ rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); /* guest PSW is replaced from the program new PSW slot */
+
+ return rc; /* non-zero if any lowcore access reported a failure */
+}
+
static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
- struct interrupt_info *inti)
+ struct kvm_s390_interrupt_info *inti)
{
const unsigned short table[] = { 2, 4, 4, 6 };
- int rc, exception = 0;
+ int rc = 0;
switch (inti->type) {
case KVM_S390_INT_EMERGENCY:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
vcpu->stat.deliver_emergency_signal++;
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->emerg.code, 0);
+ rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, inti->emerg.code,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ break;
+ case KVM_S390_INT_EXTERNAL_CALL:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+ vcpu->stat.deliver_external_call++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->extcall.code, 0);
+ rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, inti->extcall.code,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ break;
+ case KVM_S390_INT_CLOCK_COMP:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params, 0);
+ deliver_ckc_interrupt(vcpu);
+ break;
+ case KVM_S390_INT_CPU_TIMER:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params, 0);
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
break;
-
case KVM_S390_INT_SERVICE:
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
inti->ext.ext_params);
vcpu->stat.deliver_service_signal++;
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
- if (rc == -EFAULT)
- exception = 1;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params, 0);
+ rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+ break;
+ case KVM_S390_INT_PFAULT_INIT:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+ inti->ext.ext_params2);
+ rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *) __LC_EXT_PARAMS2);
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+ inti->ext.ext_params2);
+ rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
break;
-
case KVM_S390_INT_VIRTIO:
- VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
+ VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
inti->ext.ext_params, inti->ext.ext_params2);
vcpu->stat.deliver_virtio_interrupt++;
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
- inti->ext.ext_params2);
- if (rc == -EFAULT)
- exception = 1;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->ext.ext_params,
+ inti->ext.ext_params2);
+ rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
break;
-
case KVM_S390_SIGP_STOP:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
vcpu->stat.deliver_stop_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ 0, 0);
__set_intercept_indicator(vcpu, inti);
break;
@@ -200,99 +426,121 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
inti->prefix.address);
vcpu->stat.deliver_prefix_signal++;
- vcpu->arch.sie_block->prefix = inti->prefix.address;
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->prefix.address, 0);
+ kvm_s390_set_prefix(vcpu, inti->prefix.address);
break;
case KVM_S390_RESTART:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
vcpu->stat.deliver_restart_signal++;
- rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
- restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ 0, 0);
+ rc = write_guest_lc(vcpu,
+ offsetof(struct _lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
break;
-
case KVM_S390_PROGRAM_INT:
VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
inti->pgm.code,
table[vcpu->arch.sie_block->ipa >> 14]);
vcpu->stat.deliver_program_int++;
- rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = put_guest_u16(vcpu, __LC_PGM_ILC,
- table[vcpu->arch.sie_block->ipa >> 14]);
- if (rc == -EFAULT)
- exception = 1;
-
- rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->pgm.code, 0);
+ rc = __deliver_prog_irq(vcpu, &inti->pgm);
+ break;
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_PGM_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
+ case KVM_S390_MCHK:
+ VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+ inti->mchk.mcic);
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ inti->mchk.cr14,
+ inti->mchk.mcic);
+ rc = kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_PREFIXED);
+ rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
+ rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ {
+ __u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
+ inti->io.subchannel_nr;
+ __u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
+ inti->io.io_int_word;
+ VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+ vcpu->stat.deliver_io_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+ param0, param1);
+ rc = put_guest_lc(vcpu, inti->io.subchannel_id,
+ (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+ (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+ (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+ (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ break;
+ }
default:
BUG();
}
+ if (rc) {
+ printk("kvm: The guest lowcore is not mapped during interrupt "
+ "delivery, killing userspace\n");
+ do_exit(SIGKILL);
+ }
+}
- if (exception) {
- VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
- " interrupt");
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- if (inti->type == KVM_S390_PROGRAM_INT) {
- printk(KERN_WARNING "kvm: recursive program check\n");
- BUG();
- }
+static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+{
+ int rc; /* OR of all guest lowcore access results */
+
+ rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); /* external interruption code 0x1004 */
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); /* current guest PSW goes to the external old PSW slot */
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t)); /* guest PSW is replaced from the external new PSW slot */
+ if (rc) { /* any failed lowcore access ends the calling task via do_exit() */
+ printk("kvm: The guest lowcore is not mapped during interrupt "
+ "delivery, killing userspace\n");
+ do_exit(SIGKILL);
}
}
-static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+/* Check whether SIGP interpretation facility has an external call pending.
+ * Returns 1 only if external interrupts are enabled in the guest PSW, bit
+ * 0x2000 is set in guest CR0, SIGP_CTRL_C is set in this vcpu's SCA control
+ * field and CPUSTAT_ECALL_PEND is set in the cpuflags; returns 0 otherwise. */
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
{
- int rc, exception = 0;
+ atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl;
- if (psw_extint_disabled(vcpu))
- return 0;
- if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
- return 0;
- rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
- if (rc == -EFAULT)
- exception = 1;
- rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
- rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
- __LC_EXT_NEW_PSW, sizeof(psw_t));
- if (rc == -EFAULT)
- exception = 1;
-
- if (exception) {
- VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
- " ckc interrupt");
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- return 0;
- }
+ if (!psw_extint_disabled(vcpu) &&
+ (vcpu->arch.sie_block->gcr[0] & 0x2000ul) &&
+ (atomic_read(sigp_ctrl) & SIGP_CTRL_C) &&
+ (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+ return 1;
- return 1;
+ return 0;
}
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{
- struct local_interrupt *li = &vcpu->arch.local_int;
- struct float_interrupt *fi = vcpu->arch.local_int.float_int;
- struct interrupt_info *inti;
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct kvm_s390_interrupt_info *inti;
int rc = 0;
if (atomic_read(&li->active)) {
@@ -306,28 +554,32 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
}
if ((!rc) && atomic_read(&fi->active)) {
- spin_lock_bh(&fi->lock);
+ spin_lock(&fi->lock);
list_for_each_entry(inti, &fi->list, list)
if (__interrupt_is_deliverable(vcpu, inti)) {
rc = 1;
break;
}
- spin_unlock_bh(&fi->lock);
+ spin_unlock(&fi->lock);
}
- if ((!rc) && (vcpu->arch.sie_block->ckc <
- get_clock() + vcpu->arch.sie_block->epoch)) {
- if ((!psw_extint_disabled(vcpu)) &&
- (vcpu->arch.sie_block->gcr[0] & 0x800ul))
- rc = 1;
- }
+ if (!rc && kvm_cpu_has_pending_timer(vcpu))
+ rc = 1;
+
+ if (!rc && kvm_s390_si_ext_call_pending(vcpu))
+ rc = 1;
return rc;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return 0;
+ /* Not pending while ckc has not dropped below TOD clock + epoch */
+ if (vcpu->arch.sie_block->ckc >=
+ get_tod_clock_fast() + vcpu->arch.sie_block->epoch)
+ return 0;
+
+ /* An elapsed comparator only counts if its interrupt can be taken */
+ return ckc_interrupts_enabled(vcpu) ? 1 : 0;
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -347,70 +599,102 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
if (psw_interrupts_disabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
__unset_cpu_idle(vcpu);
- return -ENOTSUPP; /* disabled wait */
+ return -EOPNOTSUPP; /* disabled wait */
}
- if (psw_extint_disabled(vcpu) ||
- (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+ if (!ckc_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
goto no_timer;
}
- now = get_clock() + vcpu->arch.sie_block->epoch;
+ now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
if (vcpu->arch.sie_block->ckc < now) {
__unset_cpu_idle(vcpu);
return 0;
}
- sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+ sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
- vcpu->arch.ckc_timer.expires = jiffies + sltime;
-
- add_timer(&vcpu->arch.ckc_timer);
- VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
+ hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
+ VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
no_timer:
- spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ add_wait_queue(&vcpu->wq, &wait);
while (list_empty(&vcpu->arch.local_int.list) &&
list_empty(&vcpu->arch.local_int.float_int->list) &&
(!vcpu->arch.local_int.timer_due) &&
- !signal_pending(current)) {
+ !signal_pending(current) &&
+ !kvm_s390_si_ext_call_pending(vcpu)) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&vcpu->arch.local_int.lock);
- spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
- vcpu_put(vcpu);
+ spin_unlock(&vcpu->arch.local_int.float_int->lock);
schedule();
- vcpu_load(vcpu);
- spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+ spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
- spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
- del_timer(&vcpu->arch.ckc_timer);
+ spin_unlock(&vcpu->arch.local_int.float_int->lock);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
}
-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm) /* parm carries the struct kvm_vcpu pointer; scheduled by kvm_s390_idle_wakeup() */
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
- spin_lock_bh(&vcpu->arch.local_int.lock);
+ spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1; /* timer_due is one of the wait-loop exit conditions in kvm_s390_handle_wait() */
- if (waitqueue_active(&vcpu->arch.local_int.wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
- spin_unlock_bh(&vcpu->arch.local_int.lock);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+ spin_unlock(&vcpu->arch.local_int.lock);
+}
+
+/*
+ * hrtimer callback of the vcpu's ckc_timer. As this runs in hardirq
+ * context, the real wakeup work is handed over to the vcpu's tasklet.
+ * The timer is never restarted from here.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+ struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
+ arch.ckc_timer);
+
+ vcpu->preempted = true;
+ tasklet_schedule(&vcpu->arch.tasklet);
+
+ return HRTIMER_NORESTART;
}
+/*
+ * Free every interrupt queued on this vcpu's local interrupt list, mark
+ * the list inactive and clear the external call indications kept in the
+ * cpuflags and in the vcpu's SCA control field.
+ */
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+ struct kvm_s390_interrupt_info *next, *cur = NULL;
+
+ spin_lock_bh(&local->lock);
+ list_for_each_entry_safe(cur, next, &local->list, list) {
+ list_del(&cur->list);
+ kfree(cur);
+ }
+ atomic_set(&local->active, 0);
+ spin_unlock_bh(&local->lock);
+
+ /* clear pending external calls set by sigp interpretation facility */
+ atomic_clear_mask(CPUSTAT_ECALL_PEND,
+ &vcpu->arch.sie_block->cpuflags);
+ atomic_clear_mask(SIGP_CTRL_C,
+ &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
+}
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
- struct local_interrupt *li = &vcpu->arch.local_int;
- struct float_interrupt *fi = vcpu->arch.local_int.float_int;
- struct interrupt_info *n, *inti = NULL;
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
int deliver;
__reset_intercept_indicators(vcpu);
@@ -436,17 +720,73 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
} while (deliver);
}
- if ((vcpu->arch.sie_block->ckc <
- get_clock() + vcpu->arch.sie_block->epoch))
- __try_deliver_ckc_interrupt(vcpu);
+ if (kvm_cpu_has_pending_timer(vcpu))
+ deliver_ckc_interrupt(vcpu);
if (atomic_read(&fi->active)) {
do {
deliver = 0;
- spin_lock_bh(&fi->lock);
+ spin_lock(&fi->lock);
list_for_each_entry_safe(inti, n, &fi->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
+ fi->irq_count--;
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+}
+
+void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+ int deliver;
+
+ __reset_intercept_indicators(vcpu);
+ if (atomic_read(&li->active)) {
+ do {
+ deliver = 0;
+ spin_lock_bh(&li->lock);
+ list_for_each_entry_safe(inti, n, &li->list, list) {
+ if ((inti->type == KVM_S390_MCHK) &&
+ __interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&li->list))
+ atomic_set(&li->active, 0);
+ spin_unlock_bh(&li->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+
+ if (atomic_read(&fi->active)) {
+ do {
+ deliver = 0;
+ spin_lock(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+ if ((inti->type == KVM_S390_MCHK) &&
+ __interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ fi->irq_count--;
deliver = 1;
break;
}
@@ -454,7 +794,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
if (list_empty(&fi->list))
atomic_set(&fi->active, 0);
- spin_unlock_bh(&fi->lock);
+ spin_unlock(&fi->lock);
if (deliver) {
__do_deliver_interrupt(vcpu, inti);
kfree(inti);
@@ -465,62 +805,124 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) /* queues a program interrupt carrying only @code; returns 0 or -ENOMEM */
{
- struct local_interrupt *li = &vcpu->arch.local_int;
- struct interrupt_info *inti;
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
- inti->type = KVM_S390_PROGRAM_INT;;
+ inti->type = KVM_S390_PROGRAM_INT;
inti->pgm.code = code;
VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list); /* list_add() puts the entry at the head of the local list */
atomic_set(&li->active, 1);
- BUG_ON(waitqueue_active(&li->wq));
+ BUG_ON(waitqueue_active(li->wq)); /* li->wq is used as a pointer after this change */
spin_unlock_bh(&li->lock);
return 0;
}
-int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int)
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info) /* like kvm_s390_inject_program_int(), but copies the complete *pgm_info; returns 0 or -ENOMEM */
{
- struct local_interrupt *li;
- struct float_interrupt *fi;
- struct interrupt_info *inti;
- int sigcpu;
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
- switch (s390int->type) {
- case KVM_S390_INT_VIRTIO:
- VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
- s390int->parm, s390int->parm64);
- inti->type = s390int->type;
- inti->ext.ext_params = s390int->parm;
- inti->ext.ext_params2 = s390int->parm64;
- break;
- case KVM_S390_INT_SERVICE:
- VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
- inti->type = s390int->type;
- inti->ext.ext_params = s390int->parm;
+ VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
+ pgm_info->code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+ pgm_info->code, 0, 1);
+
+ inti->type = KVM_S390_PROGRAM_INT;
+ memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); /* the caller's pgm_info is copied, not referenced */
+ spin_lock_bh(&li->lock);
+ list_add(&inti->list, &li->list); /* list_add() puts the entry at the head of the local list */
+ atomic_set(&li->active, 1);
+ BUG_ON(waitqueue_active(li->wq));
+ spin_unlock_bh(&li->lock);
+ return 0;
+}
+
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 cr6, u64 schid) /* exactly one of cr6 and schid must be non-zero */
+{
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *inti, *iter;
+
+ if ((!schid && !cr6) || (schid && cr6)) /* neither or both selectors given: nothing is looked up */
+ return NULL;
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ inti = NULL;
+ list_for_each_entry(iter, &fi->list, list) { /* the first matching I/O interrupt wins */
+ if (!is_ioint(iter->type))
+ continue;
+ if (cr6 &&
+ ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) /* isc of this entry is not selected by cr6 */
+ continue;
+ if (schid) { /* bits 16-31 of schid must equal subchannel_id, bits 0-15 subchannel_nr */
+ if (((schid & 0x00000000ffff0000) >> 16) !=
+ iter->io.subchannel_id)
+ continue;
+ if ((schid & 0x000000000000ffff) !=
+ iter->io.subchannel_nr)
+ continue;
+ }
+ inti = iter;
break;
- case KVM_S390_PROGRAM_INT:
- case KVM_S390_SIGP_STOP:
- case KVM_S390_INT_EMERGENCY:
- default:
- kfree(inti);
- return -EINVAL;
}
+ if (inti) { /* unlink the match and keep the floating interrupt count in step */
+ list_del_init(&inti->list);
+ fi->irq_count--;
+ }
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+ return inti; /* NULL if nothing matched; the entry is not freed here - presumably the caller must kfree() it, TODO confirm */
+}
+
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *iter;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int sigcpu;
+ int rc = 0;
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
- spin_lock_bh(&fi->lock);
- list_add_tail(&inti->list, &fi->list);
+ spin_lock(&fi->lock);
+ if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+ rc = -EINVAL;
+ goto unlock_fi;
+ }
+ fi->irq_count++;
+ if (!is_ioint(inti->type)) {
+ list_add_tail(&inti->list, &fi->list);
+ } else {
+ u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+
+ /* Keep I/O interrupts sorted in isc order. */
+ list_for_each_entry(iter, &fi->list, list) {
+ if (!is_ioint(iter->type))
+ continue;
+ if (int_word_to_isc_bits(iter->io.io_int_word)
+ <= isc_bits)
+ continue;
+ break;
+ }
+ list_add_tail(&inti->list, &iter->list);
+ }
atomic_set(&fi->active, 1);
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
if (sigcpu == KVM_MAX_VCPUS) {
@@ -528,24 +930,87 @@ int kvm_s390_inject_vm(struct kvm *kvm,
sigcpu = fi->next_rr_cpu++;
if (sigcpu == KVM_MAX_VCPUS)
sigcpu = fi->next_rr_cpu = 0;
- } while (fi->local_int[sigcpu] == NULL);
+ } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
}
- li = fi->local_int[sigcpu];
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+ li = &dst_vcpu->arch.local_int;
spin_lock_bh(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
+ kvm_get_vcpu(kvm, sigcpu)->preempted = true;
spin_unlock_bh(&li->lock);
- spin_unlock_bh(&fi->lock);
+unlock_fi:
+ spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
- return 0;
+ return rc;
+}
+
+/*
+ * Build a floating interrupt from the userspace description @s390int and
+ * queue it on the VM-wide floating interrupt list via __inject_vm().
+ *
+ * Accepted types are virtio, service signal, pfault-done, machine check and
+ * the I/O range KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX; every other type
+ * is rejected with -EINVAL.
+ *
+ * Returns 0 on success, -ENOMEM if the interrupt info cannot be allocated,
+ * -EINVAL for an unsupported type, or the error reported by __inject_vm().
+ */
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct kvm_s390_interrupt_info *inti;
+	int rc;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = s390int->type;
+	switch (inti->type) {
+	case KVM_S390_INT_VIRTIO:
+		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
+			 s390int->parm, s390int->parm64);
+		inti->ext.ext_params = s390int->parm;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_INT_SERVICE:
+		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		inti->ext.ext_params = s390int->parm;
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		/* type was already copied above */
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_MCHK:
+		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+			 s390int->parm64);
+		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
+		inti->mchk.mcic = s390int->parm64;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		if (inti->type & IOINT_AI_MASK)
+			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
+		else
+			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
+				 s390int->type & IOINT_CSSID_MASK,
+				 s390int->type & IOINT_SSID_MASK,
+				 s390int->type & IOINT_SCHID_MASK);
+		/* parm carries subchannel id/nr, parm64 carries parm/word */
+		inti->io.subchannel_id = s390int->parm >> 16;
+		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
+		inti->io.io_int_parm = s390int->parm64 >> 32;
+		inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull;
+		break;
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+				 2);
+
+	rc = __inject_vm(kvm, inti);
+	/*
+	 * __inject_vm() links inti into the floating list only on success;
+	 * on failure it is still owned here and must be released, otherwise
+	 * every rejected injection leaks one allocation.
+	 */
+	if (rc)
+		kfree(inti);
+	return rc;
+}
+
+/*
+ * Queue an already allocated interrupt info on the floating interrupt list
+ * of @kvm by handing it to __inject_vm().
+ *
+ * NOTE(review): the return value of __inject_vm() is dropped. If queueing
+ * fails, inti is neither linked nor freed by this function -- confirm that
+ * callers can tolerate that (presumably this is used to put back an I/O
+ * interrupt that was dequeued earlier).
+ */
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ __inject_vm(kvm, inti);
}
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int)
{
- struct local_interrupt *li;
- struct interrupt_info *inti;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
@@ -562,19 +1027,57 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
s390int->parm);
break;
+ case KVM_S390_SIGP_SET_PREFIX:
+ inti->prefix.address = s390int->parm;
+ inti->type = s390int->type;
+ VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+ s390int->parm);
+ break;
case KVM_S390_SIGP_STOP:
case KVM_S390_RESTART:
- case KVM_S390_SIGP_SET_PREFIX:
- case KVM_S390_INT_EMERGENCY:
+ case KVM_S390_INT_CLOCK_COMP:
+ case KVM_S390_INT_CPU_TIMER:
VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
inti->type = s390int->type;
break;
+ case KVM_S390_INT_EXTERNAL_CALL:
+ if (s390int->parm & 0xffff0000) {
+ kfree(inti);
+ return -EINVAL;
+ }
+ VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+ s390int->parm);
+ inti->type = s390int->type;
+ inti->extcall.code = s390int->parm;
+ break;
+ case KVM_S390_INT_EMERGENCY:
+ if (s390int->parm & 0xffff0000) {
+ kfree(inti);
+ return -EINVAL;
+ }
+ VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
+ inti->type = s390int->type;
+ inti->emerg.code = s390int->parm;
+ break;
+ case KVM_S390_MCHK:
+ VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+ s390int->parm64);
+ inti->type = s390int->type;
+ inti->mchk.mcic = s390int->parm64;
+ break;
+ case KVM_S390_INT_PFAULT_INIT:
+ inti->type = s390int->type;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
default:
kfree(inti);
return -EINVAL;
}
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
+ s390int->parm64, 2);
mutex_lock(&vcpu->kvm->lock);
li = &vcpu->arch.local_int;
@@ -587,9 +1090,530 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+ vcpu->preempted = true;
spin_unlock_bh(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
return 0;
}
+
+/*
+ * Drop every pending floating interrupt of @kvm: each queued entry is
+ * unlinked and freed, the pending counter is reset and the "active" flag
+ * is cleared. Runs under kvm->lock and the floating interrupt spinlock.
+ */
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *cur, *next;
+
+	mutex_lock(&kvm->lock);
+	spin_lock(&fi->lock);
+
+	list_for_each_entry_safe(cur, next, &fi->list, list) {
+		list_del(&cur->list);
+		kfree(cur);
+	}
+	atomic_set(&fi->active, 0);
+	fi->irq_count = 0;
+
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+}
+
+/*
+ * Convert one queued interrupt into the userspace kvm_s390_irq layout and
+ * copy it to the user buffer at @addr.
+ *
+ * Returns 0 on success, -EINVAL if the interrupt type is not one of the
+ * types handled below, -EFAULT if the copy to userspace fails.
+ */
+static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+				   u8 *addr)
+{
+	struct kvm_s390_irq irq = {0};
+
+	irq.type = inti->type;
+	switch (inti->type) {
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		irq.u.io = inti->io;
+		break;
+	case KVM_S390_MCHK:
+		irq.u.mchk = inti->mchk;
+		break;
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		irq.u.ext = inti->ext;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return copy_to_user((struct kvm_s390_irq __user *) addr, &irq,
+			    sizeof(irq)) ? -EFAULT : 0;
+}
+
+/*
+ * Translate one queued floating interrupt into the userspace layout.
+ * Returns 0, or -EINVAL for a type that has no representation here.
+ */
+static int fill_floating_irq(const struct kvm_s390_interrupt_info *inti,
+			     struct kvm_s390_irq *irq)
+{
+	irq->type = inti->type;
+	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		irq->u.ext = inti->ext;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		irq->u.io = inti->io;
+		break;
+	case KVM_S390_MCHK:
+		irq->u.mchk = inti->mchk;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Report all pending floating interrupts of @kvm to the user buffer @buf of
+ * @len bytes, one struct kvm_s390_irq per interrupt.
+ *
+ * copy_to_user() may fault and sleep, so it must not run under fi->lock.
+ * The list is therefore snapshotted into a zeroed kernel buffer while the
+ * locks are held and copied out only after they have been dropped.
+ *
+ * Returns the number of interrupts reported, -ENOMEM if @buf is too small
+ * (userspace should retry with a larger buffer), -ENOBUFS if the temporary
+ * buffer cannot be allocated, -EINVAL for an unreportable interrupt type,
+ * or -EFAULT if the copy to userspace fails.
+ */
+static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+{
+	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_irq *irqs = NULL;
+	__u64 max_irqs;
+	int ret = 0;
+	int n = 0;
+
+	/*
+	 * __inject_vm() never queues more than KVM_S390_MAX_FLOAT_IRQS
+	 * entries, so a larger snapshot buffer is never needed.
+	 */
+	max_irqs = len / sizeof(struct kvm_s390_irq);
+	if (max_irqs > KVM_S390_MAX_FLOAT_IRQS)
+		max_irqs = KVM_S390_MAX_FLOAT_IRQS;
+	if (max_irqs) {
+		irqs = kcalloc(max_irqs, sizeof(*irqs), GFP_KERNEL);
+		if (!irqs)
+			return -ENOBUFS;
+	}
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+
+	list_for_each_entry(inti, &fi->list, list) {
+		if (n == max_irqs) {
+			/* signal userspace to try again */
+			ret = -ENOMEM;
+			break;
+		}
+		ret = fill_floating_irq(inti, &irqs[n]);
+		if (ret)
+			break;
+		n++;
+	}
+
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+
+	if (!ret && n > 0 &&
+	    copy_to_user((void __user *) buf, irqs, n * sizeof(*irqs)))
+		ret = -EFAULT;
+	kfree(irqs);
+
+	return ret < 0 ? ret : n;
+}
+
+/*
+ * FLIC "get attribute" handler. Only KVM_DEV_FLIC_GET_ALL_IRQS is
+ * supported: attr->addr is passed on as the user buffer and attr->attr as
+ * its length. Any other group yields -EINVAL.
+ */
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	if (attr->group != KVM_DEV_FLIC_GET_ALL_IRQS)
+		return -EINVAL;
+
+	return get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+				     attr->attr);
+}
+
+/*
+ * Fill @inti from the struct kvm_s390_irq located at user address @addr.
+ * The type is fetched first and selects which payload member is copied.
+ *
+ * Returns 0 on success, -EFAULT if userspace cannot be read, -EINVAL if
+ * the type is not one of the types handled below.
+ */
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+				      u64 addr)
+{
+	struct kvm_s390_irq __user *uirq = (struct kvm_s390_irq __user *) addr;
+	unsigned long failed;
+
+	if (get_user(inti->type, (u64 __user *)addr))
+		return -EFAULT;
+
+	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		failed = copy_from_user(&inti->ext, &uirq->u.ext,
+					sizeof(inti->ext));
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		failed = copy_from_user(&inti->io, &uirq->u.io,
+					sizeof(inti->io));
+		break;
+	case KVM_S390_MCHK:
+		failed = copy_from_user(&inti->mchk, &uirq->u.mchk,
+					sizeof(inti->mchk));
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return failed ? -EFAULT : 0;
+}
+
+/*
+ * Inject the array of struct kvm_s390_irq that userspace placed at
+ * attr->addr; attr->attr is the array size in bytes.
+ *
+ * The length is kept as a 64-bit unsigned value: narrowing it to int would
+ * let large or "negative" user values slip past the size checks below.
+ *
+ * Returns 0 on success (also for an empty array), -EINVAL if the length is
+ * not a multiple of the entry size or exceeds KVM_S390_FLIC_MAX_BUFFER,
+ * -ENOMEM on allocation failure, or the first error reported while copying
+ * or injecting an entry. Entries injected before a failure stay queued.
+ */
+static int enqueue_floating_irq(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	struct kvm_s390_interrupt_info *inti;
+	u64 len = attr->attr;
+	int r = 0;
+
+	if (len % sizeof(struct kvm_s390_irq) != 0 ||
+	    len > KVM_S390_FLIC_MAX_BUFFER)
+		return -EINVAL;
+
+	while (len >= sizeof(struct kvm_s390_irq)) {
+		inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+		if (!inti)
+			return -ENOMEM;
+
+		r = copy_irq_from_user(inti, attr->addr);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
+		r = __inject_vm(dev->kvm, inti);
+		if (r) {
+			/* not queued, so still ours to free */
+			kfree(inti);
+			return r;
+		}
+		len -= sizeof(struct kvm_s390_irq);
+		attr->addr += sizeof(struct kvm_s390_irq);
+	}
+
+	return r;
+}
+
+/*
+ * Return the adapter slot @id of @kvm, or NULL when @id is outside
+ * [0, MAX_S390_IO_ADAPTERS). The slot content itself may be NULL.
+ */
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+	return (id < MAX_S390_IO_ADAPTERS) ? kvm->arch.adapters[id] : NULL;
+}
+
+/*
+ * Register a new I/O adapter described by the struct kvm_s390_io_adapter
+ * at user address attr->addr.
+ *
+ * Returns 0 on success, -EFAULT if the description cannot be read,
+ * -EINVAL if the id is out of range or already taken, -ENOMEM on
+ * allocation failure.
+ */
+static int register_io_adapter(struct kvm_device *dev,
+			       struct kvm_device_attr *attr)
+{
+	struct kvm_s390_io_adapter info;
+	struct s390_io_adapter *ad;
+
+	if (copy_from_user(&info, (void __user *)attr->addr, sizeof(info)))
+		return -EFAULT;
+
+	/* The id must be in range and its slot must still be free. */
+	if (info.id >= MAX_S390_IO_ADAPTERS)
+		return -EINVAL;
+	if (dev->kvm->arch.adapters[info.id] != NULL)
+		return -EINVAL;
+
+	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
+	if (!ad)
+		return -ENOMEM;
+
+	ad->id = info.id;
+	ad->isc = info.isc;
+	ad->maskable = info.maskable;
+	ad->swap = info.swap;
+	ad->masked = false;
+	atomic_set(&ad->nr_maps, 0);
+	init_rwsem(&ad->maps_lock);
+	INIT_LIST_HEAD(&ad->maps);
+
+	/* Publish only once the adapter is fully initialised. */
+	dev->kvm->arch.adapters[ad->id] = ad;
+
+	return 0;
+}
+
+/*
+ * Set the masked state of adapter @id to @masked.
+ *
+ * Returns the previous masked state (0 or 1), or -EINVAL if the adapter
+ * does not exist or is not maskable.
+ */
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	int was_masked;
+
+	if (!adapter)
+		return -EINVAL;
+	if (!adapter->maskable)
+		return -EINVAL;
+
+	was_masked = adapter->masked;
+	adapter->masked = masked;
+	return was_masked;
+}
+
+/*
+ * Pin the page backing guest address @addr and record the mapping on
+ * adapter @id.
+ *
+ * Returns 0 on success, -EINVAL for an unknown adapter, a zero address or
+ * when the per-adapter mapping limit is reached, -ENOMEM on allocation
+ * failure, -EFAULT if the guest address cannot be translated, or the error
+ * from get_user_pages_fast().
+ */
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	struct s390_map_info *map;
+	int ret;
+
+	if (!adapter || !addr)
+		return -EINVAL;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	INIT_LIST_HEAD(&map->list);
+	map->guest_addr = addr;
+	map->addr = gmap_translate(addr, kvm->arch.gmap);
+	if (map->addr == -EFAULT) {
+		ret = -EFAULT;
+		goto out;
+	}
+	ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret != 1);
+	down_write(&adapter->maps_lock);
+	if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+		list_add_tail(&map->list, &adapter->maps);
+		ret = 0;
+	} else {
+		/*
+		 * Limit reached: undo the increment so nr_maps keeps matching
+		 * the list length. Otherwise every rejected request would
+		 * permanently consume one mapping slot.
+		 */
+		atomic_dec(&adapter->nr_maps);
+		put_page(map->page);
+		ret = -EINVAL;
+	}
+	up_write(&adapter->maps_lock);
+out:
+	/* kfree(NULL) is a no-op, so the allocation-failure path is fine. */
+	if (ret)
+		kfree(map);
+	return ret;
+}
+
+/*
+ * Remove the first mapping of guest address @addr from adapter @id,
+ * releasing the page reference and the bookkeeping entry.
+ *
+ * Returns 0 if a mapping was removed, -EINVAL for an unknown adapter, a
+ * zero address or when no such mapping exists.
+ */
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	struct s390_map_info *cur, *next;
+	int ret = -EINVAL;
+
+	if (!adapter || !addr)
+		return -EINVAL;
+
+	down_write(&adapter->maps_lock);
+	list_for_each_entry_safe(cur, next, &adapter->maps, list) {
+		if (cur->guest_addr != addr)
+			continue;
+		/* First match wins: drop it and stop looking. */
+		ret = 0;
+		atomic_dec(&adapter->nr_maps);
+		list_del(&cur->list);
+		put_page(cur->page);
+		kfree(cur);
+		break;
+	}
+	up_write(&adapter->maps_lock);
+
+	return ret;
+}
+
+/*
+ * Free every registered I/O adapter of @kvm together with all of its
+ * mappings (page reference and list entry). No lock is taken here.
+ */
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+	struct s390_io_adapter *adapter;
+	struct s390_map_info *cur, *next;
+	int id;
+
+	for (id = 0; id < MAX_S390_IO_ADAPTERS; id++) {
+		adapter = kvm->arch.adapters[id];
+		if (!adapter)
+			continue;
+		list_for_each_entry_safe(cur, next, &adapter->maps, list) {
+			list_del(&cur->list);
+			put_page(cur->page);
+			kfree(cur);
+		}
+		kfree(adapter);
+	}
+}
+
+/*
+ * Apply the request (struct kvm_s390_io_adapter_req at user address
+ * attr->addr) to an existing adapter: mask/unmask it, or add/remove an
+ * indicator page mapping.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be read, -EINVAL
+ * for an unknown adapter or request type, or the error of the selected
+ * operation. A positive "previous state" from masking is folded to 0.
+ */
+static int modify_io_adapter(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	struct kvm_s390_io_adapter_req req;
+	int ret;
+
+	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+		return -EFAULT;
+
+	if (!get_io_adapter(dev->kvm, req.id))
+		return -EINVAL;
+
+	switch (req.type) {
+	case KVM_S390_IO_ADAPTER_MASK:
+		ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+		return ret > 0 ? 0 : ret;
+	case KVM_S390_IO_ADAPTER_MAP:
+		return kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+	case KVM_S390_IO_ADAPTER_UNMAP:
+		return kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * FLIC "set attribute" handler: dispatches on attr->group to enqueue or
+ * clear floating interrupts, switch async page fault handling on or off,
+ * and register or modify I/O adapters.
+ *
+ * Returns 0 on success, -EINVAL for an unknown group or when the async
+ * page fault controls are used on a VM without a VM-wide gmap, or the
+ * error of the selected operation.
+ */
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int r = 0;
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_ENQUEUE:
+		r = enqueue_floating_irq(dev, attr);
+		break;
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+		r = 0;
+		kvm_s390_clear_float_irqs(dev->kvm);
+		break;
+	case KVM_DEV_FLIC_APF_ENABLE:
+		/*
+		 * kvm->arch.gmap is left NULL for user-controlled VMs, so
+		 * there is nothing to switch and dereferencing it would oops.
+		 */
+		if (!dev->kvm->arch.gmap) {
+			r = -EINVAL;
+			break;
+		}
+		dev->kvm->arch.gmap->pfault_enabled = 1;
+		break;
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		if (!dev->kvm->arch.gmap) {
+			r = -EINVAL;
+			break;
+		}
+		dev->kvm->arch.gmap->pfault_enabled = 0;
+		/*
+		 * Make sure no async faults are in transition when
+		 * clearing the queues. So we don't need to worry
+		 * about late coming workers.
+		 */
+		synchronize_srcu(&dev->kvm->srcu);
+		kvm_for_each_vcpu(i, vcpu, dev->kvm)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		break;
+	case KVM_DEV_FLIC_ADAPTER_REGISTER:
+		r = register_io_adapter(dev, attr);
+		break;
+	case KVM_DEV_FLIC_ADAPTER_MODIFY:
+		r = modify_io_adapter(dev, attr);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+/*
+ * Attach @dev as the floating interrupt controller of its VM. Fails with
+ * -EINVAL if @dev is NULL or the VM already has a flic; @type is unused.
+ */
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+	if (!dev || dev->kvm->arch.flic)
+		return -EINVAL;
+
+	dev->kvm->arch.flic = dev;
+	return 0;
+}
+
+/* Detach the flic from its VM, then release the device structure. */
+static void flic_destroy(struct kvm_device *dev)
+{
+	struct kvm *kvm = dev->kvm;
+
+	kvm->arch.flic = NULL;
+	kfree(dev);
+}
+
+/*
+ * Device ops table of the s390 floating irq controller (flic): names the
+ * device "kvm-flic" and wires up the attribute get/set handlers and the
+ * create/destroy callbacks defined in this file.
+ */
+struct kvm_device_ops kvm_flic_ops = {
+ .name = "kvm-flic",
+ .get_attr = flic_get_attr,
+ .set_attr = flic_set_attr,
+ .create = flic_create,
+ .destroy = flic_destroy,
+};
+
+/*
+ * Compute the bit index, relative to the start of the page containing
+ * @addr, of indicator bit @bit_nr counted from the byte at @addr. With
+ * @swap set the index is XORed with BITS_PER_LONG - 1, i.e. the bit order
+ * inside each long is reversed.
+ */
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+	unsigned long bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+	if (swap)
+		bit ^= BITS_PER_LONG - 1;
+	return bit;
+}
+
+/*
+ * Find the mapping of guest address @addr on @adapter. Returns NULL if
+ * @adapter is NULL or no mapping with that guest address exists. The
+ * list is walked without taking any lock here.
+ */
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+					  u64 addr)
+{
+	struct s390_map_info *cur;
+
+	if (adapter) {
+		list_for_each_entry(cur, &adapter->maps, list) {
+			if (cur->guest_addr == addr)
+				return cur;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Set the indicator bit and the summary indicator bit described by
+ * @adapter_int in the pages previously mapped for @adapter, marking both
+ * pages dirty.
+ *
+ * Returns -1 if either address has no mapping, 0 if the summary bit was
+ * already set, 1 if this call set it. Note that the indicator bit is set
+ * before the summary mapping is looked up, so it stays set even when the
+ * summary lookup fails.
+ */
+static int adapter_indicators_set(struct kvm *kvm,
+ struct s390_io_adapter *adapter,
+ struct kvm_s390_adapter_int *adapter_int)
+{
+ unsigned long bit;
+ int summary_set, idx;
+ struct s390_map_info *info;
+ void *map;
+
+ /* Step 1: the per-device indicator bit. */
+ info = get_map_info(adapter, adapter_int->ind_addr);
+ if (!info)
+ return -1;
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ /* mark_page_dirty() is called inside an SRCU read-side section. */
+ idx = srcu_read_lock(&kvm->srcu);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ /* Step 2: the summary indicator; its old value decides the result. */
+ info = get_map_info(adapter, adapter_int->summary_addr);
+ if (!info) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -1;
+ }
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+ adapter->swap);
+ summary_set = test_and_set_bit(bit, map);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ srcu_read_unlock(&kvm->srcu, idx);
+ return summary_set ? 0 : 1;
+}
+
+/*
+ * Routing callback for adapter interrupts: sets the indicator bits and,
+ * if the summary indicator was newly set and the adapter is not masked,
+ * injects an adapter I/O interrupt.
+ *
+ * Return value:
+ *  < 0 - not injected due to error
+ *  = 0 - coalesced (summary indicator already active), or @level was 0
+ *  > 0 - injected interrupt, or indicators set while the adapter is masked
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
+{
+	struct s390_io_adapter *adapter;
+	int rc;
+
+	/* We're only interested in the 0->1 transition. */
+	if (!level)
+		return 0;
+
+	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+	if (!adapter)
+		return -1;
+
+	down_read(&adapter->maps_lock);
+	rc = adapter_indicators_set(kvm, adapter, &e->adapter);
+	up_read(&adapter->maps_lock);
+
+	if (rc > 0 && !adapter->masked) {
+		struct kvm_s390_interrupt irq = {
+			.type = KVM_S390_INT_IO(1, 0, 0, 0),
+			.parm = 0,
+			.parm64 = (adapter->isc << 27) | 0x80000000,
+		};
+
+		rc = kvm_s390_inject_vm(kvm, &irq);
+		if (!rc)
+			rc = 1;
+	}
+	return rc;
+}
+
+/*
+ * Fill the kernel routing entry @e from the userspace entry @ue. Only
+ * KVM_IRQ_ROUTING_S390_ADAPTER is supported; for it the adapter fields are
+ * copied and set_adapter_int() becomes the delivery callback.
+ *
+ * Returns 0 on success, -EINVAL for any other routing type (in which case
+ * @e is left untouched).
+ */
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	if (ue->type != KVM_IRQ_ROUTING_S390_ADAPTER)
+		return -EINVAL;
+
+	e->set = set_adapter_int;
+	e->adapter.adapter_id = ue->u.adapter.adapter_id;
+	e->adapter.ind_addr = ue->u.adapter.ind_addr;
+	e->adapter.ind_offset = ue->u.adapter.ind_offset;
+	e->adapter.summary_addr = ue->u.adapter.summary_addr;
+	e->adapter.summary_offset = ue->u.adapter.summary_offset;
+
+	return 0;
+}
+
+/* MSI routing is not implemented here: every call fails with -EINVAL. */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+ int irq_source_id, int level, bool line_status)
+{
+ return -EINVAL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 00000000000..d98e4159643
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+/* Unconditionally reports an in-kernel irqchip; @kvm is not examined. */
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6558b09ff57..2f3e14fe91a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,7 +1,7 @@
/*
- * s390host.c -- hosting zSeries kernel virtual machines
+ * hosting zSeries kernel virtual machines
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -10,23 +10,34 @@
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>
+ * Jason J. Herne <jjherne@us.ibm.com>
*/
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
+#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
+#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
-
+#include <asm/nmi.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#include "trace-s390.h"
+
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -39,9 +50,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
- { "instruction_lctg", VCPU_STAT(instruction_lctg) },
+ { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+ { "instruction_stctl", VCPU_STAT(instruction_stctl) },
+ { "instruction_stctg", VCPU_STAT(instruction_stctg) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+ { "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
@@ -49,47 +63,65 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+ { "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+ { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
+ { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+ { "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
+ { "diagnose_9c", VCPU_STAT(diagnose_9c) },
{ NULL }
};
+unsigned long *vfacilities;
+static struct gmap_notifier gmap_notifier;
+
+/*
+ * Test availability of vfacility @nr: looks the bit up in the global
+ * vfacilities list via __test_facility() and returns its result.
+ */
+int test_vfacility(unsigned long nr)
+{
+ return __test_facility(nr, (void *) vfacilities);
+}
/* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
{
/* every s390 is virtualization enabled ;-) */
+ return 0;
}
void kvm_arch_hardware_disable(void *garbage)
{
}
-void decache_vcpus_on_cpu(int cpu)
-{
-}
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
int kvm_arch_hardware_setup(void)
{
+ gmap_notifier.notifier_call = kvm_gmap_notifier;
+ gmap_register_ipte_notifier(&gmap_notifier);
return 0;
}
void kvm_arch_hardware_unsetup(void)
{
+ gmap_unregister_ipte_notifier(&gmap_notifier);
}
void kvm_arch_check_processor_compat(void *rtn)
@@ -116,7 +148,60 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_dev_ioctl_check_extension(long ext)
{
- return 0;
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_S390_PSW:
+ case KVM_CAP_S390_GMAP:
+ case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_S390_UCONTROL
+ case KVM_CAP_S390_UCONTROL:
+#endif
+ case KVM_CAP_ASYNC_PF:
+ case KVM_CAP_SYNC_REGS:
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_S390_CSS_SUPPORT:
+ case KVM_CAP_IRQFD:
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_VM_ATTRIBUTES:
+ r = 1;
+ break;
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
+ case KVM_CAP_S390_COW:
+ r = MACHINE_HAS_ESOP;
+ break;
+ default:
+ r = 0;
+ }
+ return r;
+}
+
+/*
+ * Transfer the dirty state tracked by the gmap into the KVM dirty bitmap
+ * of @memslot: every guest page of the slot whose dirty state is set (and
+ * thereby cleared) in the gmap is marked dirty for KVM.
+ *
+ * The slot covers the gfns [base_gfn, base_gfn + npages), so the end of the
+ * range is exclusive; iterating up to and including it would touch one
+ * page that does not belong to the slot.
+ */
+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot)
+{
+	gfn_t cur_gfn, end_gfn;
+	unsigned long address;
+	struct gmap *gmap = kvm->arch.gmap;
+
+	down_read(&gmap->mm->mmap_sem);
+	/* Loop over all guest pages */
+	end_gfn = memslot->base_gfn + memslot->npages;
+	for (cur_gfn = memslot->base_gfn; cur_gfn < end_gfn; cur_gfn++) {
+		address = gfn_to_hva_memslot(memslot, cur_gfn);
+
+		if (gmap_test_and_clear_dirty(address, gmap))
+			mark_page_dirty(kvm, cur_gfn);
+	}
+	up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
@@ -126,7 +211,129 @@ int kvm_dev_ioctl_check_extension(long ext)
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
- return 0;
+ int r;
+ unsigned long n;
+ struct kvm_memory_slot *memslot;
+ int is_dirty = 0;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = -EINVAL;
+ if (log->slot >= KVM_USER_MEM_SLOTS)
+ goto out;
+
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+ r = -ENOENT;
+ if (!memslot->dirty_bitmap)
+ goto out;
+
+ kvm_s390_sync_dirty_log(kvm, memslot);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* Clear the dirty log */
+ if (is_dirty) {
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+/*
+ * Enable a VM capability. Only KVM_CAP_S390_IRQCHIP is handled; it sets
+ * kvm->arch.use_irqchip. Any flags, or any other capability, yield
+ * -EINVAL.
+ */
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	if (cap->flags)
+		return -EINVAL;
+	if (cap->cap != KVM_CAP_S390_IRQCHIP)
+		return -EINVAL;
+
+	kvm->arch.use_irqchip = 1;
+	return 0;
+}
+
+/*
+ * Handle the KVM_S390_VM_MEM_CTRL attribute group.
+ *
+ * KVM_S390_VM_MEM_ENABLE_CMMA sets kvm->arch.use_cmma, but only while no
+ * vcpu is online (-EBUSY otherwise). KVM_S390_VM_MEM_CLR_CMMA resets the
+ * pgstes of the guest address space through the VM-wide gmap.
+ *
+ * Returns 0 on success, -EBUSY as described above, -EINVAL if CLR_CMMA is
+ * requested on a VM without a VM-wide gmap, -ENXIO for unknown attributes.
+ */
+static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+	unsigned int idx;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) == 0) {
+			kvm->arch.use_cmma = 1;
+			ret = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	case KVM_S390_VM_MEM_CLR_CMMA:
+		/*
+		 * kvm->arch.gmap is left NULL for user-controlled VMs;
+		 * dereferencing it below would oops.
+		 */
+		if (!kvm->arch.gmap) {
+			ret = -EINVAL;
+			break;
+		}
+		mutex_lock(&kvm->lock);
+		idx = srcu_read_lock(&kvm->srcu);
+		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
+		srcu_read_unlock(&kvm->srcu, idx);
+		mutex_unlock(&kvm->lock);
+		ret = 0;
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Set a VM attribute. Only the KVM_S390_VM_MEM_CTRL group is known and is
+ * forwarded to kvm_s390_mem_control(); other groups yield -ENXIO.
+ */
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (attr->group == KVM_S390_VM_MEM_CTRL)
+		return kvm_s390_mem_control(kvm, attr);
+
+	return -ENXIO;
+}
+
+/* No VM attribute can be read back here: always returns -ENXIO. */
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
+
+/*
+ * Report whether a VM attribute is supported: 0 for the two CMMA
+ * attributes of the KVM_S390_VM_MEM_CTRL group, -ENXIO for anything else.
+ */
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (attr->group != KVM_S390_VM_MEM_CTRL)
+		return -ENXIO;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_ENABLE_CMMA:
+	case KVM_S390_VM_MEM_CLR_CMMA:
+		return 0;
+	default:
+		return -ENXIO;
+	}
}
long kvm_arch_vm_ioctl(struct file *filp,
@@ -134,6 +341,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
int r;
switch (ioctl) {
@@ -146,31 +354,84 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_inject_vm(kvm, &s390int);
break;
}
- default:
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irq_routing_entry routing;
+
r = -EINVAL;
+ if (kvm->arch.use_irqchip) {
+ /* Set up dummy routing. */
+ memset(&routing, 0, sizeof(routing));
+ kvm_set_irq_routing(kvm, &routing, 0, 0);
+ r = 0;
+ }
+ break;
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_set_attr(kvm, &attr);
+ break;
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_get_attr(kvm, &attr);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_has_attr(kvm, &attr);
+ break;
+ }
+ default:
+ r = -ENOTTY;
}
return r;
}
-struct kvm *kvm_arch_create_vm(void)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
- struct kvm *kvm;
int rc;
char debug_name[16];
+ static unsigned long sca_offset;
+
+ rc = -EINVAL;
+#ifdef CONFIG_KVM_S390_UCONTROL
+ if (type & ~KVM_VM_S390_UCONTROL)
+ goto out_err;
+ if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
+ goto out_err;
+#else
+ if (type)
+ goto out_err;
+#endif
rc = s390_enable_sie();
if (rc)
- goto out_nokvm;
+ goto out_err;
rc = -ENOMEM;
- kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
- if (!kvm)
- goto out_nokvm;
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
- goto out_nosca;
+ goto out_err;
+ spin_lock(&kvm_lock);
+ sca_offset = (sca_offset + 16) & 0x7f0;
+ kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+ spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
@@ -180,55 +441,139 @@ struct kvm *kvm_arch_create_vm(void)
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
+ init_waitqueue_head(&kvm->arch.ipte_wq);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
- try_module_get(THIS_MODULE);
+ if (type & KVM_VM_S390_UCONTROL) {
+ kvm->arch.gmap = NULL;
+ } else {
+ kvm->arch.gmap = gmap_alloc(current->mm);
+ if (!kvm->arch.gmap)
+ goto out_nogmap;
+ kvm->arch.gmap->private = kvm;
+ kvm->arch.gmap->pfault_enabled = 0;
+ }
+
+ kvm->arch.css_support = 0;
+ kvm->arch.use_irqchip = 0;
- return kvm;
+ spin_lock_init(&kvm->arch.start_stop_lock);
+
+ return 0;
+out_nogmap:
+ debug_unregister(kvm->arch.dbf);
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
-out_nosca:
- kfree(kvm);
-out_nokvm:
- return ERR_PTR(rc);
+out_err:
+ return rc;
+}
+
+/*
+ * Tear down one vcpu: drop its pending local interrupts and async page
+ * fault completions, remove it from the SCA (non-ucontrol VMs only), free
+ * its private gmap (ucontrol VMs only), its CMMA page if CMMA is enabled,
+ * its SIE control block and finally the vcpu structure itself.
+ */
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+ trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+ kvm_s390_clear_local_irqs(vcpu);
+ kvm_clear_async_pf_completion_queue(vcpu);
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ /* Clear this vcpu's bit in the SCA's mcn mask. */
+ clear_bit(63 - vcpu->vcpu_id,
+ (unsigned long *) &vcpu->kvm->arch.sca->mcn);
+ /* Only clear the SCA entry if it still points at our SIE block. */
+ if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+ (__u64) vcpu->arch.sie_block)
+ vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+ }
+ smp_mb();
+
+ /* Only ucontrol vcpus own a gmap; others share the VM-wide one. */
+ if (kvm_is_ucontrol(vcpu->kvm))
+ gmap_free(vcpu->arch.gmap);
+
+ /* The CMMA page hangs off the SIE block, so release it first. */
+ if (kvm_s390_cmma_enabled(vcpu->kvm))
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ free_page((unsigned long)(vcpu->arch.sie_block));
+
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+/*
+ * Destroy all vcpus of @kvm, then, under kvm->lock, clear the vcpu
+ * pointer array and reset the online vcpu count to zero.
+ */
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_arch_vcpu_destroy(vcpu);
+
+ mutex_lock(&kvm->lock);
+ /* The pointers are stale now; clear them before resetting the count. */
+ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+ kvm->vcpus[i] = NULL;
+
+ atomic_set(&kvm->online_vcpus, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+/* Arch hook with an empty body: nothing is done for s390 here. */
+void kvm_arch_sync_events(struct kvm *kvm)
+{
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- debug_unregister(kvm->arch.dbf);
+ kvm_free_vcpus(kvm);
free_page((unsigned long)(kvm->arch.sca));
- kfree(kvm);
- module_put(THIS_MODULE);
+ debug_unregister(kvm->arch.dbf);
+ if (!kvm_is_ucontrol(kvm))
+ gmap_free(kvm->arch.gmap);
+ kvm_s390_destroy_adapters(kvm);
+ kvm_s390_clear_float_irqs(kvm);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->arch.gmap = gmap_alloc(current->mm);
+ if (!vcpu->arch.gmap)
+ return -ENOMEM;
+ vcpu->arch.gmap->private = vcpu->kvm;
+ return 0;
+ }
+
+ vcpu->arch.gmap = vcpu->kvm->arch.gmap;
+ vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+ KVM_SYNC_GPRS |
+ KVM_SYNC_ACRS |
+ KVM_SYNC_CRS;
return 0;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- /* kvm common code refers to this, but does'nt call it */
- BUG();
+ /* Nothing todo */
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- save_fp_regs(&vcpu->arch.host_fpregs);
+ save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+ save_fp_regs(vcpu->arch.host_fpregs.fprs);
save_access_regs(vcpu->arch.host_acrs);
- vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
- restore_fp_regs(&vcpu->arch.guest_fpregs);
- restore_access_regs(vcpu->arch.guest_acrs);
+ restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ restore_access_regs(vcpu->run->s.regs.acrs);
+ gmap_enable(vcpu->arch.gmap);
+ atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- save_fp_regs(&vcpu->arch.guest_fpregs);
- save_access_regs(vcpu->arch.guest_acrs);
- restore_fp_regs(&vcpu->arch.host_fpregs);
+ atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ gmap_disable(vcpu->arch.gmap);
+ save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_access_regs(vcpu->run->s.regs.acrs);
+ restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.host_fpregs.fprs);
restore_access_regs(vcpu->arch.host_acrs);
}
@@ -237,8 +582,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
/* this equals initial cpu reset in pop, but we don't switch to ESA */
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
- vcpu->arch.sie_block->prefix = 0UL;
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_s390_set_prefix(vcpu, 0);
vcpu->arch.sie_block->cputm = 0UL;
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
@@ -248,141 +592,333 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ kvm_s390_vcpu_stop(vcpu);
+ kvm_s390_clear_local_irqs(vcpu);
+}
+
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+ free_page(vcpu->arch.sie_block->cbrlo);
+ vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+ if (!vcpu->arch.sie_block->cbrlo)
+ return -ENOMEM;
+
+ vcpu->arch.sie_block->ecb2 |= 0x80;
+ vcpu->arch.sie_block->ecb2 &= ~0x08;
+ return 0;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
- vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
- vcpu->arch.sie_block->gmsor = 0x000000000000;
- vcpu->arch.sie_block->ecb = 2;
- vcpu->arch.sie_block->eca = 0xC1002001U;
- setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
- (unsigned long) vcpu);
+ int rc = 0;
+
+ atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
+ CPUSTAT_SM |
+ CPUSTAT_STOPPED |
+ CPUSTAT_GED);
+ vcpu->arch.sie_block->ecb = 6;
+ if (test_vfacility(50) && test_vfacility(73))
+ vcpu->arch.sie_block->ecb |= 0x10;
+
+ vcpu->arch.sie_block->ecb2 = 8;
+ vcpu->arch.sie_block->eca = 0xD1002000U;
+ if (sclp_has_siif())
+ vcpu->arch.sie_block->eca |= 1;
+ vcpu->arch.sie_block->fac = (int) (long) vfacilities;
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
+ ICTL_TPROT;
+
+ if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+ rc = kvm_s390_vcpu_setup_cmma(vcpu);
+ if (rc)
+ return rc;
+ }
+ hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
+ (unsigned long) vcpu);
+ vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
- vcpu->arch.cpu_id.version = 0xfe;
- return 0;
+ vcpu->arch.cpu_id.version = 0xff;
+ return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
- struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
- int rc = -ENOMEM;
+ struct kvm_vcpu *vcpu;
+ struct sie_page *sie_page;
+ int rc = -EINVAL;
- if (!vcpu)
- goto out_nomem;
+ if (id >= KVM_MAX_VCPUS)
+ goto out;
- vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+ rc = -ENOMEM;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+ goto out;
- if (!vcpu->arch.sie_block)
+ sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+ if (!sie_page)
goto out_free_cpu;
+ vcpu->arch.sie_block = &sie_page->sie_block;
+ vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+
vcpu->arch.sie_block->icpua = id;
- BUG_ON(!kvm->arch.sca);
- BUG_ON(kvm->arch.sca->cpu[id].sda);
- kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
- vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
- vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+ if (!kvm_is_ucontrol(kvm)) {
+ if (!kvm->arch.sca) {
+ WARN_ON_ONCE(1);
+ goto out_free_cpu;
+ }
+ if (!kvm->arch.sca->cpu[id].sda)
+ kvm->arch.sca->cpu[id].sda =
+ (__u64) vcpu->arch.sie_block;
+ vcpu->arch.sie_block->scaoh =
+ (__u32)(((__u64)kvm->arch.sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+ set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
+ }
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- spin_lock_bh(&kvm->arch.float_int.lock);
- kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
- init_waitqueue_head(&vcpu->arch.local_int.wq);
+ vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
- spin_unlock_bh(&kvm->arch.float_int.lock);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
- goto out_free_cpu;
+ goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
-
- try_module_get(THIS_MODULE);
+ trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
return vcpu;
+out_free_sie_block:
+ free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
- kfree(vcpu);
-out_nomem:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
return ERR_PTR(rc);
}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
- VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
- free_page((unsigned long)(vcpu->arch.sie_block));
- kfree(vcpu);
- module_put(THIS_MODULE);
+ return kvm_cpu_has_interrupt(vcpu);
}
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+void s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+ atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+ cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE and prevent SIE-reentry */
+void exit_sie_sync(struct kvm_vcpu *vcpu)
+{
+ s390_vcpu_block(vcpu);
+ exit_sie(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+ int i;
+ struct kvm *kvm = gmap->private;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /* match against both prefix pages */
+ if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
+ VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ exit_sie_sync(vcpu);
+ }
+ }
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
BUG();
return 0;
}
+static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
+ struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_S390_TODPR:
+ r = put_user(vcpu->arch.sie_block->todpr,
+ (u32 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_EPOCHDIFF:
+ r = put_user(vcpu->arch.sie_block->epoch,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CPU_TIMER:
+ r = put_user(vcpu->arch.sie_block->cputm,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CLOCK_COMP:
+ r = put_user(vcpu->arch.sie_block->ckc,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFTOKEN:
+ r = put_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = put_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = put_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = put_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = put_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
+ struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_S390_TODPR:
+ r = get_user(vcpu->arch.sie_block->todpr,
+ (u32 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_EPOCHDIFF:
+ r = get_user(vcpu->arch.sie_block->epoch,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CPU_TIMER:
+ r = get_user(vcpu->arch.sie_block->cputm,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CLOCK_COMP:
+ r = get_user(vcpu->arch.sie_block->ckc,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFTOKEN:
+ r = get_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = get_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = get_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = get_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = get_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
- vcpu_load(vcpu);
kvm_s390_vcpu_initial_reset(vcpu);
- vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
- memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
- vcpu_put(vcpu);
+ memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
return 0;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
- memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
- vcpu_put(vcpu);
+ memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- vcpu_load(vcpu);
- memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
+ memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
- vcpu_put(vcpu);
+ restore_access_regs(vcpu->run->s.regs.acrs);
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- vcpu_load(vcpu);
- memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
+ memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
- vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- vcpu_load(vcpu);
+ if (test_fp_ctl(fpu->fpc))
+ return -EINVAL;
memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- vcpu_put(vcpu);
+ restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- vcpu_load(vcpu);
memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->arch.guest_fpregs.fpc;
- vcpu_put(vcpu);
return 0;
}
@@ -390,12 +926,12 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
int rc = 0;
- vcpu_load(vcpu);
- if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+ if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
rc = -EBUSY;
- else
- vcpu->arch.sie_block->gpsw = psw;
- vcpu_put(vcpu);
+ else {
+ vcpu->run->psw_mask = psw.mask;
+ vcpu->run->psw_addr = psw.addr;
+ }
return rc;
}
@@ -405,10 +941,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+ KVM_GUESTDBG_USE_HW_BP | \
+ KVM_GUESTDBG_ENABLE)
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- return -EINVAL; /* not implemented yet */
+ int rc = 0;
+
+ vcpu->guest_debug = 0;
+ kvm_s390_clear_bp_data(vcpu);
+
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS)
+ return -EINVAL;
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+ /* enforce guest PER */
+ atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+
+ if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+ rc = kvm_s390_import_bp_data(vcpu, dbg);
+ } else {
+ atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ vcpu->arch.guestdbg.last_bp = 0;
+ }
+
+ if (rc) {
+ vcpu->guest_debug = 0;
+ kvm_s390_clear_bp_data(vcpu);
+ atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ }
+
+ return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
@@ -423,37 +989,286 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-extern void s390_handle_mcck(void);
+bool kvm_s390_cmma_enabled(struct kvm *kvm)
+{
+ if (!MACHINE_IS_LPAR)
+ return false;
+ /* only enable for z10 and later */
+ if (!MACHINE_HAS_EDAT1)
+ return false;
+ if (!kvm->arch.use_cmma)
+ return false;
+ return true;
+}
+
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+}
+
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+retry:
+ s390_vcpu_unblock(vcpu);
+ /*
+ * We use MMU_RELOAD just to re-arm the ipte notifier for the
+ * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+ * This ensures that the ipte instruction for this request has
+ * already finished. We might race against a second unmapper that
+ * wants to set the blocking bit. Let's just retry the request loop.
+ */
+ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+ int rc;
+ rc = gmap_ipte_notify(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu),
+ PAGE_SIZE * 2);
+ if (rc)
+ return rc;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+ if (!ibs_enabled(vcpu)) {
+ trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+ atomic_set_mask(CPUSTAT_IBS,
+ &vcpu->arch.sie_block->cpuflags);
+ }
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+ if (ibs_enabled(vcpu)) {
+ trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+ atomic_clear_mask(CPUSTAT_IBS,
+ &vcpu->arch.sie_block->cpuflags);
+ }
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
+{
+ struct mm_struct *mm = current->mm;
+ hva_t hva;
+ long rc;
+
+ hva = gmap_fault(gpa, vcpu->arch.gmap);
+ if (IS_ERR_VALUE(hva))
+ return (long)hva;
+ down_read(&mm->mmap_sem);
+ rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
+ up_read(&mm->mmap_sem);
+
+ return rc < 0 ? rc : 0;
+}
+
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+ unsigned long token)
+{
+ struct kvm_s390_interrupt inti;
+ inti.parm64 = token;
+
+ if (start_token) {
+ inti.type = KVM_S390_INT_PFAULT_INIT;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+ } else {
+ inti.type = KVM_S390_INT_PFAULT_DONE;
+ WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+ }
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ /* s390 will always inject the page directly */
+}
-static void __vcpu_run(struct kvm_vcpu *vcpu)
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
- memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
+ /*
+ * s390 will always inject the page directly,
+ * but we still want check_async_completion to cleanup
+ */
+ return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+ hva_t hva;
+ struct kvm_arch_async_pf arch;
+ int rc;
+
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ return 0;
+ if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+ vcpu->arch.pfault_compare)
+ return 0;
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (kvm_cpu_has_interrupt(vcpu))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+ return 0;
+ if (!vcpu->arch.gmap->pfault_enabled)
+ return 0;
+
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+ hva += current->thread.gmap_addr & ~PAGE_MASK;
+ if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
+ return 0;
+
+ rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return rc;
+}
+
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ int rc, cpuflags;
+
+ /*
+ * On s390 notifications for arriving pages will be delivered directly
+ * to the guest but the house keeping for completed pfaults is
+ * handled outside the worker.
+ */
+ kvm_check_async_pf_completion(vcpu);
+
+ memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (need_resched())
schedule();
- if (test_thread_flag(TIF_MCCK_PENDING))
+ if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
- kvm_s390_deliver_pending_interrupts(vcpu);
+ if (!kvm_is_ucontrol(vcpu->kvm))
+ kvm_s390_deliver_pending_interrupts(vcpu);
- vcpu->arch.sie_block->icptcode = 0;
- local_irq_disable();
- kvm_guest_enter();
- local_irq_enable();
- VCPU_EVENT(vcpu, 6, "entering sie flags %x",
- atomic_read(&vcpu->arch.sie_block->cpuflags));
- if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
- VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = kvm_s390_handle_requests(vcpu);
+ if (rc)
+ return rc;
+
+ if (guestdbg_enabled(vcpu)) {
+ kvm_s390_backup_guest_per_regs(vcpu);
+ kvm_s390_patch_guest_per_regs(vcpu);
}
+
+ vcpu->arch.sie_block->icptcode = 0;
+ cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+ VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+ trace_kvm_s390_sie_enter(vcpu, cpuflags);
+
+ return 0;
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+ int rc = -1;
+
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
- local_irq_disable();
- kvm_guest_exit();
- local_irq_enable();
+ trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
+ if (guestdbg_enabled(vcpu))
+ kvm_s390_restore_guest_per_regs(vcpu);
+
+ if (exit_reason >= 0) {
+ rc = 0;
+ } else if (kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code =
+ current->thread.gmap_addr;
+ vcpu->run->s390_ucontrol.pgm_code = 0x10;
+ rc = -EREMOTE;
+
+ } else if (current->thread.gmap_pfault) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ current->thread.gmap_pfault = 0;
+ if (kvm_arch_setup_async_pf(vcpu)) {
+ rc = 0;
+ } else {
+ gpa_t gpa = current->thread.gmap_addr;
+ rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
+ }
+ }
+
+ if (rc == -1) {
+ VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+ trace_kvm_s390_sie_fault(vcpu);
+ rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
- memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+ if (rc == 0) {
+ if (kvm_is_ucontrol(vcpu->kvm))
+ /* Don't exit for host interrupts. */
+ rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
+ else
+ rc = kvm_handle_sie_intercept(vcpu);
+ }
+
+ return rc;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int rc, exit_reason;
+
+ /*
+ * We try to hold kvm->srcu during most of vcpu_run (except when run-
+ * ning the guest), so that memslots (and other stuff) are protected
+ */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ do {
+ rc = vcpu_pre_run(vcpu);
+ if (rc)
+ break;
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ /*
+ * As PF_VCPU will be used in the fault handler, there should be
+ * no uaccess between guest_enter and guest_exit.
+ */
+ preempt_disable();
+ kvm_guest_enter();
+ preempt_enable();
+ exit_reason = sie64a(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs);
+ kvm_guest_exit();
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ rc = vcpu_post_run(vcpu, exit_reason);
+ } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ return rc;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -461,43 +1276,58 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int rc;
sigset_t sigsaved;
- vcpu_load(vcpu);
+ if (guestdbg_exit_pending(vcpu)) {
+ kvm_s390_prepare_debug_exit(vcpu);
+ return 0;
+ }
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
- atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
-
- BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+ kvm_s390_vcpu_start(vcpu);
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
- vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
- vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
- break;
case KVM_EXIT_UNKNOWN:
+ case KVM_EXIT_INTR:
case KVM_EXIT_S390_RESET:
+ case KVM_EXIT_S390_UCONTROL:
+ case KVM_EXIT_S390_TSCH:
+ case KVM_EXIT_DEBUG:
break;
default:
BUG();
}
- might_sleep();
+ vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+ vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
+ kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ }
- do {
- __vcpu_run(vcpu);
- rc = kvm_handle_sie_intercept(vcpu);
- } while (!signal_pending(current) && !rc);
+ might_fault();
+ rc = __vcpu_run(vcpu);
- if (signal_pending(current) && !rc)
+ if (signal_pending(current) && !rc) {
+ kvm_run->exit_reason = KVM_EXIT_INTR;
rc = -EINTR;
+ }
- if (rc == -ENOTSUPP) {
+ if (guestdbg_exit_pending(vcpu) && !rc) {
+ kvm_s390_prepare_debug_exit(vcpu);
+ rc = 0;
+ }
+
+ if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
- kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
- kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
rc = 0;
@@ -509,100 +1339,204 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
+ kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+ memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
- vcpu_put(vcpu);
-
vcpu->stat.exit_userspace++;
return rc;
}
-static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
- unsigned long n, int prefix)
-{
- if (prefix)
- return copy_to_guest(vcpu, guestdest, from, n);
- else
- return copy_to_guest_absolute(vcpu, guestdest, from, n);
-}
-
/*
* store status at address
* we use have two special cases:
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
*/
-int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
- const unsigned char archmode = 1;
- int prefix;
+ unsigned char archmode = 1;
+ unsigned int px;
+ u64 clkcomp;
+ int rc;
- if (addr == KVM_S390_STORE_STATUS_NOADDR) {
- if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+ if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+ if (write_guest_abs(vcpu, 163, &archmode, 1))
return -EFAULT;
- addr = SAVE_AREA_BASE;
- prefix = 0;
- } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
- if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+ gpa = SAVE_AREA_BASE;
+ } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+ if (write_guest_real(vcpu, 163, &archmode, 1))
return -EFAULT;
- addr = SAVE_AREA_BASE;
- prefix = 1;
- } else
- prefix = 0;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
- vcpu->arch.guest_fpregs.fprs, 128, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
- vcpu->arch.guest_gprs, 128, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
- &vcpu->arch.sie_block->gpsw, 16, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
- &vcpu->arch.sie_block->prefix, 4, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu,
- addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
- &vcpu->arch.guest_fpregs.fpc, 4, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
- &vcpu->arch.sie_block->todpr, 4, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
- &vcpu->arch.sie_block->cputm, 8, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
- &vcpu->arch.sie_block->ckc, 8, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
- &vcpu->arch.guest_acrs, 64, prefix))
- return -EFAULT;
-
- if (__guestcopy(vcpu,
- addr + offsetof(struct save_area_s390x, ctrl_regs),
- &vcpu->arch.sie_block->gcr, 128, prefix))
- return -EFAULT;
- return 0;
+ gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+ }
+ rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
+ vcpu->arch.guest_fpregs.fprs, 128);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+ vcpu->run->s.regs.gprs, 128);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+ &vcpu->arch.sie_block->gpsw, 16);
+ px = kvm_s390_get_prefix(vcpu);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+ &px, 4);
+ rc |= write_guest_abs(vcpu,
+ gpa + offsetof(struct save_area, fp_ctrl_reg),
+ &vcpu->arch.guest_fpregs.fpc, 4);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+ &vcpu->arch.sie_block->todpr, 4);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+ &vcpu->arch.sie_block->cputm, 8);
+ clkcomp = vcpu->arch.sie_block->ckc >> 8;
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+ &clkcomp, 8);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+ &vcpu->run->s.regs.acrs, 64);
+ rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+ &vcpu->arch.sie_block->gcr, 128);
+ return rc ? -EFAULT : 0;
}
-static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
- int rc;
+ /*
+ * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+ * copying in vcpu load/put. Let's update our copies before we save
+ * them into the save area.
+ */
+ save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_access_regs(vcpu->run->s.regs.acrs);
+
+ return kvm_s390_store_status_unloaded(vcpu, addr);
+}
- vcpu_load(vcpu);
- rc = __kvm_s390_vcpu_store_status(vcpu, addr);
- vcpu_put(vcpu);
- return rc;
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+ kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+ kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
+ exit_sie_sync(vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ __disable_ibs_on_vcpu(vcpu);
+ }
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+ kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+ kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
+ exit_sie_sync(vcpu);
+}
+
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ int i, online_vcpus, started_vcpus = 0;
+
+ if (!is_vcpu_stopped(vcpu))
+ return;
+
+ trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+ /* Only one cpu at a time may enter/leave the STOPPED state. */
+ spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ for (i = 0; i < online_vcpus; i++) {
+ if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+ started_vcpus++;
+ }
+
+ if (started_vcpus == 0) {
+ /* we're the only active VCPU -> speed it up */
+ __enable_ibs_on_vcpu(vcpu);
+ } else if (started_vcpus == 1) {
+ /*
+ * As we are starting a second VCPU, we have to disable
+ * the IBS facility on all VCPUs to remove potentially
+ * outstanding ENABLE requests.
+ */
+ __disable_ibs_on_all_vcpus(vcpu->kvm);
+ }
+
+ atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ /*
+ * Another VCPU might have used IBS while we were offline.
+ * Let's play safe and flush the VCPU at startup.
+ */
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ return;
+}
+
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ int i, online_vcpus, started_vcpus = 0;
+ struct kvm_vcpu *started_vcpu = NULL;
+
+ if (is_vcpu_stopped(vcpu))
+ return;
+
+ trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+ /* Only one cpu at a time may enter/leave the STOPPED state. */
+ spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ __disable_ibs_on_vcpu(vcpu);
+
+ for (i = 0; i < online_vcpus; i++) {
+ if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+ started_vcpus++;
+ started_vcpu = vcpu->kvm->vcpus[i];
+ }
+ }
+
+ if (started_vcpus == 1) {
+ /*
+ * As we only have one VCPU left, we want to enable the
+ * IBS facility for that VCPU to speed it up.
+ */
+ __enable_ibs_on_vcpu(started_vcpu);
+ }
+
+ spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ return;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_CSS_SUPPORT:
+ if (!vcpu->kvm->arch.css_support) {
+ vcpu->kvm->arch.css_support = 1;
+ trace_kvm_s390_enable_css(vcpu->kvm);
+ }
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
@@ -610,82 +1544,224 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
+ int idx;
+ long r;
switch (ioctl) {
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
+ r = -EFAULT;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
- return -EFAULT;
- return kvm_s390_inject_vcpu(vcpu, &s390int);
+ break;
+ r = kvm_s390_inject_vcpu(vcpu, &s390int);
+ break;
}
case KVM_S390_STORE_STATUS:
- return kvm_s390_vcpu_store_status(vcpu, arg);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvm_s390_vcpu_store_status(vcpu, arg);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ break;
case KVM_S390_SET_INITIAL_PSW: {
psw_t psw;
+ r = -EFAULT;
if (copy_from_user(&psw, argp, sizeof(psw)))
- return -EFAULT;
- return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+ break;
+ r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+ break;
}
case KVM_S390_INITIAL_RESET:
- return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ break;
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
+ else
+ r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
+ break;
+ }
+#ifdef CONFIG_KVM_S390_UCONTROL
+ case KVM_S390_UCAS_MAP: {
+ struct kvm_s390_ucas_mapping ucasmap;
+
+ if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+ r = -EFAULT;
+ break;
+ }
+
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ r = -EINVAL;
+ break;
+ }
+
+ r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
+ ucasmap.vcpu_addr, ucasmap.length);
+ break;
+ }
+ case KVM_S390_UCAS_UNMAP: {
+ struct kvm_s390_ucas_mapping ucasmap;
+
+ if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+ r = -EFAULT;
+ break;
+ }
+
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ r = -EINVAL;
+ break;
+ }
+
+ r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
+ ucasmap.length);
+ break;
+ }
+#endif
+ case KVM_S390_VCPU_FAULT: {
+ r = gmap_fault(arg, vcpu->arch.gmap);
+ if (!IS_ERR_VALUE(r))
+ r = 0;
+ break;
+ }
+ case KVM_ENABLE_CAP:
+ {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
default:
- ;
+ r = -ENOTTY;
}
- return -EINVAL;
+ return r;
}
-/* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- int user_alloc)
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
- /* A few sanity checks. We can have exactly one memory slot which has
- to start at guest virtual zero and which has to be located at a
- page boundary in userland and which has to end at a page boundary.
- The memory in userland is ok to be fragmented into various different
- vmas. It is okay to mmap() and munmap() stuff in this slot after
- doing this call at any time */
+#ifdef CONFIG_KVM_S390_UCONTROL
+ if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
+ && (kvm_is_ucontrol(vcpu->kvm))) {
+ vmf->page = virt_to_page(vcpu->arch.sie_block);
+ get_page(vmf->page);
+ return 0;
+ }
+#endif
+ return VM_FAULT_SIGBUS;
+}
- if (mem->slot)
- return -EINVAL;
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
- if (mem->guest_phys_addr)
- return -EINVAL;
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
- if (mem->userspace_addr & (PAGE_SIZE - 1))
+/* Section: memory related */
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
+{
+ /* A few sanity checks. Memory slots must start and end on a segment
+ boundary (1MB). The memory in userland may be fragmented across several
+ different vmas. It is okay to mmap() and munmap() stuff in this slot
+ at any time after doing this call. */
+
+ if (mem->userspace_addr & 0xffffful)
return -EINVAL;
- if (mem->memory_size & (PAGE_SIZE - 1))
+ if (mem->memory_size & 0xffffful)
return -EINVAL;
- kvm->arch.guest_origin = mem->userspace_addr;
- kvm->arch.guest_memsize = mem->memory_size;
+ return 0;
+}
- /* FIXME: we do want to interrupt running CPUs and update their memory
- configuration now to avoid race conditions. But hey, changing the
- memory layout while virtual CPUs are running is usually bad
- programming practice. */
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
+{
+ int rc;
- return 0;
+ /* If the basics of the memslot do not change, we do not want
+ * to update the gmap. Every update causes several unnecessary
+ * segment translation exceptions. This is usually handled just
+ * fine by the normal fault handler + gmap, but it will also
+ * cause faults on the prefix page of running guest CPUs.
+ */
+ if (old->userspace_addr == mem->userspace_addr &&
+ old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+ old->npages * PAGE_SIZE == mem->memory_size)
+ return;
+
+ rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
+ mem->guest_phys_addr, mem->memory_size);
+ if (rc)
+ printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+ return;
}
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
{
- return gfn;
}
static int __init kvm_s390_init(void)
{
- return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+ int ret;
+ ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ if (ret)
+ return ret;
+
+ /*
+ * Guests can ask for up to 255+1 double words, so we need a full page
+ * to hold the maximum amount of facilities. On the other hand, we
+ * only set facilities that are known to work in KVM.
+ */
+ vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
+ if (!vfacilities) {
+ kvm_exit();
+ return -ENOMEM;
+ }
+ memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
+ vfacilities[0] &= 0xff82fff3f4fc2000UL;
+ vfacilities[1] &= 0x005c000000000000UL;
+ return 0;
}
static void __exit kvm_s390_exit(void)
{
+ free_page((unsigned long) vfacilities);
kvm_exit();
}
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3893cf12eac..a8655ed3161 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -1,7 +1,7 @@
/*
- * kvm_s390.h - definition for kvm on s390
+ * definition for kvm on s390
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -9,18 +9,28 @@
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>
*/
#ifndef ARCH_S390_KVM_S390_H
#define ARCH_S390_KVM_S390_H
+#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+/* declare vfacilities extern */
+extern unsigned long *vfacilities;
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+/* Transactional Memory Execution related macros */
+#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
+#define TDB_FORMAT1 1
+#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
@@ -40,25 +50,187 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
}
+static inline int kvm_is_ucontrol(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+ if (kvm->arch.gmap)
+ return 0;
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+#define GUEST_PREFIX_SHIFT 13
+static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
+}
+
+static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
+{
+ vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+}
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
+ u64 *address1, u64 *address2)
+{
+ u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
+ u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
+ u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
+
+ *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+ *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+ if (r1)
+ *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+ if (r2)
+ *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+ /* The displacement is a 20bit _SIGNED_ value */
+ if (disp2 & 0x80000)
+ disp2+=0xfff00000;
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
+}
+
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+/* Set the condition code in the guest program status word */
+static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
+{
+ vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
+ vcpu->arch.sie_block->gpsw.mask |= cc << 44;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
-void kvm_s390_idle_wakeup(unsigned long data);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
-int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt *s390int);
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 cr6, u64 schid);
+void kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+int is_valid_psw(psw_t *psw);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
- unsigned long addr);
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+void s390_vcpu_block(struct kvm_vcpu *vcpu);
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void exit_sie_sync(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+/* is cmma enabled */
+bool kvm_s390_cmma_enabled(struct kvm *kvm);
+int test_vfacility(unsigned long nr);
+
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in interrupt.c */
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info);
+
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, data, len);
+ * if (rc)
+ * return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * such as out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ * - zero if @rc was already zero
+ * - zero or error code from injecting if @rc was positive
+ * (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+ if (rc <= 0)
+ return rc;
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
+
+/* implemented in interrupt.c */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
+int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c02286c6a93..f89c1cd6775 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,7 +1,7 @@
/*
- * priv.c - handling privileged instructions
+ * handling privileged instructions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -12,205 +12,458 @@
*/
#include <linux/kvm.h>
+#include <linux/gfp.h>
#include <linux/errno.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/facility.h>
#include <asm/current.h>
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/compat.h>
#include "gaccess.h"
#include "kvm-s390.h"
+#include "trace.h"
+
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *cpup;
+ s64 hostclk, val;
+ int i, rc;
+ u64 op2;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ op2 = kvm_s390_get_base_disp_s(vcpu);
+ if (op2 & 7) /* Operand must be on a doubleword boundary */
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, op2, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ if (store_tod_clock(&hostclk)) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+ val = (val - hostclk) & ~0x3fUL;
+
+ mutex_lock(&vcpu->kvm->lock);
+ kvm_for_each_vcpu(i, cpup, vcpu->kvm)
+ cpup->arch.sie_block->epoch = val;
+ mutex_unlock(&vcpu->kvm->lock);
+
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+}
static int handle_set_prefix(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u64 operand2;
- u32 address = 0;
- u8 tmp;
+ u32 address;
+ int rc;
vcpu->stat.instruction_spx++;
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->arch.guest_gprs[base2];
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
- if (operand2 & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* get the value */
- if (get_guest_u32(vcpu, operand2, &address)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ rc = read_guest(vcpu, operand2, &address, sizeof(address));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
- address = address & 0x7fffe000u;
+ address &= 0x7fffe000u;
- /* make sure that the new value is valid memory */
- if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
- (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ /*
+ * Make sure the new value is valid memory. We only need to check the
+ * first page, since address is 8k aligned and memory pieces are always
+ * at least 1MB aligned and have at least a size of 1MB.
+ */
+ if (kvm_is_error_gpa(vcpu->kvm, address))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- vcpu->arch.sie_block->prefix = address;
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_s390_set_prefix(vcpu, address);
VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
-out:
+ trace_kvm_s390_handle_prefix(vcpu, 1, address);
return 0;
}
static int handle_store_prefix(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u64 operand2;
u32 address;
+ int rc;
vcpu->stat.instruction_stpx++;
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->arch.guest_gprs[base2];
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
/* must be word boundary */
- if (operand2 & 3) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- address = vcpu->arch.sie_block->prefix;
- address = address & 0x7fffe000u;
+ address = kvm_s390_get_prefix(vcpu);
/* get the value */
- if (put_guest_u32(vcpu, operand2, address)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ rc = write_guest(vcpu, operand2, &address, sizeof(address));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
-out:
+ trace_kvm_s390_handle_prefix(vcpu, 0, address);
return 0;
}
static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
- u64 useraddr;
+ u16 vcpu_id = vcpu->vcpu_id;
+ u64 ga;
int rc;
vcpu->stat.instruction_stap++;
- useraddr = disp2;
- if (base2)
- useraddr += vcpu->arch.guest_gprs[base2];
- if (useraddr & 1) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ ga = kvm_s390_get_base_disp_s(vcpu);
- VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
-out:
+ if (ga & 1)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+ trace_kvm_s390_handle_stap(vcpu, ga);
return 0;
}
+static void __skey_check_enable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
+ return;
+
+ s390_enable_skey();
+ trace_kvm_s390_skey_related_inst(vcpu);
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+}
+
+
static int handle_skey(struct kvm_vcpu *vcpu)
{
+ __skey_check_enable(vcpu);
+
vcpu->stat.instruction_storage_key++;
- vcpu->arch.sie_block->gpsw.addr -= 4;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
-static int handle_stsch(struct kvm_vcpu *vcpu)
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
{
- vcpu->stat.instruction_stsch++;
- VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+ vcpu->stat.instruction_ipte_interlock++;
+ if (psw_bits(*psw).p)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+ psw->addr = __rewind_psw(*psw, 4);
+ VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
-static int handle_chsc(struct kvm_vcpu *vcpu)
+static int handle_test_block(struct kvm_vcpu *vcpu)
{
- vcpu->stat.instruction_chsc++;
- VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ gpa_t addr;
+ int reg2;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ if (kvm_s390_check_low_addr_protection(vcpu, addr))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+
+ if (kvm_is_error_gpa(vcpu->kvm, addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ /*
+ * We don't expect errors on modern systems, and do not care
+ * about storage keys (yet), so let's just clear the page.
+ */
+ if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
+ return -EFAULT;
+ kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->run->s.regs.gprs[0] = 0;
return 0;
}
+static int handle_tpi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_interrupt_info *inti;
+ unsigned long len;
+ u32 tpi_data[3];
+ int cc, rc;
+ u64 addr;
+
+ rc = 0;
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ cc = 0;
+ inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
+ if (!inti)
+ goto no_interrupt;
+ cc = 1;
+ tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+ tpi_data[1] = inti->io.io_int_parm;
+ tpi_data[2] = inti->io.io_int_word;
+ if (addr) {
+ /*
+ * Store the two-word I/O interruption code into the
+ * provided area.
+ */
+ len = sizeof(tpi_data) - 4;
+ rc = write_guest(vcpu, addr, &tpi_data, len);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ } else {
+ /*
+ * Store the three-word I/O interruption code into
+ * the appropriate lowcore area.
+ */
+ len = sizeof(tpi_data);
+ if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+ rc = -EFAULT;
+ }
+ /*
+ * If we encounter a problem storing the interruption code, the
+ * instruction is suppressed from the guest's view: reinject the
+ * interrupt.
+ */
+ if (!rc)
+ kfree(inti);
+ else
+ kvm_s390_reinject_io_int(vcpu->kvm, inti);
+no_interrupt:
+ /* Set condition code and we're done. */
+ if (!rc)
+ kvm_s390_set_psw_cc(vcpu, cc);
+ return rc ? -EFAULT : 0;
+}
+
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_interrupt_info *inti;
+
+ inti = kvm_s390_get_io_int(vcpu->kvm, 0,
+ vcpu->run->s.regs.gprs[1]);
+
+ /*
+ * Prepare exit to userspace.
+ * We indicate whether we dequeued a pending I/O interrupt
+ * so that userspace can re-inject it if the instruction gets
+ * a program check. While this may re-order the pending I/O
+ * interrupts, this is no problem since the priority is kept
+ * intact.
+ */
+ vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+ vcpu->run->s390_tsch.dequeued = !!inti;
+ if (inti) {
+ vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+ vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+ vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+ vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+ }
+ vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+ kfree(inti);
+ return -EREMOTE;
+}
+
+static int handle_io_inst(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->kvm->arch.css_support) {
+ /*
+ * Most I/O instructions will be handled by userspace.
+ * Exceptions are tpi and the interrupt portion of tsch.
+ */
+ if (vcpu->arch.sie_block->ipa == 0xb236)
+ return handle_tpi(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb235)
+ return handle_tsch(vcpu);
+ /* Handle in userspace. */
+ return -EOPNOTSUPP;
+ } else {
+ /*
+ * Set condition code 3 to stop the guest from issuing channel
+ * I/O instructions.
+ */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+}
+
static int handle_stfl(struct kvm_vcpu *vcpu)
{
- unsigned int facility_list = stfl();
int rc;
vcpu->stat.instruction_stfl++;
- facility_list &= ~(1UL<<24); /* no stfle */
- rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
- &facility_list, sizeof(facility_list));
- if (rc == -EFAULT)
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- else
- VCPU_EVENT(vcpu, 5, "store facility list value %x",
- facility_list);
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+ vfacilities, 4);
+ if (rc)
+ return rc;
+ VCPU_EVENT(vcpu, 5, "store facility list value %x",
+ *(unsigned int *) vfacilities);
+ trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
+ return 0;
+}
+
+static void handle_new_psw(struct kvm_vcpu *vcpu)
+{
+ /* Check whether the new psw is enabled for machine checks. */
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
+ kvm_s390_deliver_pending_machine_checks(vcpu);
+}
+
+#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
+#define PSW_ADDR_31 0x000000007fffffffUL
+
+int is_valid_psw(psw_t *psw)
+{
+ if (psw->mask & PSW_MASK_UNASSIGNED)
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+ if (psw->addr & ~PSW_ADDR_31)
+ return 0;
+ }
+ if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
+ return 0;
+ if (psw->addr & 1)
+ return 0;
+ return 1;
+}
+
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
+{
+ psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+ psw_compat_t new_psw;
+ u64 addr;
+ int rc;
+
+ if (gpsw->mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ if (!(new_psw.mask & PSW32_MASK_BASE))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+ gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+ gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+ if (!is_valid_psw(gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ handle_new_psw(vcpu);
+ return 0;
+}
+
+static int handle_lpswe(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_s(vcpu);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ handle_new_psw(vcpu);
return 0;
}
static int handle_stidp(struct kvm_vcpu *vcpu)
{
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 stidp_data = vcpu->arch.stidp_data;
u64 operand2;
int rc;
vcpu->stat.instruction_stidp++;
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->arch.guest_gprs[base2];
- if (operand2 & 7) {
- kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- goto out;
- }
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
- if (rc == -EFAULT) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out;
- }
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+ if (operand2 & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
-out:
return 0;
}
static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
{
- struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
int cpus = 0;
int n;
- spin_lock_bh(&fi->lock);
- for (n = 0; n < KVM_MAX_VCPUS; n++)
- if (fi->local_int[n])
- cpus++;
- spin_unlock_bh(&fi->lock);
+ cpus = atomic_read(&vcpu->kvm->online_vcpus);
/* deal with other level 3 hypervisors */
- if (stsi(mem, 3, 2, 2) == -ENOSYS)
+ if (stsi(mem, 3, 2, 2))
mem->count = 0;
if (mem->count < 8)
mem->count++;
@@ -230,85 +483,538 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
static int handle_stsi(struct kvm_vcpu *vcpu)
{
- int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
- int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
- int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
+ int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
+ int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+ unsigned long mem = 0;
u64 operand2;
- unsigned long mem;
+ int rc = 0;
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
- operand2 = disp2;
- if (base2)
- operand2 += vcpu->arch.guest_gprs[base2];
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (fc > 3) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
- if (operand2 & 0xfff && fc > 0)
+ if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+ || vcpu->run->s.regs.gprs[1] & 0xffff0000)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- switch (fc) {
- case 0:
- vcpu->arch.guest_gprs[0] = 3 << 28;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ if (fc == 0) {
+ vcpu->run->s.regs.gprs[0] = 3 << 28;
+ kvm_s390_set_psw_cc(vcpu, 0);
return 0;
+ }
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu);
+
+ if (operand2 & 0xfff)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
case 1: /* same handling for 1 and 2 */
case 2:
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
- goto out_fail;
- if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
- goto out_mem;
+ goto out_no_data;
+ if (stsi((void *) mem, fc, sel1, sel2))
+ goto out_no_data;
break;
case 3:
if (sel1 != 2 || sel2 != 2)
- goto out_fail;
+ goto out_no_data;
mem = get_zeroed_page(GFP_KERNEL);
if (!mem)
- goto out_fail;
+ goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
- default:
- goto out_fail;
}
- if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- goto out_mem;
+ rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+ if (rc) {
+ rc = kvm_s390_inject_prog_cond(vcpu, rc);
+ goto out;
}
+ trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.guest_gprs[0] = 0;
+ kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->run->s.regs.gprs[0] = 0;
return 0;
-out_mem:
+out_no_data:
+ kvm_s390_set_psw_cc(vcpu, 3);
+out:
free_page(mem);
-out_fail:
- /* condition code 3 */
- vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
- return 0;
+ return rc;
}
-static intercept_handler_t priv_handlers[256] = {
+static const intercept_handler_t b2_handlers[256] = {
[0x02] = handle_stidp,
+ [0x04] = handle_set_clock,
[0x10] = handle_set_prefix,
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
+ [0x21] = handle_ipte_interlock,
[0x29] = handle_skey,
[0x2a] = handle_skey,
[0x2b] = handle_skey,
- [0x34] = handle_stsch,
- [0x5f] = handle_chsc,
+ [0x2c] = handle_test_block,
+ [0x30] = handle_io_inst,
+ [0x31] = handle_io_inst,
+ [0x32] = handle_io_inst,
+ [0x33] = handle_io_inst,
+ [0x34] = handle_io_inst,
+ [0x35] = handle_io_inst,
+ [0x36] = handle_io_inst,
+ [0x37] = handle_io_inst,
+ [0x38] = handle_io_inst,
+ [0x39] = handle_io_inst,
+ [0x3a] = handle_io_inst,
+ [0x3b] = handle_io_inst,
+ [0x3c] = handle_io_inst,
+ [0x50] = handle_ipte_interlock,
+ [0x5f] = handle_io_inst,
+ [0x74] = handle_io_inst,
+ [0x76] = handle_io_inst,
[0x7d] = handle_stsi,
[0xb1] = handle_stfl,
+ [0xb2] = handle_lpswe,
+};
+
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ /*
+ * A lot of B2 instructions are privileged. Here we check for
+ * the privileged ones that we can handle in the kernel.
+ * Anything else goes to userspace.
+ */
+ handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (handler)
+ return handler(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
+static int handle_epsw(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ /* This basically extracts the mask half of the psw. */
+ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
+ if (reg2) {
+ vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg2] |=
+ vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+ }
+ return 0;
+}
+
+#define PFMF_RESERVED 0xfffc0101UL
+#define PFMF_SK 0x00020000UL
+#define PFMF_CF 0x00010000UL
+#define PFMF_UI 0x00008000UL
+#define PFMF_FSC 0x00007000UL
+#define PFMF_NQ 0x00000800UL
+#define PFMF_MR 0x00000400UL
+#define PFMF_MC 0x00000200UL
+#define PFMF_KEY 0x000000feUL
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+ unsigned long start, end;
+
+ vcpu->stat.instruction_pfmf++;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ if (!MACHINE_HAS_PFMF)
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Only provide non-quiescing support if the host supports it */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* No support for conditional-SSKE */
+ if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (kvm_s390_check_low_addr_protection(vcpu, start))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ }
+
+ switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ case 0x00000000:
+ end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+ break;
+ case 0x00001000:
+ end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ break;
+ /* We don't support EDAT2
+ case 0x00002000:
+ end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+ break;*/
+ default:
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ }
+ while (start < end) {
+ unsigned long useraddr, abs_addr;
+
+ /* Translate guest address to host address */
+ if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
+ abs_addr = kvm_s390_real_to_abs(vcpu, start);
+ else
+ abs_addr = start;
+ useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+ if (kvm_is_error_hva(useraddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (clear_user((void __user *)useraddr, PAGE_SIZE))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+ __skey_check_enable(vcpu);
+ if (set_guest_storage_key(current->mm, useraddr,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
+ vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ start += PAGE_SIZE;
+ }
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
+ vcpu->run->s.regs.gprs[reg2] = end;
+ return 0;
+}
+
+static int handle_essa(struct kvm_vcpu *vcpu)
+{
+ /* entries expected to be 1FF */
+ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+ unsigned long *cbrlo, cbrle;
+ struct gmap *gmap;
+ int i;
+
+ VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+ gmap = vcpu->arch.gmap;
+ vcpu->stat.instruction_essa++;
+ if (!kvm_s390_cmma_enabled(vcpu->kvm))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Rewind PSW to repeat the ESSA instruction */
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+ vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
+ down_read(&gmap->mm->mmap_sem);
+ for (i = 0; i < entries; ++i) {
+ cbrle = cbrlo[i];
+ if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
+ /* invalid entry */
+ break;
+ /* try to free backing */
+ __gmap_zap(cbrle, gmap);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ if (i < entries)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ return 0;
+}
+
+static const intercept_handler_t b9_handlers[256] = {
+ [0x8a] = handle_ipte_interlock,
+ [0x8d] = handle_epsw,
+ [0x8e] = handle_ipte_interlock,
+ [0x8f] = handle_ipte_interlock,
+ [0xab] = handle_essa,
+ [0xaf] = handle_pfmf,
+};
+
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ /* This is handled just as for the B2 instructions. */
+ handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (handler)
+ return handler(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u32 val = 0;
+ int reg, rc;
+ u64 ga;
+
+ vcpu->stat.instruction_lctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rs(vcpu);
+
+ if (ga & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
+
+ reg = reg1;
+ do {
+ rc = read_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+ vcpu->arch.sie_block->gcr[reg] |= val;
+ ga += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 ga;
+ u32 val;
+ int reg, rc;
+
+ vcpu->stat.instruction_stctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rs(vcpu);
+
+ if (ga & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+ reg = reg1;
+ do {
+ val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
+ rc = write_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ ga += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 ga, val;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctlg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rsy(vcpu);
+
+ if (ga & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
+
+ do {
+ rc = read_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gcr[reg] = val;
+ ga += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u64 ga, val;
+ int reg, rc;
+
+ vcpu->stat.instruction_stctg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rsy(vcpu);
+
+ if (ga & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+ do {
+ val = vcpu->arch.sie_block->gcr[reg];
+ rc = write_guest(vcpu, ga, &val, sizeof(val));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ ga += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+
+ return 0;
+}
+
+static const intercept_handler_t eb_handlers[256] = {
+ [0x2f] = handle_lctlg,
+ [0x25] = handle_stctg,
+};
+
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
+ if (handler)
+ return handler(vcpu);
+ return -EOPNOTSUPP;
+}
+
+static int handle_tprot(struct kvm_vcpu *vcpu)
+{
+ u64 address1, address2;
+ unsigned long hva, gpa;
+ int ret = 0, cc = 0;
+ bool writable;
+
+ vcpu->stat.instruction_tprot++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+
+ /* we only handle the Linux memory detection case:
+ * access key == 0
+ * everything else goes to userspace. */
+ if (address2 & 0xf0)
+ return -EOPNOTSUPP;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+ ipte_lock(vcpu);
+ ret = guest_translate_address(vcpu, address1, &gpa, 1);
+ if (ret == PGM_PROTECTION) {
+ /* Write protected? Try again with read-only... */
+ cc = 1;
+ ret = guest_translate_address(vcpu, address1, &gpa, 0);
+ }
+ if (ret) {
+ if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
+ ret = kvm_s390_inject_program_int(vcpu, ret);
+ } else if (ret > 0) {
+ /* Translation not available */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ ret = 0;
+ }
+ goto out_unlock;
+ }
+
+ hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+ if (kvm_is_error_hva(hva)) {
+ ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ } else {
+ if (!writable)
+ cc = 1; /* Write not permitted ==> read-only */
+ kvm_s390_set_psw_cc(vcpu, cc);
+ /* Note: CC2 only occurs for storage keys (not supported yet) */
+ }
+out_unlock:
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+ ipte_unlock(vcpu);
+ return ret;
+}
+
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
+{
+ /* For e5xx... instructions we only handle TPROT */
+ if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
+ return handle_tprot(vcpu);
+ return -EOPNOTSUPP;
+}
+
+static int handle_sckpf(struct kvm_vcpu *vcpu)
+{
+ u32 value;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
+ return kvm_s390_inject_program_int(vcpu,
+ PGM_SPECIFICATION);
+
+ value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff;
+ vcpu->arch.sie_block->todpr = value;
+
+ return 0;
+}
+
+static const intercept_handler_t x01_handlers[256] = {
+ [0x07] = handle_sckpf,
};
-int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
{
intercept_handler_t handler;
- handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
if (handler)
return handler(vcpu);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
deleted file mode 100644
index 934fd6a885f..00000000000
--- a/arch/s390/kvm/sie64a.S
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * sie64a.S - low level sie call
- *
- * Copyright IBM Corp. 2008
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/errno.h>
-#include <asm/asm-offsets.h>
-
-SP_R5 = 5 * 8 # offset into stackframe
-SP_R6 = 6 * 8
-
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
- .globl sie64a
-sie64a:
- lgr %r5,%r3
- stmg %r5,%r14,SP_R5(%r15) # save register on entry
- lgr %r14,%r2 # pointer to sie control block
- lmg %r0,%r13,0(%r3) # load guest gprs 0-13
-sie_inst:
- sie 0(%r14)
- lg %r14,SP_R5(%r15)
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lghi %r2,0
- lmg %r6,%r14,SP_R6(%r15)
- br %r14
-
-sie_err:
- lg %r14,SP_R5(%r15)
- stmg %r0,%r13,0(%r14) # save guest gprs 0-13
- lghi %r2,-EFAULT
- lmg %r6,%r14,SP_R6(%r15)
- br %r14
-
- .section __ex_table,"a"
- .quad sie_inst,sie_err
- .previous
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 0a236acfb5f..43079a48cc9 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
/*
- * sigp.c - handlinge interprocessor communication
+ * handling interprocessor communication
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -9,61 +9,44 @@
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <asm/sigp.h>
#include "gaccess.h"
#include "kvm-s390.h"
+#include "trace.h"
-/* sigp order codes */
-#define SIGP_SENSE 0x01
-#define SIGP_EXTERNAL_CALL 0x02
-#define SIGP_EMERGENCY 0x03
-#define SIGP_START 0x04
-#define SIGP_STOP 0x05
-#define SIGP_RESTART 0x06
-#define SIGP_STOP_STORE_STATUS 0x09
-#define SIGP_INITIAL_CPU_RESET 0x0b
-#define SIGP_CPU_RESET 0x0c
-#define SIGP_SET_PREFIX 0x0d
-#define SIGP_STORE_STATUS_ADDR 0x0e
-#define SIGP_SET_ARCH 0x12
-
-/* cpu status bits */
-#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL
-#define SIGP_STAT_INCORRECT_STATE 0x00000200UL
-#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
-#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL
-#define SIGP_STAT_STOPPED 0x00000040UL
-#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL
-#define SIGP_STAT_CHECK_STOP 0x00000010UL
-#define SIGP_STAT_INOPERATIVE 0x00000004UL
-#define SIGP_STAT_INVALID_ORDER 0x00000002UL
-#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL
-
-
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
+static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+ u64 *reg)
{
- struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int cpuflags;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
- return 3; /* not operational */
+ return SIGP_CC_NOT_OPERATIONAL;
- spin_lock_bh(&fi->lock);
- if (fi->local_int[cpu_addr] == NULL)
- rc = 3; /* not operational */
- else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_RUNNING) {
- *reg &= 0xffffffff00000000UL;
- rc = 1; /* status stored */
- } else {
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+
+ cpuflags = atomic_read(li->cpuflags);
+ if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ else {
*reg &= 0xffffffff00000000UL;
- *reg |= SIGP_STAT_STOPPED;
- rc = 1; /* status stored */
+ if (cpuflags & CPUSTAT_ECALL_PEND)
+ *reg |= SIGP_STATUS_EXT_CALL_PENDING;
+ if (cpuflags & CPUSTAT_STOPPED)
+ *reg |= SIGP_STATUS_STOPPED;
+ rc = SIGP_CC_STATUS_STORED;
}
- spin_unlock_bh(&fi->lock);
VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
return rc;
@@ -71,97 +54,156 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct local_interrupt *li;
- struct interrupt_info *inti;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_EMERGENCY,
+ .parm = vcpu->vcpu_id,
+ };
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int rc = 0;
+
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+ u16 asn, u64 *reg)
+{
+ struct kvm_vcpu *dst_vcpu = NULL;
+ const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+ u16 p_asn, s_asn;
+ psw_t *psw;
+ u32 flags;
+
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+ psw = &dst_vcpu->arch.sie_block->gpsw;
+ p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
+ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
+
+ /* Deliver the emergency signal? */
+ if (!(flags & CPUSTAT_STOPPED)
+ || (psw->mask & psw_int_mask) != psw_int_mask
+ || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+ || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+ return __sigp_emergency(vcpu, cpu_addr);
+ } else {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+}
+
+static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_EXTERNAL_CALL,
+ .parm = vcpu->vcpu_id,
+ };
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- if (cpu_addr >= KVM_MAX_VCPUS)
- return 3; /* not operational */
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
- inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
+{
+ struct kvm_s390_interrupt_info *inti;
+ int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+
+ inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
if (!inti)
return -ENOMEM;
+ inti->type = KVM_S390_SIGP_STOP;
- inti->type = KVM_S390_INT_EMERGENCY;
-
- spin_lock_bh(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = 3; /* not operational */
+ spin_lock_bh(&li->lock);
+ if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
kfree(inti);
- goto unlock;
+ if ((action & ACTION_STORE_ON_STOP) != 0)
+ rc = -ESHUTDOWN;
+ goto out;
}
- spin_lock_bh(&li->lock);
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ li->action_bits |= action;
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
+out:
spin_unlock_bh(&li->lock);
- rc = 0; /* order accepted */
-unlock:
- spin_unlock_bh(&fi->lock);
- VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+
return rc;
}
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
- struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct local_interrupt *li;
- struct interrupt_info *inti;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
- return 3; /* not operational */
+ return SIGP_CC_NOT_OPERATIONAL;
- inti = kzalloc(sizeof(*inti), GFP_KERNEL);
- if (!inti)
- return -ENOMEM;
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
- inti->type = KVM_S390_SIGP_STOP;
+ rc = __inject_sigp_stop(li, action);
- spin_lock_bh(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = 3; /* not operational */
- kfree(inti);
- goto unlock;
- }
- spin_lock_bh(&li->lock);
- list_add_tail(&inti->list, &li->list);
- atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
- if (store)
- li->action_bits |= ACTION_STORE_ON_STOP;
- li->action_bits |= ACTION_STOP_ON_STOP;
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
- spin_unlock_bh(&li->lock);
- rc = 0; /* order accepted */
-unlock:
- spin_unlock_bh(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+
+ if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+ /* If the CPU has already been stopped, we still have
+ * to save the status when doing stop-and-store. This
+ * has to be done after unlocking all spinlocks. */
+ rc = kvm_s390_store_status_unloaded(dst_vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ }
+
return rc;
}
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{
int rc;
+ unsigned int i;
+ struct kvm_vcpu *v;
switch (parameter & 0xff) {
case 0:
- printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
- " not supported");
- rc = 3; /* not operational */
+ rc = SIGP_CC_NOT_OPERATIONAL;
break;
case 1:
case 2:
- rc = 0; /* order accepted */
+ kvm_for_each_vcpu(i, v, vcpu->kvm) {
+ v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(v);
+ }
+
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
break;
default:
- rc = -ENOTSUPP;
+ rc = -EOPNOTSUPP;
}
return rc;
}
@@ -169,41 +211,39 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
u64 *reg)
{
- struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct local_interrupt *li;
- struct interrupt_info *inti;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ struct kvm_s390_interrupt_info *inti;
int rc;
- u8 tmp;
-
- /* make sure that the new value is valid memory */
- address = address & 0x7fffe000u;
- if ((copy_from_guest(vcpu, &tmp,
- (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
- (copy_from_guest(vcpu, &tmp, (u64) (address +
- vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
- *reg |= SIGP_STAT_INVALID_PARAMETER;
- return 1; /* invalid parameter */
+
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+
+ /*
+ * Make sure the new value is valid memory. We only need to check the
+ * first page, since address is 8k aligned and memory pieces are always
+ * at least 1MB aligned and have at least a size of 1MB.
+ */
+ address &= 0x7fffe000u;
+ if (kvm_is_error_gpa(vcpu->kvm, address)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_PARAMETER;
+ return SIGP_CC_STATUS_STORED;
}
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
- return 2; /* busy */
-
- spin_lock_bh(&fi->lock);
- li = fi->local_int[cpu_addr];
-
- if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
- rc = 1; /* incorrect state */
- *reg &= SIGP_STAT_INCORRECT_STATE;
- kfree(inti);
- goto out_fi;
- }
+ return SIGP_CC_BUSY;
spin_lock_bh(&li->lock);
/* cpu must be in stopped state */
- if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
- rc = 1; /* incorrect state */
- *reg &= SIGP_STAT_INCORRECT_STATE;
+ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ rc = SIGP_CC_STATUS_STORED;
kfree(inti);
goto out_li;
}
@@ -213,15 +253,96 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&li->wq);
- rc = 0; /* order accepted */
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
out_li:
spin_unlock_bh(&li->lock);
-out_fi:
- spin_unlock_bh(&fi->lock);
+ return rc;
+}
+
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
+ u32 addr, u64 *reg)
+{
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int flags;
+ int rc;
+
+ if (cpu_id < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+ flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
+ spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+ if (!(flags & CPUSTAT_STOPPED)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ addr &= 0x7ffffe00;
+ rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+ if (rc == -EFAULT) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_PARAMETER;
+ rc = SIGP_CC_STATUS_STORED;
+ }
+ return rc;
+}
+
+static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
+ u64 *reg)
+{
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int rc;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+ if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ /* running */
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ } else {
+ /* not running */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_NOT_RUNNING;
+ rc = SIGP_CC_STATUS_STORED;
+ }
+
+ VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
+ rc);
+
+ return rc;
+}
+
+/* Test whether the destination CPU is available and not busy */
+static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+ struct kvm_s390_local_interrupt *li;
+ int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ struct kvm_vcpu *dst_vcpu = NULL;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+ spin_lock_bh(&li->lock);
+ if (li->action_bits & ACTION_STOP_ON_STOP)
+ rc = SIGP_CC_BUSY;
+ spin_unlock_bh(&li->lock);
+
return rc;
}
@@ -229,60 +350,126 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
{
int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int r3 = vcpu->arch.sie_block->ipa & 0x000f;
- int base2 = vcpu->arch.sie_block->ipb >> 28;
- int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
u32 parameter;
- u16 cpu_addr = vcpu->arch.guest_gprs[r3];
+ u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
u8 order_code;
int rc;
- order_code = disp2;
- if (base2)
- order_code += vcpu->arch.guest_gprs[base2];
+ /* sigp issued in problem state (guest userspace): privileged op */
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ order_code = kvm_s390_get_base_disp_rs(vcpu);
if (r1 % 2)
- parameter = vcpu->arch.guest_gprs[r1];
+ parameter = vcpu->run->s.regs.gprs[r1];
else
- parameter = vcpu->arch.guest_gprs[r1 + 1];
+ parameter = vcpu->run->s.regs.gprs[r1 + 1];
+ trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
switch (order_code) {
case SIGP_SENSE:
vcpu->stat.instruction_sigp_sense++;
rc = __sigp_sense(vcpu, cpu_addr,
- &vcpu->arch.guest_gprs[r1]);
+ &vcpu->run->s.regs.gprs[r1]);
break;
- case SIGP_EMERGENCY:
+ case SIGP_EXTERNAL_CALL:
+ vcpu->stat.instruction_sigp_external_call++;
+ rc = __sigp_external_call(vcpu, cpu_addr);
+ break;
+ case SIGP_EMERGENCY_SIGNAL:
vcpu->stat.instruction_sigp_emergency++;
rc = __sigp_emergency(vcpu, cpu_addr);
break;
case SIGP_STOP:
vcpu->stat.instruction_sigp_stop++;
- rc = __sigp_stop(vcpu, cpu_addr, 0);
+ rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
break;
- case SIGP_STOP_STORE_STATUS:
+ case SIGP_STOP_AND_STORE_STATUS:
vcpu->stat.instruction_sigp_stop++;
- rc = __sigp_stop(vcpu, cpu_addr, 1);
+ rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
+ ACTION_STOP_ON_STOP);
+ break;
+ case SIGP_STORE_STATUS_AT_ADDRESS:
+ rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
break;
- case SIGP_SET_ARCH:
+ case SIGP_SET_ARCHITECTURE:
vcpu->stat.instruction_sigp_arch++;
rc = __sigp_set_arch(vcpu, parameter);
break;
case SIGP_SET_PREFIX:
vcpu->stat.instruction_sigp_prefix++;
rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
- &vcpu->arch.guest_gprs[r1]);
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
+ case SIGP_COND_EMERGENCY_SIGNAL:
+ rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
+ case SIGP_SENSE_RUNNING:
+ vcpu->stat.instruction_sigp_sense_running++;
+ rc = __sigp_sense_running(vcpu, cpu_addr,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
+ case SIGP_START:
+ rc = sigp_check_callable(vcpu, cpu_addr);
+ if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
+ rc = -EOPNOTSUPP; /* Handle START in user space */
break;
case SIGP_RESTART:
vcpu->stat.instruction_sigp_restart++;
- /* user space must know about restart */
+ rc = sigp_check_callable(vcpu, cpu_addr);
+ if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
+ VCPU_EVENT(vcpu, 4,
+ "sigp restart %x to handle userspace",
+ cpu_addr);
+ /* user space must know about restart */
+ rc = -EOPNOTSUPP;
+ }
+ break;
default:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
if (rc < 0)
return rc;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+ kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
+
+/*
+ * Handle SIGP partial execution interception.
+ *
+ * This interception will occur at the source cpu when a source cpu sends an
+ * external call to a target cpu and the target cpu has the WAIT bit set in
+ * its cpuflags. Interception will occur after the interrupt indicator bits at
+ * the target cpu have been set. All error cases will lead to instruction
+ * interception, therefore nothing is to be checked or prepared.
+ */
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+{
+ int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+ struct kvm_vcpu *dest_vcpu;
+ u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
+ if (order_code == SIGP_EXTERNAL_CALL) {
+ dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ BUG_ON(dest_vcpu == NULL);
+
+ spin_lock_bh(&dest_vcpu->arch.local_int.lock);
+ if (waitqueue_active(&dest_vcpu->wq))
+ wake_up_interruptible(&dest_vcpu->wq);
+ dest_vcpu->preempted = true;
+ spin_unlock_bh(&dest_vcpu->arch.local_int.lock);
+
+ kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
new file mode 100644
index 00000000000..647e9d6a481
--- /dev/null
+++ b/arch/s390/kvm/trace-s390.h
@@ -0,0 +1,273 @@
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * Trace point for the creation of the kvm instance.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+ TP_PROTO(unsigned long type),
+ TP_ARGS(type),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, type)
+ ),
+
+ TP_fast_assign(
+ __entry->type = type;
+ ),
+
+ TP_printk("create vm%s",
+ __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+ );
+
+/*
+ * Trace points for creation and destruction of vcpus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+ TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+ struct kvm_s390_sie_block *sie_block),
+ TP_ARGS(id, vcpu, sie_block),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(struct kvm_vcpu *, vcpu)
+ __field(struct kvm_s390_sie_block *, sie_block)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->vcpu = vcpu;
+ __entry->sie_block = sie_block;
+ ),
+
+ TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
+ __entry->vcpu, __entry->sie_block)
+ );
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+ TP_PROTO(unsigned int id),
+ TP_ARGS(id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ ),
+
+ TP_printk("destroy cpu %d", __entry->id)
+ );
+
+/*
+ * Trace point for start and stop of vcpus.
+ */
+TRACE_EVENT(kvm_s390_vcpu_start_stop,
+ TP_PROTO(unsigned int id, int state),
+ TP_ARGS(id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
+ __entry->id)
+ );
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu.
+ */
+
+#define kvm_s390_int_type \
+ {KVM_S390_SIGP_STOP, "sigp stop"}, \
+ {KVM_S390_PROGRAM_INT, "program interrupt"}, \
+ {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \
+ {KVM_S390_RESTART, "sigp restart"}, \
+ {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \
+ {KVM_S390_INT_SERVICE, "sclp interrupt"}, \
+ {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \
+ {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+TRACE_EVENT(kvm_s390_inject_vm,
+ TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+ TP_ARGS(type, parm, parm64, who),
+
+ TP_STRUCT__entry(
+ __field(__u32, inttype)
+ __field(__u32, parm)
+ __field(__u64, parm64)
+ __field(int, who)
+ ),
+
+ TP_fast_assign(
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->parm = parm;
+ __entry->parm64 = parm64;
+ __entry->who = who;
+ ),
+
+ TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+ (__entry->who == 1) ? " (from kernel)" :
+ (__entry->who == 2) ? " (from user)" : "",
+ __entry->inttype,
+ __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->parm, __entry->parm64)
+ );
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+ TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
+ int who),
+ TP_ARGS(id, type, parm, parm64, who),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(__u32, inttype)
+ __field(__u32, parm)
+ __field(__u64, parm64)
+ __field(int, who)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->parm = parm;
+ __entry->parm64 = parm64;
+ __entry->who = who;
+ ),
+
+ TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+ (__entry->who == 1) ? " (from kernel)" :
+ (__entry->who == 2) ? " (from user)" : "",
+ __entry->id, __entry->inttype,
+ __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->parm, __entry->parm64)
+ );
+
+/*
+ * Trace point for the actual delivery of interrupts.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+ TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
+ TP_ARGS(id, type, data0, data1),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(__u32, inttype)
+ __field(__u64, data0)
+ __field(__u64, data1)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->data0 = data0;
+ __entry->data1 = data1;
+ ),
+
+ TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
+ "data:%08llx %016llx",
+ __entry->id, __entry->inttype,
+ __print_symbolic(__entry->inttype, kvm_s390_int_type),
+ __entry->data0, __entry->data1)
+ );
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+ TP_PROTO(__u64 resets),
+ TP_ARGS(resets),
+
+ TP_STRUCT__entry(
+ __field(__u64, resets)
+ ),
+
+ TP_fast_assign(
+ __entry->resets = resets;
+ ),
+
+ TP_printk("requesting userspace resets %llx",
+ __entry->resets)
+ );
+
+/*
+ * Trace point for a vcpu's stop requests.
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+ TP_PROTO(unsigned int action_bits),
+ TP_ARGS(action_bits),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, action_bits)
+ ),
+
+ TP_fast_assign(
+ __entry->action_bits = action_bits;
+ ),
+
+ TP_printk("stop request, action_bits = %08x",
+ __entry->action_bits)
+ );
+
+
+/*
+ * Trace point for enabling channel I/O support; records the kvm pointer.
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+ TP_PROTO(void *kvm),
+ TP_ARGS(kvm),
+
+ TP_STRUCT__entry(
+ __field(void *, kvm)
+ ),
+
+ TP_fast_assign(
+ __entry->kvm = kvm;
+ ),
+
+ TP_printk("enabling channel I/O support (kvm @ %p)",
+ __entry->kvm)
+ );
+
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression.
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+ TP_PROTO(unsigned int id, int state),
+ TP_ARGS(id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s ibs on cpu %d",
+ __entry->state ? "enabling" : "disabling", __entry->id)
+ );
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
new file mode 100644
index 00000000000..916834d7a73
--- /dev/null
+++ b/arch/s390/kvm/trace.h
@@ -0,0 +1,418 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/sie.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Helpers for vcpu-specific tracepoints containing the same information
+ * as s390dbf VCPU_EVENTs.
+ */
+#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu
+#define VCPU_ARGS_COMMON vcpu
+#define VCPU_FIELD_COMMON __field(int, id) \
+ __field(unsigned long, pswmask) \
+ __field(unsigned long, pswaddr)
+#define VCPU_ASSIGN_COMMON do { \
+ __entry->id = vcpu->vcpu_id; \
+ __entry->pswmask = vcpu->arch.sie_block->gpsw.mask; \
+ __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr; \
+ } while (0);
+#define VCPU_TP_PRINTK(p_str, p_args...) \
+ TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
+ __entry->pswmask, __entry->pswaddr, p_args)
+
+TRACE_EVENT(kvm_s390_skey_related_inst,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+ );
+
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+ );
+
+TRACE_EVENT(kvm_s390_pfault_init,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+ );
+
+TRACE_EVENT(kvm_s390_pfault_done,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+ );
+
+/*
+ * Tracepoints for SIE entry and exit.
+ */
+TRACE_EVENT(kvm_s390_sie_enter,
+ TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
+ TP_ARGS(VCPU_ARGS_COMMON, cpuflags),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, cpuflags)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->cpuflags = cpuflags;
+ ),
+
+ VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
+ );
+
+TRACE_EVENT(kvm_s390_sie_fault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+
+ VCPU_TP_PRINTK("%s", "fault in sie instruction")
+ );
+
+TRACE_EVENT(kvm_s390_sie_exit,
+ TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
+ TP_ARGS(VCPU_ARGS_COMMON, icptcode),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u8, icptcode)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->icptcode = icptcode;
+ ),
+
+ VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,
+ __print_symbolic(__entry->icptcode,
+ sie_intercept_code))
+ );
+
+/*
+ * Trace point for intercepted instructions.
+ */
+TRACE_EVENT(kvm_s390_intercept_instruction,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+ TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u64, instruction)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->instruction = ((__u64)ipa << 48) |
+ ((__u64)ipb << 16);
+ ),
+
+ VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
+ __entry->instruction,
+ __print_symbolic(icpt_insn_decoder(__entry->instruction),
+ icpt_insn_codes))
+ );
+
+/*
+ * Trace point for intercepted program interruptions.
+ */
+TRACE_EVENT(kvm_s390_intercept_prog,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+ TP_ARGS(VCPU_ARGS_COMMON, code),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, code)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ ),
+
+ VCPU_TP_PRINTK("intercepted program interruption %04x",
+ __entry->code)
+ );
+
+/*
+ * Trace point for validity intercepts.
+ */
+TRACE_EVENT(kvm_s390_intercept_validity,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
+ TP_ARGS(VCPU_ARGS_COMMON, viwhy),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, viwhy)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->viwhy = viwhy;
+ ),
+
+ VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)
+ );
+
+/*
+ * Trace points for instructions that are of special interest.
+ */
+
+TRACE_EVENT(kvm_s390_handle_sigp,
+ TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
+ __u32 parameter),
+ TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u8, order_code)
+ __field(__u16, cpu_addr)
+ __field(__u32, parameter)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->order_code = order_code;
+ __entry->cpu_addr = cpu_addr;
+ __entry->parameter = parameter;
+ ),
+
+ VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
+ "parameter %08x", __entry->order_code,
+ __print_symbolic(__entry->order_code,
+ sigp_order_codes),
+ __entry->cpu_addr, __entry->parameter)
+ );
+
+TRACE_EVENT(kvm_s390_handle_sigp_pei,
+ TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
+ TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u8, order_code)
+ __field(__u16, cpu_addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->order_code = order_code;
+ __entry->cpu_addr = cpu_addr;
+ ),
+
+ VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
+ __entry->order_code,
+ __print_symbolic(__entry->order_code,
+ sigp_order_codes),
+ __entry->cpu_addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_diag,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+ TP_ARGS(VCPU_ARGS_COMMON, code),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, code)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ ),
+
+ VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
+ __print_symbolic(__entry->code, diagnose_codes))
+ );
+
+TRACE_EVENT(kvm_s390_handle_lctl,
+ TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, g)
+ __field(int, reg1)
+ __field(int, reg3)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->g = g;
+ __entry->reg1 = reg1;
+ __entry->reg3 = reg3;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx",
+ __entry->g ? "lctlg" : "lctl",
+ __entry->reg1, __entry->reg3, __entry->addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stctl,
+ TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, g)
+ __field(int, reg1)
+ __field(int, reg3)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->g = g;
+ __entry->reg1 = reg1;
+ __entry->reg3 = reg3;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+ __entry->g ? "stctg" : "stctl",
+ __entry->reg1, __entry->reg3, __entry->addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_prefix,
+ TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
+ TP_ARGS(VCPU_ARGS_COMMON, set, address),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, set)
+ __field(u32, address)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->set = set;
+ __entry->address = address;
+ ),
+
+ VCPU_TP_PRINTK("%s prefix to %08x",
+ __entry->set ? "setting" : "storing",
+ __entry->address)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stap,
+ TP_PROTO(VCPU_PROTO_COMMON, u64 address),
+ TP_ARGS(VCPU_ARGS_COMMON, address),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u64, address)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->address = address;
+ ),
+
+ VCPU_TP_PRINTK("storing cpu address to %016llx",
+ __entry->address)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stfl,
+ TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
+ TP_ARGS(VCPU_ARGS_COMMON, facility_list),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(unsigned int, facility_list)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->facility_list = facility_list;
+ ),
+
+ VCPU_TP_PRINTK("store facility list value %08x",
+ __entry->facility_list)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stsi,
+ TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, fc)
+ __field(int, sel1)
+ __field(int, sel2)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->fc = fc;
+ __entry->sel1 = sel1;
+ __entry->sel2 = sel2;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",
+ __entry->fc, __entry->sel1, __entry->sel2,
+ __entry->addr)
+ );
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index ab6735df2d2..c6d752e8bf2 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -2,7 +2,7 @@
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess_std.o uaccess_pt.o
-obj-$(CONFIG_32BIT) += div64.o qrnnd.o
-lib-$(CONFIG_64BIT) += uaccess_mvcos.o
+lib-y += delay.o string.o uaccess.o find.o
+obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
+obj-$(CONFIG_64BIT) += mem64.o
lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index eae21a8ac72..a9f3d0042d5 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -1,21 +1,19 @@
/*
- * arch/s390/lib/delay.c
* Precise Delay Loops for S390
*
- * S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *
- * Derived from "arch/i386/lib/delay.c"
- * Copyright (C) 1993 Linus Torvalds
- * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * Copyright IBM Corp. 1999, 2008
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/timex.h>
+#include <linux/module.h>
#include <linux/irqflags.h>
#include <linux/interrupt.h>
+#include <asm/vtimer.h>
+#include <asm/div64.h>
void __delay(unsigned long loops)
{
@@ -29,46 +27,103 @@ void __delay(unsigned long loops)
asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
}
+static void __udelay_disabled(unsigned long long usecs)
+{
+ unsigned long cr0, cr6, new;
+ u64 clock_saved, end;
+
+ end = get_tod_clock() + (usecs << 12);
+ clock_saved = local_tick_disable();
+ __ctl_store(cr0, 0, 0);
+ __ctl_store(cr6, 6, 6);
+ new = (cr0 & 0xffff00e0) | 0x00000800;
+ __ctl_load(new , 0, 0);
+ new = 0;
+ __ctl_load(new, 6, 6);
+ lockdep_off();
+ do {
+ set_clock_comparator(end);
+ vtime_stop_cpu();
+ } while (get_tod_clock_fast() < end);
+ lockdep_on();
+ __ctl_load(cr0, 0, 0);
+ __ctl_load(cr6, 6, 6);
+ local_tick_enable(clock_saved);
+}
+
+static void __udelay_enabled(unsigned long long usecs)
+{
+ u64 clock_saved, end;
+
+ end = get_tod_clock_fast() + (usecs << 12);
+ do {
+ clock_saved = 0;
+ if (end < S390_lowcore.clock_comparator) {
+ clock_saved = local_tick_disable();
+ set_clock_comparator(end);
+ }
+ vtime_stop_cpu();
+ if (clock_saved)
+ local_tick_enable(clock_saved);
+ } while (get_tod_clock_fast() < end);
+}
+
/*
* Waits for 'usecs' microseconds using the TOD clock comparator.
*/
-void __udelay(unsigned long usecs)
+void __udelay(unsigned long long usecs)
{
- u64 end, time, old_cc = 0;
- unsigned long flags, cr0, mask, dummy;
- int irq_context;
+ unsigned long flags;
- irq_context = in_interrupt();
- if (!irq_context)
- local_bh_disable();
+ preempt_disable();
local_irq_save(flags);
- if (raw_irqs_disabled_flags(flags)) {
- old_cc = S390_lowcore.clock_comparator;
- S390_lowcore.clock_comparator = -1ULL;
- __ctl_store(cr0, 0, 0);
- dummy = (cr0 & 0xffff00e0) | 0x00000800;
- __ctl_load(dummy , 0, 0);
- mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
- } else
- mask = psw_kernel_bits | PSW_MASK_WAIT |
- PSW_MASK_EXT | PSW_MASK_IO;
-
- end = get_clock() + ((u64) usecs << 12);
- do {
- time = end < S390_lowcore.clock_comparator ?
- end : S390_lowcore.clock_comparator;
- set_clock_comparator(time);
- trace_hardirqs_on();
- __load_psw_mask(mask);
- local_irq_disable();
- } while (get_clock() < end);
-
- if (raw_irqs_disabled_flags(flags)) {
- __ctl_load(cr0, 0, 0);
- S390_lowcore.clock_comparator = old_cc;
+ if (in_irq()) {
+ __udelay_disabled(usecs);
+ goto out;
+ }
+ if (in_softirq()) {
+ if (raw_irqs_disabled_flags(flags))
+ __udelay_disabled(usecs);
+ else
+ __udelay_enabled(usecs);
+ goto out;
}
- if (!irq_context)
+ if (raw_irqs_disabled_flags(flags)) {
+ local_bh_disable();
+ __udelay_disabled(usecs);
_local_bh_enable();
- set_clock_comparator(S390_lowcore.clock_comparator);
+ goto out;
+ }
+ __udelay_enabled(usecs);
+out:
local_irq_restore(flags);
+ preempt_enable();
+}
+EXPORT_SYMBOL(__udelay);
+
+/*
+ * Simple udelay variant. To be used on startup and reboot
+ * when the interrupt handler isn't working.
+ */
+void udelay_simple(unsigned long long usecs)
+{
+ u64 end;
+
+ end = get_tod_clock_fast() + (usecs << 12);
+ while (get_tod_clock_fast() < end)
+ cpu_relax();
+}
+
+void __ndelay(unsigned long long nsecs)
+{
+ u64 end;
+
+ nsecs <<= 9;
+ do_div(nsecs, 125);
+ end = get_tod_clock_fast() + nsecs;
+ if (nsecs & ~0xfffUL)
+ __udelay(nsecs >> 12);
+ while (get_tod_clock_fast() < end)
+ barrier();
}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c
index a5f8300bf3e..261152f8324 100644
--- a/arch/s390/lib/div64.c
+++ b/arch/s390/lib/div64.c
@@ -1,9 +1,7 @@
/*
- * arch/s390/lib/div64.c
- *
* __div64_32 implementation for 31 bit.
*
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
*/
@@ -61,7 +59,7 @@ static uint32_t __div64_31(uint64_t *n, uint32_t base)
" clr %0,%3\n"
" jl 0f\n"
" slr %0,%3\n"
- " alr %1,%2\n"
+ " ahi %1,1\n"
"0:\n"
: "+d" (reg2), "+d" (reg3), "=d" (tmp)
: "d" (base), "2" (1UL) : "cc" );
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
new file mode 100644
index 00000000000..922003c1b90
--- /dev/null
+++ b/arch/s390/lib/find.c
@@ -0,0 +1,77 @@
+/*
+ * MSB0 numbered special bitops handling.
+ *
+ * On s390x the bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ *
+ * The reason for this bit numbering is the fact that the hardware sets bits
+ * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
+ * from the 'wrong end'.
+ */
+
+#include <linux/compiler.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG - 1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = (*p) & (~0UL << (BITS_PER_LONG - size));
+ if (!tmp) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_first_bit_inv);
+
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + (offset / BITS_PER_LONG);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL >> offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp &= (~0UL << (BITS_PER_LONG - size));
+ if (!tmp) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_next_bit_inv);
diff --git a/arch/s390/lib/mem32.S b/arch/s390/lib/mem32.S
new file mode 100644
index 00000000000..14ca9244b61
--- /dev/null
+++ b/arch/s390/lib/mem32.S
@@ -0,0 +1,92 @@
+/*
+ * String handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ * if (likely(c == 0))
+ * return __builtin_memset(s, 0, n);
+ * return __builtin_memset(s, c, n);
+ * }
+ */
+ENTRY(memset)
+ basr %r5,%r0
+.Lmemset_base:
+ ltr %r4,%r4
+ bzr %r14
+ ltr %r3,%r3
+ jnz .Lmemset_fill
+ ahi %r4,-1
+ lr %r3,%r4
+ srl %r3,8
+ ltr %r3,%r3
+ lr %r1,%r2
+ je .Lmemset_clear_rest
+.Lmemset_clear_loop:
+ xc 0(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brct %r3,.Lmemset_clear_loop
+.Lmemset_clear_rest:
+ ex %r4,.Lmemset_xc-.Lmemset_base(%r5)
+ br %r14
+.Lmemset_fill:
+ stc %r3,0(%r2)
+ chi %r4,1
+ lr %r1,%r2
+ ber %r14
+ ahi %r4,-2
+ lr %r3,%r4
+ srl %r3,8
+ ltr %r3,%r3
+ je .Lmemset_fill_rest
+.Lmemset_fill_loop:
+ mvc 1(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brct %r3,.Lmemset_fill_loop
+.Lmemset_fill_rest:
+ ex %r4,.Lmemset_mvc-.Lmemset_base(%r5)
+ br %r14
+.Lmemset_xc:
+ xc 0(1,%r1),0(%r1)
+.Lmemset_mvc:
+ mvc 1(1,%r1),0(%r1)
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+ENTRY(memcpy)
+ basr %r5,%r0
+.Lmemcpy_base:
+ ltr %r4,%r4
+ bzr %r14
+ ahi %r4,-1
+ lr %r0,%r4
+ srl %r0,8
+ ltr %r0,%r0
+ lr %r1,%r2
+ jnz .Lmemcpy_loop
+.Lmemcpy_rest:
+ ex %r4,.Lmemcpy_mvc-.Lmemcpy_base(%r5)
+ br %r14
+.Lmemcpy_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brct %r0,.Lmemcpy_loop
+ j .Lmemcpy_rest
+.Lmemcpy_mvc:
+ mvc 0(1,%r1),0(%r3)
diff --git a/arch/s390/lib/mem64.S b/arch/s390/lib/mem64.S
new file mode 100644
index 00000000000..c6d553e85ab
--- /dev/null
+++ b/arch/s390/lib/mem64.S
@@ -0,0 +1,88 @@
+/*
+ * String handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ * if (likely(c == 0))
+ * return __builtin_memset(s, 0, n);
+ * return __builtin_memset(s, c, n);
+ * }
+ */
+ENTRY(memset)
+ ltgr %r4,%r4
+ bzr %r14
+ ltgr %r3,%r3
+ jnz .Lmemset_fill
+ aghi %r4,-1
+ srlg %r3,%r4,8
+ ltgr %r3,%r3
+ lgr %r1,%r2
+ jz .Lmemset_clear_rest
+.Lmemset_clear_loop:
+ xc 0(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r3,.Lmemset_clear_loop
+.Lmemset_clear_rest:
+ larl %r3,.Lmemset_xc
+ ex %r4,0(%r3)
+ br %r14
+.Lmemset_fill:
+ stc %r3,0(%r2)
+ cghi %r4,1
+ lgr %r1,%r2
+ ber %r14
+ aghi %r4,-2
+ srlg %r3,%r4,8
+ ltgr %r3,%r3
+ jz .Lmemset_fill_rest
+.Lmemset_fill_loop:
+ mvc 1(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r3,.Lmemset_fill_loop
+.Lmemset_fill_rest:
+ larl %r3,.Lmemset_mvc
+ ex %r4,0(%r3)
+ br %r14
+.Lmemset_xc:
+ xc 0(1,%r1),0(%r1)
+.Lmemset_mvc:
+ mvc 1(1,%r1),0(%r1)
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+ENTRY(memcpy)
+ ltgr %r4,%r4
+ bzr %r14
+ aghi %r4,-1
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
+ lgr %r1,%r2
+ jnz .Lmemcpy_loop
+.Lmemcpy_rest:
+ larl %r5,.Lmemcpy_mvc
+ ex %r4,0(%r5)
+ br %r14
+.Lmemcpy_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brctg %r5,.Lmemcpy_loop
+ j .Lmemcpy_rest
+.Lmemcpy_mvc:
+ mvc 0(1,%r1),0(%r3)
diff --git a/arch/s390/lib/qrnnd.S b/arch/s390/lib/qrnnd.S
index eb1df632e74..d321329130e 100644
--- a/arch/s390/lib/qrnnd.S
+++ b/arch/s390/lib/qrnnd.S
@@ -1,5 +1,7 @@
# S/390 __udiv_qrnnd
+#include <linux/linkage.h>
+
# r2 : &__r
# r3 : upper half of 64 bit word n
# r4 : lower half of 64 bit word n
@@ -8,8 +10,7 @@
# the quotient q is to be returned
.text
- .globl __udiv_qrnnd
-__udiv_qrnnd:
+ENTRY(__udiv_qrnnd)
st %r2,24(%r15) # store pointer to reminder for later
lr %r0,%r3 # reload n
lr %r1,%r4
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index e41f4008afc..5b0e445bc3f 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -1,8 +1,7 @@
/*
- * arch/s390/lib/spinlock.c
* Out of line spinlock code.
*
- * Copyright (C) IBM Corp. 2004, 2006
+ * Copyright IBM Corp. 2004, 2006
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
@@ -10,6 +9,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/init.h>
+#include <linux/smp.h>
#include <asm/io.h>
int spin_retry = 1000;
@@ -24,147 +24,210 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
-static inline void _raw_yield(void)
+/*
+ * Out of line wait loop for a contended spinlock: returns once lp->lock
+ * has been switched from 0 to SPINLOCK_LOCKVAL by compare and swap.
+ *
+ * While the lock is held the loop
+ *  - yields to the owner if smp_vcpu_scheduled() says it is not running,
+ *  - otherwise polls the lock word up to spin_retry times,
+ *  - and, unless MACHINE_IS_LPAR, yields to the owner if the lock is
+ *    still taken after polling.
+ * The owner is addressed as ~owner, i.e. the lock word is presumably the
+ * complemented cpu number of the holder (SPINLOCK_LOCKVAL is defined
+ * elsewhere - confirm there).
+ */
+void arch_spin_lock_wait(arch_spinlock_t *lp)
{
- if (MACHINE_HAS_DIAG44)
- asm volatile("diag 0,0,0x44");
+ unsigned int cpu = SPINLOCK_LOCKVAL;
+ unsigned int owner;
+ int count;
+
+ while (1) {
+ owner = ACCESS_ONCE(lp->lock);
+ /* Try to get the lock if it is free. */
+ if (!owner) {
+ if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ return;
+ continue;
+ }
+ /* Check if the lock owner is running. */
+ if (!smp_vcpu_scheduled(~owner)) {
+ smp_yield_cpu(~owner);
+ continue;
+ }
+ /* Loop for a while on the lock value. */
+ count = spin_retry;
+ do {
+ owner = ACCESS_ONCE(lp->lock);
+ } while (owner && count-- > 0);
+ if (!owner)
+ continue;
+ /*
+ * For multiple layers of hypervisors, e.g. z/VM + LPAR
+ * yield the CPU if the lock is still unavailable.
+ */
+ if (!MACHINE_IS_LPAR)
+ smp_yield_cpu(~owner);
+ }
}
+EXPORT_SYMBOL(arch_spin_lock_wait);
-static inline void _raw_yield_cpu(int cpu)
+/*
+ * Out of line wait loop for a contended spinlock taken with saved
+ * interrupt state: interrupts are restored to @flags while waiting and
+ * disabled again only around the compare and swap that takes the lock.
+ * Returns with the lock held and interrupts disabled.
+ *
+ * @lp:    the spinlock to acquire
+ * @flags: interrupt state of the caller, as passed to local_irq_restore()
+ */
+void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
{
- if (MACHINE_HAS_DIAG9C)
- asm volatile("diag %0,0,0x9c"
- : : "d" (__cpu_logical_map[cpu]));
- else
- _raw_yield();
+ unsigned int cpu = SPINLOCK_LOCKVAL;
+ unsigned int owner;
+ int count;
+
+ local_irq_restore(flags);
+ while (1) {
+ owner = ACCESS_ONCE(lp->lock);
+ /* Try to get the lock if it is free. */
+ if (!owner) {
+ local_irq_disable();
+ if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+ return;
+ local_irq_restore(flags);
+ /*
+ * Another cpu took the lock in the meantime. owner is
+ * still 0 here, so re-read the lock word instead of
+ * checking / yielding to the non-existent cpu ~0.
+ */
+ continue;
+ }
+ /* Check if the lock owner is running. */
+ if (!smp_vcpu_scheduled(~owner)) {
+ smp_yield_cpu(~owner);
+ continue;
+ }
+ /* Loop for a while on the lock value. */
+ count = spin_retry;
+ do {
+ owner = ACCESS_ONCE(lp->lock);
+ } while (owner && count-- > 0);
+ if (!owner)
+ continue;
+ /*
+ * For multiple layers of hypervisors, e.g. z/VM + LPAR
+ * yield the CPU if the lock is still unavailable.
+ */
+ if (!MACHINE_IS_LPAR)
+ smp_yield_cpu(~owner);
+ }
+}
+EXPORT_SYMBOL(arch_spin_lock_wait_flags);
+
+/*
+ * Give up the cpu in favour of the current holder of @lp. Does nothing if
+ * the lock word is 0. Otherwise yields to cpu ~lock when running under
+ * z/VM or KVM, or when smp_vcpu_scheduled() reports that the holder is
+ * not running.
+ */
+void arch_spin_relax(arch_spinlock_t *lp)
+{
+ unsigned int cpu = lp->lock;
+ if (cpu != 0) {
+ if (MACHINE_IS_VM || MACHINE_IS_KVM ||
+ !smp_vcpu_scheduled(~cpu))
+ smp_yield_cpu(~cpu);
+ }
+}
+EXPORT_SYMBOL(arch_spin_relax);
+
+/*
+ * Call arch_spin_trylock_once() up to spin_retry times.
+ * Returns 1 as soon as one attempt succeeds, 0 if all attempts failed.
+ */
+int arch_spin_trylock_retry(arch_spinlock_t *lp)
+{
+ int count;
+
+ for (count = spin_retry; count > 0; count--)
+ if (arch_spin_trylock_once(lp))
+ return 1;
+ return 0;
}
+EXPORT_SYMBOL(arch_spin_trylock_retry);
-void _raw_spin_lock_wait(raw_spinlock_t *lp)
+/*
+ * Wait loop for taking @rw for reading. A lock word that is negative when
+ * read as int (bit 0x80000000 set, the value the write lock paths store)
+ * is skipped; otherwise the word is incremented by one with compare and
+ * swap. smp_yield() is called each time the spin_retry budget runs out.
+ */
+void _raw_read_lock_wait(arch_rwlock_t *rw)
{
+ unsigned int old;
int count = spin_retry;
- unsigned int cpu = ~smp_processor_id();
while (1) {
if (count-- <= 0) {
- unsigned int owner = lp->owner_cpu;
- if (owner != 0)
- _raw_yield_cpu(~owner);
+ smp_yield();
count = spin_retry;
}
- if (__raw_spin_is_locked(lp))
+ old = ACCESS_ONCE(rw->lock);
+ if ((int) old < 0)
continue;
- if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+ if (_raw_compare_and_swap(&rw->lock, old, old + 1))
return;
}
}
-EXPORT_SYMBOL(_raw_spin_lock_wait);
+EXPORT_SYMBOL(_raw_read_lock_wait);
-void _raw_spin_lock_wait_flags(raw_spinlock_t *lp, unsigned long flags)
+/*
+ * Same wait loop as _raw_read_lock_wait(), but interrupts are restored to
+ * @flags while waiting and disabled only around the compare and swap that
+ * increments the reader count. Returns with interrupts disabled.
+ */
+void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
{
+ unsigned int old;
int count = spin_retry;
- unsigned int cpu = ~smp_processor_id();
local_irq_restore(flags);
while (1) {
if (count-- <= 0) {
- unsigned int owner = lp->owner_cpu;
- if (owner != 0)
- _raw_yield_cpu(~owner);
+ smp_yield();
count = spin_retry;
}
- if (__raw_spin_is_locked(lp))
+ old = ACCESS_ONCE(rw->lock);
+ if ((int) old < 0)
continue;
local_irq_disable();
- if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+ if (_raw_compare_and_swap(&rw->lock, old, old + 1))
return;
local_irq_restore(flags);
}
}
-EXPORT_SYMBOL(_raw_spin_lock_wait_flags);
+EXPORT_SYMBOL(_raw_read_lock_wait_flags);
-int _raw_spin_trylock_retry(raw_spinlock_t *lp)
+/*
+ * Try up to spin_retry times to take @rw for reading. Iterations that see
+ * a negative lock word (write bit set) are skipped but still count.
+ * Returns 1 if the reader count was incremented, 0 otherwise.
+ */
+int _raw_read_trylock_retry(arch_rwlock_t *rw)
{
- unsigned int cpu = ~smp_processor_id();
- int count;
+ unsigned int old;
+ int count = spin_retry;
- for (count = spin_retry; count > 0; count--) {
- if (__raw_spin_is_locked(lp))
+ while (count-- > 0) {
+ old = ACCESS_ONCE(rw->lock);
+ if ((int) old < 0)
continue;
- if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+ if (_raw_compare_and_swap(&rw->lock, old, old + 1))
return 1;
}
return 0;
}
-EXPORT_SYMBOL(_raw_spin_trylock_retry);
-
-void _raw_spin_relax(raw_spinlock_t *lock)
-{
- unsigned int cpu = lock->owner_cpu;
- if (cpu != 0)
- _raw_yield_cpu(~cpu);
-}
-EXPORT_SYMBOL(_raw_spin_relax);
+EXPORT_SYMBOL(_raw_read_trylock_retry);
-void _raw_read_lock_wait(raw_rwlock_t *rw)
+/*
+ * Wait loop for taking @rw for writing: waits until the lock word is 0
+ * and then replaces it with 0x80000000 by compare and swap. smp_yield()
+ * is called each time the spin_retry budget runs out.
+ */
+void _raw_write_lock_wait(arch_rwlock_t *rw)
{
unsigned int old;
int count = spin_retry;
while (1) {
if (count-- <= 0) {
- _raw_yield();
+ smp_yield();
count = spin_retry;
}
- if (!__raw_read_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if (old)
continue;
- old = rw->lock & 0x7fffffffU;
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+ if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
return;
}
}
-EXPORT_SYMBOL(_raw_read_lock_wait);
+EXPORT_SYMBOL(_raw_write_lock_wait);
-int _raw_read_trylock_retry(raw_rwlock_t *rw)
+/*
+ * Same wait loop as _raw_write_lock_wait(), but interrupts are restored
+ * to @flags while waiting and disabled only around the compare and swap
+ * that stores 0x80000000. Returns with interrupts disabled.
+ */
+void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
{
unsigned int old;
int count = spin_retry;
- while (count-- > 0) {
- if (!__raw_read_can_lock(rw))
- continue;
- old = rw->lock & 0x7fffffffU;
- if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(_raw_read_trylock_retry);
-
-void _raw_write_lock_wait(raw_rwlock_t *rw)
-{
- int count = spin_retry;
-
+ local_irq_restore(flags);
while (1) {
if (count-- <= 0) {
- _raw_yield();
+ smp_yield();
count = spin_retry;
}
- if (!__raw_write_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if (old)
continue;
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+ local_irq_disable();
+ if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
return;
+ local_irq_restore(flags);
}
}
-EXPORT_SYMBOL(_raw_write_lock_wait);
+EXPORT_SYMBOL(_raw_write_lock_wait_flags);
-int _raw_write_trylock_retry(raw_rwlock_t *rw)
+/*
+ * Try up to spin_retry times to take @rw for writing. Iterations that see
+ * a non-zero lock word are skipped but still count.
+ * Returns 1 if the lock word was switched from 0 to 0x80000000, else 0.
+ */
+int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
+ unsigned int old;
int count = spin_retry;
while (count-- > 0) {
- if (!__raw_write_can_lock(rw))
+ old = ACCESS_ONCE(rw->lock);
+ if (old)
continue;
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+ if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
return 1;
}
return 0;
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index ae5cf5d03d4..b647d5ff0ad 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -1,9 +1,8 @@
/*
- * arch/s390/lib/string.c
* Optimized string functions
*
* S390 version
- * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 2004
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
@@ -99,7 +98,7 @@ size_t strlcpy(char *dest, const char *src, size_t size)
if (size) {
size_t len = (ret >= size) ? size-1 : ret;
dest[len] = '\0';
- __builtin_memcpy(dest, src, len);
+ memcpy(dest, src, len);
}
return ret;
}
@@ -117,8 +116,8 @@ EXPORT_SYMBOL(strlcpy);
char *strncpy(char *dest, const char *src, size_t n)
{
size_t len = __strnend(src, n) - src;
- __builtin_memset(dest + len, 0, n - len);
- __builtin_memcpy(dest, src, len);
+ memset(dest + len, 0, n - len);
+ memcpy(dest, src, len);
return dest;
}
EXPORT_SYMBOL(strncpy);
@@ -164,7 +163,7 @@ size_t strlcat(char *dest, const char *src, size_t n)
if (len >= n)
len = n - 1;
dest[len] = '\0';
- __builtin_memcpy(dest, src, len);
+ memcpy(dest, src, len);
}
return res;
}
@@ -187,7 +186,7 @@ char *strncat(char *dest, const char *src, size_t n)
char *p = __strend(dest);
p[len] = '\0';
- __builtin_memcpy(p, src, len);
+ memcpy(p, src, len);
return dest;
}
EXPORT_SYMBOL(strncat);
@@ -341,41 +340,3 @@ void *memscan(void *s, int c, size_t n)
return (void *) ret;
}
EXPORT_SYMBOL(memscan);
-
-/**
- * memcpy - Copy one area of memory to another
- * @dest: Where to copy to
- * @src: Where to copy from
- * @n: The size of the area.
- *
- * returns a pointer to @dest
- */
-void *memcpy(void *dest, const void *src, size_t n)
-{
- return __builtin_memcpy(dest, src, n);
-}
-EXPORT_SYMBOL(memcpy);
-
-/**
- * memset - Fill a region of memory with the given value
- * @s: Pointer to the start of the area.
- * @c: The byte to fill the area with
- * @n: The size of the area.
- *
- * returns a pointer to @s
- */
-void *memset(void *s, int c, size_t n)
-{
- char *xs;
-
- if (c == 0)
- return __builtin_memset(s, 0, n);
-
- xs = (char *) s;
- if (n > 0)
- do {
- *xs++ = c;
- } while (--n > 0);
- return s;
-}
-EXPORT_SYMBOL(memset);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
new file mode 100644
index 00000000000..53dd5d7a0c9
--- /dev/null
+++ b/arch/s390/lib/uaccess.c
@@ -0,0 +1,406 @@
+/*
+ * Standard user space access functions based on mvcp/mvcs and doing
+ * interesting things in the secondary space mode.
+ *
+ * Copyright IBM Corp. 2006,2014
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+
+/*
+ * Mnemonics pasted into the inline assembly of this file: the 32-bit
+ * register forms without CONFIG_64BIT, the 64-bit ("g") forms with it.
+ */
+#ifndef CONFIG_64BIT
+#define AHI "ahi"
+#define ALR "alr"
+#define CLR "clr"
+#define LHI "lhi"
+#define SLR "slr"
+#else
+#define AHI "aghi"
+#define ALR "algr"
+#define CLR "clgr"
+#define LHI "lghi"
+#define SLR "slgr"
+#endif
+
+/* Off by default; uaccess_init() turns it on to select the mvcos variants. */
+static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+
+/*
+ * Copy @size bytes from user address @ptr to kernel address @x with the
+ * instruction emitted through ".insn ss,0xc80000000000" (MVCOS, going by
+ * the function name); register 0 is preloaded with 0x81 as its control
+ * value (presumably selecting the user address space for the source -
+ * confirm against the MVCOS description).
+ *
+ * Label 0 is the main loop; each further round moves size, ptr and x on
+ * by 4096 bytes (tmp1 = -4096). A fault at 0/9 continues at label 2,
+ * which retries the copy only up to the next page boundary of ptr. From
+ * label 4 on the part of the destination that was not copied is cleared
+ * with xc.
+ *
+ * Returns size: 0 when everything was copied (label 7), otherwise the
+ * byte count left in size by the fault path.
+ */
+static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x81UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+ "9: jz 7f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 4f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+ "10:"SLR" %0,%4\n"
+ " "ALR" %2,%4\n"
+ "4:"LHI" %4,-1\n"
+ " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
+ " bras %3,6f\n" /* memset loop */
+ " xc 0(1,%2),0(%2)\n"
+ "5: xc 0(256,%2),0(%2)\n"
+ " la %2,256(%2)\n"
+ "6:"AHI" %4,-256\n"
+ " jnm 5b\n"
+ " ex %4,0(%3)\n"
+ " j 8f\n"
+ "7:"SLR" %0,%0\n"
+ "8:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+/*
+ * Copy @size bytes from user address @ptr to kernel address @x with mvcp,
+ * after load_kernel_asce() and bracketed by "sacf 0" ... "sacf 768".
+ *
+ * Each further round of the mvcp loop moves size, ptr and x on by 256
+ * bytes (tmp1 = -256). A fault in the loop continues at label 3, which
+ * retries the copy only up to the page boundary computed from ptr. From
+ * label 5 on the part of the destination that was not copied is cleared
+ * with xc.
+ *
+ * Returns size: 0 when everything was copied (label 8), otherwise the
+ * byte count left in size by the fault path.
+ */
+static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ load_kernel_asce();
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcp 0(%0,%2),0(%1),%3\n"
+ "10:jz 8f\n"
+ "1:"ALR" %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcp 0(%0,%2),0(%1),%3\n"
+ "11:jnz 1b\n"
+ " j 8f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " "LHI" %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "4: mvcp 0(%4,%2),0(%1),%3\n"
+ "12:"SLR" %0,%4\n"
+ " "ALR" %2,%4\n"
+ "5:"LHI" %4,-1\n"
+ " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
+ " bras %3,7f\n" /* memset loop */
+ " xc 0(1,%2),0(%2)\n"
+ "6: xc 0(256,%2),0(%2)\n"
+ " la %2,256(%2)\n"
+ "7:"AHI" %4,-256\n"
+ " jnm 6b\n"
+ " ex %4,0(%3)\n"
+ " j 9f\n"
+ "8:"SLR" %0,%0\n"
+ "9: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
+ EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+/*
+ * Copy @n bytes from user address @from to kernel address @to. Uses the
+ * mvcos variant when the have_mvcos static key is enabled and the mvcp
+ * variant otherwise, and returns what the selected helper returns
+ * (0 when everything was copied).
+ */
+unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_from_user_mvcos(to, from, n);
+ return copy_from_user_mvcp(to, from, n);
+}
+EXPORT_SYMBOL(__copy_from_user);
+
+/*
+ * Copy @size bytes from kernel address @x to user address @ptr with the
+ * instruction emitted through ".insn ss,0xc80000000000" (MVCOS, going by
+ * the function name); register 0 is preloaded with 0x810000 as its
+ * control value (presumably selecting the user address space for the
+ * destination - confirm against the MVCOS description).
+ *
+ * Label 0 is the main loop; each further round moves size, ptr and x on
+ * by 4096 bytes (tmp1 = -4096). A fault at 0/6 continues at label 2,
+ * which retries the copy only up to the next page boundary of ptr.
+ *
+ * Returns size: 0 when everything was copied (label 4), otherwise the
+ * byte count left in size by the fault path.
+ */
+static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ "6: jz 4f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+ "7:"SLR" %0,%4\n"
+ " j 5f\n"
+ "4:"SLR" %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+/*
+ * Copy @size bytes from kernel address @x to user address @ptr with mvcs,
+ * after load_kernel_asce() and bracketed by "sacf 0" ... "sacf 768".
+ *
+ * Each further round of the mvcs loop moves size, ptr and x on by 256
+ * bytes (tmp1 = -256). A fault in the loop continues at label 3, which
+ * retries the copy only up to the page boundary computed from ptr.
+ *
+ * Returns size: 0 when everything was copied (label 5), otherwise the
+ * byte count left in size by the fault path.
+ */
+static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ load_kernel_asce();
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcs 0(%0,%1),0(%2),%3\n"
+ "7: jz 5f\n"
+ "1:"ALR" %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcs 0(%0,%1),0(%2),%3\n"
+ "8: jnz 1b\n"
+ " j 5f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " "LHI" %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "4: mvcs 0(%4,%1),0(%2),%3\n"
+ "9:"SLR" %0,%4\n"
+ " j 6f\n"
+ "5:"SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+ EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+/*
+ * Copy @n bytes from kernel address @from to user address @to. Uses the
+ * mvcos variant when the have_mvcos static key is enabled and the mvcs
+ * variant otherwise, and returns what the selected helper returns
+ * (0 when everything was copied).
+ */
+unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_to_user_mvcos(to, from, n);
+ return copy_to_user_mvcs(to, from, n);
+}
+EXPORT_SYMBOL(__copy_to_user);
+
+/*
+ * Copy @size bytes from user address @from to user address @to with the
+ * instruction emitted through ".insn ss,0xc80000000000" (MVCOS, going by
+ * the function name); register 0 is preloaded with 0x810081.
+ *
+ * Each further round of the loop moves size, to and from on by 4096 bytes
+ * (tmp1 = -4096). Unlike the other mvcos helpers there is no retry up to
+ * the page boundary: a fault at label 0 goes straight to label 3 (see the
+ * FIXME below).
+ *
+ * Returns size: 0 when everything was copied (label 2), otherwise the
+ * value size had when the fault was taken.
+ */
+static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810081UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ /* FIXME: copy with reduced length. */
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ " jz 2f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2:"SLR" %0,%0\n"
+ "3: \n"
+ EX_TABLE(0b,3b)
+ : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+/*
+ * Copy @size bytes from user address @from to user address @to with plain
+ * mvc, after load_kernel_asce() and bracketed by "sacf 256" ... "sacf 768".
+ *
+ * The fast path (labels 2-4) copies 256-byte blocks and then EXECUTEs the
+ * one-byte mvc at label 1 for the rest. A fault there continues at label
+ * 0, which falls into the byte-by-byte loop at label 1; a fault in that
+ * loop leaves through label 6 with the current count still in size.
+ *
+ * Returns size: 0 when everything was copied (label 5), otherwise the
+ * count left by the byte loop.
+ */
+static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ unsigned long tmp1;
+
+ load_kernel_asce();
+ asm volatile(
+ " sacf 256\n"
+ " "AHI" %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ "0:"AHI" %0,257\n"
+ "1: mvc 0(1,%1),0(%2)\n"
+ " la %1,1(%1)\n"
+ " la %2,1(%2)\n"
+ " "AHI" %0,-1\n"
+ " jnz 1b\n"
+ " j 5f\n"
+ "2: mvc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "3:"AHI" %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,1b-0b(%3)\n"
+ "5: "SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+ : : "cc", "memory");
+ return size;
+}
+
+/*
+ * Copy @n bytes from user address @from to user address @to. Uses the
+ * mvcos variant when the have_mvcos static key is enabled and the mvc
+ * variant otherwise, and returns what the selected helper returns
+ * (0 when everything was copied).
+ */
+unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_in_user_mvcos(to, from, n);
+ return copy_in_user_mvc(to, from, n);
+}
+EXPORT_SYMBOL(__copy_in_user);
+
+/*
+ * Clear @size bytes at user address @to by copying from empty_zero_page
+ * with the instruction emitted through ".insn ss,0xc80000000000" (MVCOS,
+ * going by the function name); register 0 is preloaded with 0x810000.
+ *
+ * Each further round of the loop moves size and to on by 4096 bytes
+ * (tmp1 = -4096); the source stays at empty_zero_page. A fault at label 0
+ * continues at label 2, which retries only up to the next page boundary
+ * of to.
+ *
+ * Returns size: 0 when everything was cleared (label 4), otherwise the
+ * byte count left in size by the fault path.
+ */
+static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+ " jz 4f\n"
+ "1:"ALR" %0,%2\n"
+ " "SLR" %1,%2\n"
+ " j 0b\n"
+ "2: la %3,4095(%1)\n"/* %3 = to + 4095 */
+ " nr %3,%2\n" /* %3 = (to + 4095) & -4096 */
+ " "SLR" %3,%1\n"
+ " "CLR" %0,%3\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+ " "SLR" %0,%3\n"
+ " j 5f\n"
+ "4:"SLR" %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+ : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+/*
+ * Clear @size bytes at user address @to with xc, after load_kernel_asce()
+ * and bracketed by "sacf 256" ... "sacf 768".
+ *
+ * The fast path (labels 2-4) clears 256-byte blocks and then EXECUTEs the
+ * one-byte xc template for the rest. A fault there continues at label 0,
+ * which retries only up to the page boundary computed from to; a fault in
+ * that retry (label 1) leaves through label 6 with the current count
+ * still in size.
+ *
+ * Returns size: 0 via label 5, otherwise the count left by the fault path.
+ */
+static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ load_kernel_asce();
+ asm volatile(
+ " sacf 256\n"
+ " "AHI" %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ " xc 0(1,%1),0(%1)\n"
+ "0:"AHI" %0,257\n"
+ " la %2,255(%1)\n" /* %2 = ptr + 255 */
+ " srl %2,12\n"
+ " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
+ " "SLR" %2,%1\n"
+ " "CLR" %0,%2\n" /* clear crosses next page boundary? */
+ " jnh 5f\n"
+ " "AHI" %2,-1\n"
+ "1: ex %2,0(%3)\n"
+ " "AHI" %2,1\n"
+ " "SLR" %0,%2\n"
+ " j 5f\n"
+ "2: xc 0(256,%1),0(%1)\n"
+ " la %1,256(%1)\n"
+ "3:"AHI" %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,0(%3)\n"
+ "5: "SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+/*
+ * Clear @size bytes at user address @to. Uses the mvcos variant when the
+ * have_mvcos static key is enabled and the xc variant otherwise, and
+ * returns what the selected helper returns.
+ */
+unsigned long __clear_user(void __user *to, unsigned long size)
+{
+ if (static_key_false(&have_mvcos))
+ return clear_user_mvcos(to, size);
+ return clear_user_xc(to, size);
+}
+EXPORT_SYMBOL(__clear_user);
+
+/*
+ * Search the user string at @src for the byte held in register 0 (0, the
+ * string terminator) with srst, looking at no more than @size bytes.
+ *
+ * tmp1 is set to src and tmp2 to src + size; size is then cleared so that
+ * a fault inside srst (exception table 0b -> 1b) returns 0. "jo 0b"
+ * restarts srst while it reports condition code 3.
+ *
+ * Returns the address left in tmp2 plus 1, minus src - i.e. the string
+ * length including the terminator - or 0 on a fault.
+ */
+static inline unsigned long strnlen_user_srst(const char __user *src,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0;
+ unsigned long tmp1, tmp2;
+
+ asm volatile(
+ " la %2,0(%1)\n"
+ " la %3,0(%0,%1)\n"
+ " "SLR" %0,%0\n"
+ " sacf 256\n"
+ "0: srst %3,%2\n"
+ " jo 0b\n"
+ " la %0,1(%3)\n" /* strnlen_user result includes \0 */
+ " "SLR" %0,%1\n"
+ "1: sacf 768\n"
+ EX_TABLE(0b,1b)
+ : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+/*
+ * Length of the user string at @src, looking at no more than @size bytes.
+ * Returns 0 right away for size == 0; otherwise calls load_kernel_asce()
+ * and returns the result of strnlen_user_srst().
+ */
+unsigned long __strnlen_user(const char __user *src, unsigned long size)
+{
+ if (unlikely(!size))
+ return 0;
+ load_kernel_asce();
+ return strnlen_user_srst(src, size);
+}
+EXPORT_SYMBOL(__strnlen_user);
+
+/*
+ * Copy a string from user address @src to kernel buffer @dst, looking at
+ * no more than @size bytes.
+ *
+ * Works in chunks that end at the next page boundary of src: each chunk
+ * is fetched with copy_from_user() and then scanned with strnlen(). The
+ * loop stops when a chunk contains a terminator or @size bytes are done.
+ *
+ * Returns 0 if size <= 0, -EFAULT if copy_from_user() reports a failure,
+ * otherwise the number of bytes counted by strnlen() over all chunks.
+ */
+long __strncpy_from_user(char *dst, const char __user *src, long size)
+{
+ size_t done, len, offset, len_str;
+
+ if (unlikely(size <= 0))
+ return 0;
+ done = 0;
+ do {
+ /* offset of src within its page */
+ offset = (size_t)src & ~PAGE_MASK;
+ len = min(size - done, PAGE_SIZE - offset);
+ if (copy_from_user(dst, src, len))
+ return -EFAULT;
+ len_str = strnlen(dst, len);
+ done += len_str;
+ src += len_str;
+ dst += len_str;
+ } while ((len_str == len) && (done < size));
+ return done;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
+
+/*
+ * The "old" uaccess variant without mvcos can be enforced with the
+ * uaccess_primary kernel parameter. This is mainly for debugging purposes.
+ */
+static int uaccess_primary __initdata;
+
+/* early_param handler: records that "uaccess_primary" was given; the argument is ignored. */
+static int __init parse_uaccess_pt(char *__unused)
+{
+ uaccess_primary = 1;
+ return 0;
+}
+early_param("uaccess_primary", parse_uaccess_pt);
+
+/*
+ * Early initcall: enable the have_mvcos static key on CONFIG_64BIT
+ * kernels when test_facility(27) succeeds and the uaccess_primary
+ * parameter was not given. Always returns 0.
+ */
+static int __init uaccess_init(void)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && !uaccess_primary && test_facility(27))
+ static_key_slow_inc(&have_mvcos);
+ return 0;
+}
+early_initcall(uaccess_init);
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
deleted file mode 100644
index 126011df14f..00000000000
--- a/arch/s390/lib/uaccess.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * arch/s390/uaccess.h
- *
- * Copyright IBM Corp. 2007
- *
- */
-
-#ifndef __ARCH_S390_LIB_UACCESS_H
-#define __ARCH_S390_LIB_UACCESS_H
-
-extern size_t copy_from_user_std(size_t, const void __user *, void *);
-extern size_t copy_to_user_std(size_t, void __user *, const void *);
-extern size_t strnlen_user_std(size_t, const char __user *);
-extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
-extern int futex_atomic_cmpxchg_std(int __user *, int, int);
-extern int futex_atomic_op_std(int, int __user *, int, int *);
-
-extern size_t copy_from_user_pt(size_t, const void __user *, void *);
-extern size_t copy_to_user_pt(size_t, void __user *, const void *);
-extern int futex_atomic_op_pt(int, int __user *, int, int *);
-extern int futex_atomic_cmpxchg_pt(int __user *, int, int);
-
-#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
deleted file mode 100644
index 3f15aaf5485..00000000000
--- a/arch/s390/lib/uaccess_mvcos.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * arch/s390/lib/uaccess_mvcos.c
- *
- * Optimized user space space access functions based on mvcos.
- *
- * Copyright (C) IBM Corp. 2006
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <asm/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-#ifndef __s390x__
-#define AHI "ahi"
-#define ALR "alr"
-#define CLR "clr"
-#define LHI "lhi"
-#define SLR "slr"
-#else
-#define AHI "aghi"
-#define ALR "algr"
-#define CLR "clgr"
-#define LHI "lghi"
-#define SLR "slgr"
-#endif
-
-static size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x)
-{
- register unsigned long reg0 asm("0") = 0x81UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
- " jz 7f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
- " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 4f\n"
- "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
- " "SLR" %0,%4\n"
- " "ALR" %2,%4\n"
- "4:"LHI" %4,-1\n"
- " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
- " bras %3,6f\n" /* memset loop */
- " xc 0(1,%2),0(%2)\n"
- "5: xc 0(256,%2),0(%2)\n"
- " la %2,256(%2)\n"
- "6:"AHI" %4,-256\n"
- " jnm 5b\n"
- " ex %4,0(%3)\n"
- " j 8f\n"
- "7:"SLR" %0,%0\n"
- "8: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,4b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t copy_from_user_mvcos_check(size_t size, const void __user *ptr, void *x)
-{
- if (size <= 256)
- return copy_from_user_std(size, ptr, x);
- return copy_from_user_mvcos(size, ptr, x);
-}
-
-static size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x)
-{
- register unsigned long reg0 asm("0") = 0x810000UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
- " jz 4f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
- " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
- " "SLR" %0,%4\n"
- " j 5f\n"
- "4:"SLR" %0,%0\n"
- "5: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t copy_to_user_mvcos_check(size_t size, void __user *ptr,
- const void *x)
-{
- if (size <= 256)
- return copy_to_user_std(size, ptr, x);
- return copy_to_user_mvcos(size, ptr, x);
-}
-
-static size_t copy_in_user_mvcos(size_t size, void __user *to,
- const void __user *from)
-{
- register unsigned long reg0 asm("0") = 0x810081UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- /* FIXME: copy with reduced length. */
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
- " jz 2f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2:"SLR" %0,%0\n"
- "3: \n"
- EX_TABLE(0b,3b)
- : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-static size_t clear_user_mvcos(size_t size, void __user *to)
-{
- register unsigned long reg0 asm("0") = 0x810000UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
- " jz 4f\n"
- "1:"ALR" %0,%2\n"
- " "SLR" %1,%2\n"
- " j 0b\n"
- "2: la %3,4095(%1)\n"/* %4 = to + 4095 */
- " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */
- " "SLR" %3,%1\n"
- " "CLR" %0,%3\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
- " "SLR" %0,%3\n"
- " j 5f\n"
- "4:"SLR" %0,%0\n"
- "5: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
- : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
- : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
- return size;
-}
-
-#ifdef CONFIG_S390_SWITCH_AMODE
-static size_t strnlen_user_mvcos(size_t count, const char __user *src)
-{
- char buf[256];
- int rc;
- size_t done, len, len_str;
-
- done = 0;
- do {
- len = min(count - done, (size_t) 256);
- rc = uaccess.copy_from_user(len, src + done, buf);
- if (unlikely(rc == len))
- return 0;
- len -= rc;
- len_str = strnlen(buf, len);
- done += len_str;
- } while ((len_str == len) && (done < count));
- return done + 1;
-}
-
-static size_t strncpy_from_user_mvcos(size_t count, const char __user *src,
- char *dst)
-{
- int rc;
- size_t done, len, len_str;
-
- done = 0;
- do {
- len = min(count - done, (size_t) 4096);
- rc = uaccess.copy_from_user(len, src + done, dst);
- if (unlikely(rc == len))
- return -EFAULT;
- len -= rc;
- len_str = strnlen(dst, len);
- done += len_str;
- } while ((len_str == len) && (done < count));
- return done;
-}
-#endif /* CONFIG_S390_SWITCH_AMODE */
-
-struct uaccess_ops uaccess_mvcos = {
- .copy_from_user = copy_from_user_mvcos_check,
- .copy_from_user_small = copy_from_user_std,
- .copy_to_user = copy_to_user_mvcos_check,
- .copy_to_user_small = copy_to_user_std,
- .copy_in_user = copy_in_user_mvcos,
- .clear_user = clear_user_mvcos,
- .strnlen_user = strnlen_user_std,
- .strncpy_from_user = strncpy_from_user_std,
- .futex_atomic_op = futex_atomic_op_std,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std,
-};
-
-#ifdef CONFIG_S390_SWITCH_AMODE
-struct uaccess_ops uaccess_mvcos_switch = {
- .copy_from_user = copy_from_user_mvcos,
- .copy_from_user_small = copy_from_user_mvcos,
- .copy_to_user = copy_to_user_mvcos,
- .copy_to_user_small = copy_to_user_mvcos,
- .copy_in_user = copy_in_user_mvcos,
- .clear_user = clear_user_mvcos,
- .strnlen_user = strnlen_user_mvcos,
- .strncpy_from_user = strncpy_from_user_mvcos,
- .futex_atomic_op = futex_atomic_op_pt,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
-};
-#endif
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
deleted file mode 100644
index d66215b0fde..00000000000
--- a/arch/s390/lib/uaccess_pt.c
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * arch/s390/lib/uaccess_pt.c
- *
- * User access functions based on page table walks for enhanced
- * system layout without hardware support.
- *
- * Copyright IBM Corp. 2006
- * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/mm.h>
-#include <asm/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return NULL;
-
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- return NULL;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- return NULL;
-
- return pte_offset_map(pmd, addr);
-}
-
-static int __handle_fault(struct mm_struct *mm, unsigned long address,
- int write_access)
-{
- struct vm_area_struct *vma;
- int ret = -EFAULT;
- int fault;
-
- if (in_atomic())
- return ret;
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, address);
- if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > address)) {
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (expand_stack(vma, address))
- goto out;
- }
-
- if (!write_access) {
- /* page not present, check vm flags */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto out;
- } else {
- if (!(vma->vm_flags & VM_WRITE))
- goto out;
- }
-
-survive:
- fault = handle_mm_fault(mm, vma, address, write_access);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto out_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
- ret = 0;
-out:
- up_read(&mm->mmap_sem);
- return ret;
-
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_global_init(current)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", current->comm);
- return ret;
-
-out_sigbus:
- up_read(&mm->mmap_sem);
- current->thread.prot_addr = address;
- current->thread.trap_no = 0x11;
- force_sig(SIGBUS, current);
- return ret;
-}
-
-static size_t __user_copy_pt(unsigned long uaddr, void *kptr,
- size_t n, int write_user)
-{
- struct mm_struct *mm = current->mm;
- unsigned long offset, pfn, done, size;
- pte_t *pte;
- void *from, *to;
-
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- pte = follow_table(mm, uaddr);
- if (!pte || !pte_present(*pte) ||
- (write_user && !pte_write(*pte)))
- goto fault;
-
- pfn = pte_pfn(*pte);
- if (!pfn_valid(pfn))
- goto out;
-
- offset = uaddr & (PAGE_SIZE - 1);
- size = min(n - done, PAGE_SIZE - offset);
- if (write_user) {
- to = (void *)((pfn << PAGE_SHIFT) + offset);
- from = kptr + done;
- } else {
- from = (void *)((pfn << PAGE_SHIFT) + offset);
- to = kptr + done;
- }
- memcpy(to, from, size);
- done += size;
- uaddr += size;
- } while (done < n);
-out:
- spin_unlock(&mm->page_table_lock);
- return n - done;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(mm, uaddr, write_user))
- return n - done;
- goto retry;
-}
-
-/*
- * Do DAT for user address by page table walk, return kernel address.
- * This function needs to be called with current->mm->page_table_lock held.
- */
-static unsigned long __dat_user_addr(unsigned long uaddr)
-{
- struct mm_struct *mm = current->mm;
- unsigned long pfn, ret;
- pte_t *pte;
- int rc;
-
- ret = 0;
-retry:
- pte = follow_table(mm, uaddr);
- if (!pte || !pte_present(*pte))
- goto fault;
-
- pfn = pte_pfn(*pte);
- if (!pfn_valid(pfn))
- goto out;
-
- ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
-out:
- return ret;
-fault:
- spin_unlock(&mm->page_table_lock);
- rc = __handle_fault(mm, uaddr, 0);
- spin_lock(&mm->page_table_lock);
- if (rc)
- goto out;
- goto retry;
-}
-
-size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
-{
- size_t rc;
-
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy(to, (void __kernel __force *) from, n);
- return 0;
- }
- rc = __user_copy_pt((unsigned long) from, to, n, 0);
- if (unlikely(rc))
- memset(to + n - rc, 0, rc);
- return rc;
-}
-
-size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
-{
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy((void __kernel __force *) to, from, n);
- return 0;
- }
- return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
-}
-
-static size_t clear_user_pt(size_t n, void __user *to)
-{
- long done, size, ret;
-
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memset((void __kernel __force *) to, 0, n);
- return 0;
- }
- done = 0;
- do {
- if (n - done > PAGE_SIZE)
- size = PAGE_SIZE;
- else
- size = n - done;
- ret = __user_copy_pt((unsigned long) to + done,
- &empty_zero_page, size, 1);
- done += size;
- if (ret)
- return ret + n - done;
- } while (done < n);
- return 0;
-}
-
-static size_t strnlen_user_pt(size_t count, const char __user *src)
-{
- char *addr;
- unsigned long uaddr = (unsigned long) src;
- struct mm_struct *mm = current->mm;
- unsigned long offset, pfn, done, len;
- pte_t *pte;
- size_t len_str;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return strnlen((const char __kernel __force *) src, count) + 1;
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- pte = follow_table(mm, uaddr);
- if (!pte || !pte_present(*pte))
- goto fault;
-
- pfn = pte_pfn(*pte);
- if (!pfn_valid(pfn)) {
- done = -1;
- goto out;
- }
-
- offset = uaddr & (PAGE_SIZE-1);
- addr = (char *)(pfn << PAGE_SHIFT) + offset;
- len = min(count - done, PAGE_SIZE - offset);
- len_str = strnlen(addr, len);
- done += len_str;
- uaddr += len_str;
- } while ((len_str == len) && (done < count));
-out:
- spin_unlock(&mm->page_table_lock);
- return done + 1;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(mm, uaddr, 0)) {
- return 0;
- }
- goto retry;
-}
-
-static size_t strncpy_from_user_pt(size_t count, const char __user *src,
- char *dst)
-{
- size_t n = strnlen_user_pt(count, src);
-
- if (!n)
- return -EFAULT;
- if (n > count)
- n = count;
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy(dst, (const char __kernel __force *) src, n);
- if (dst[n-1] == '\0')
- return n-1;
- else
- return n;
- }
- if (__user_copy_pt((unsigned long) src, dst, n, 0))
- return -EFAULT;
- if (dst[n-1] == '\0')
- return n-1;
- else
- return n;
-}
-
-static size_t copy_in_user_pt(size_t n, void __user *to,
- const void __user *from)
-{
- struct mm_struct *mm = current->mm;
- unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
- uaddr, done, size;
- unsigned long uaddr_from = (unsigned long) from;
- unsigned long uaddr_to = (unsigned long) to;
- pte_t *pte_from, *pte_to;
- int write_user;
-
- if (segment_eq(get_fs(), KERNEL_DS)) {
- memcpy((void __force *) to, (void __force *) from, n);
- return 0;
- }
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- pte_from = follow_table(mm, uaddr_from);
- if (!pte_from || !pte_present(*pte_from)) {
- uaddr = uaddr_from;
- write_user = 0;
- goto fault;
- }
-
- pte_to = follow_table(mm, uaddr_to);
- if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) {
- uaddr = uaddr_to;
- write_user = 1;
- goto fault;
- }
-
- pfn_from = pte_pfn(*pte_from);
- if (!pfn_valid(pfn_from))
- goto out;
- pfn_to = pte_pfn(*pte_to);
- if (!pfn_valid(pfn_to))
- goto out;
-
- offset_from = uaddr_from & (PAGE_SIZE-1);
- offset_to = uaddr_from & (PAGE_SIZE-1);
- offset_max = max(offset_from, offset_to);
- size = min(n - done, PAGE_SIZE - offset_max);
-
- memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
- (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
- done += size;
- uaddr_from += size;
- uaddr_to += size;
- } while (done < n);
-out:
- spin_unlock(&mm->page_table_lock);
- return n - done;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(mm, uaddr, write_user))
- return n - done;
- goto retry;
-}
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile("0: l %1,0(%6)\n" \
- "1: " insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4:\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc" );
-
-static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
-{
- int oldval = 0, newval, ret;
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- if (ret == 0)
- *old = oldval;
- return ret;
-}
-
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_op_pt(op, uaddr, oparg, old);
- spin_lock(&current->mm->page_table_lock);
- uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
- put_page(virt_to_page(uaddr));
- return ret;
-}
-
-static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
-{
- int ret;
-
- asm volatile("0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
- "2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
- : "cc", "memory" );
- return ret;
-}
-
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
- spin_lock(&current->mm->page_table_lock);
- uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
- put_page(virt_to_page(uaddr));
- return ret;
-}
-
-struct uaccess_ops uaccess_pt = {
- .copy_from_user = copy_from_user_pt,
- .copy_from_user_small = copy_from_user_pt,
- .copy_to_user = copy_to_user_pt,
- .copy_to_user_small = copy_to_user_pt,
- .copy_in_user = copy_in_user_pt,
- .clear_user = clear_user_pt,
- .strnlen_user = strnlen_user_pt,
- .strncpy_from_user = strncpy_from_user_pt,
- .futex_atomic_op = futex_atomic_op_pt,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
-};
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
deleted file mode 100644
index d2ffbadb51a..00000000000
--- a/arch/s390/lib/uaccess_std.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * arch/s390/lib/uaccess_std.c
- *
- * Standard user space access functions based on mvcp/mvcs and doing
- * interesting things in the secondary space mode.
- *
- * Copyright (C) IBM Corp. 2006
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-#ifndef __s390x__
-#define AHI "ahi"
-#define ALR "alr"
-#define CLR "clr"
-#define LHI "lhi"
-#define SLR "slr"
-#else
-#define AHI "aghi"
-#define ALR "algr"
-#define CLR "clgr"
-#define LHI "lghi"
-#define SLR "slgr"
-#endif
-
-size_t copy_from_user_std(size_t size, const void __user *ptr, void *x)
-{
- unsigned long tmp1, tmp2;
-
- tmp1 = -256UL;
- asm volatile(
- "0: mvcp 0(%0,%2),0(%1),%3\n"
- " jz 8f\n"
- "1:"ALR" %0,%3\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "2: mvcp 0(%0,%2),0(%1),%3\n"
- " jnz 1b\n"
- " j 8f\n"
- "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
- " "LHI" %3,-4096\n"
- " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "4: mvcp 0(%4,%2),0(%1),%3\n"
- " "SLR" %0,%4\n"
- " "ALR" %2,%4\n"
- "5:"LHI" %4,-1\n"
- " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
- " bras %3,7f\n" /* memset loop */
- " xc 0(1,%2),0(%2)\n"
- "6: xc 0(256,%2),0(%2)\n"
- " la %2,256(%2)\n"
- "7:"AHI" %4,-256\n"
- " jnm 6b\n"
- " ex %4,0(%3)\n"
- " j 9f\n"
- "8:"SLR" %0,%0\n"
- "9: \n"
- EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-static size_t copy_from_user_std_check(size_t size, const void __user *ptr,
- void *x)
-{
- if (size <= 1024)
- return copy_from_user_std(size, ptr, x);
- return copy_from_user_pt(size, ptr, x);
-}
-
-size_t copy_to_user_std(size_t size, void __user *ptr, const void *x)
-{
- unsigned long tmp1, tmp2;
-
- tmp1 = -256UL;
- asm volatile(
- "0: mvcs 0(%0,%1),0(%2),%3\n"
- " jz 5f\n"
- "1:"ALR" %0,%3\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "2: mvcs 0(%0,%1),0(%2),%3\n"
- " jnz 1b\n"
- " j 5f\n"
- "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
- " "LHI" %3,-4096\n"
- " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "4: mvcs 0(%4,%1),0(%2),%3\n"
- " "SLR" %0,%4\n"
- " j 6f\n"
- "5:"SLR" %0,%0\n"
- "6: \n"
- EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-static size_t copy_to_user_std_check(size_t size, void __user *ptr,
- const void *x)
-{
- if (size <= 1024)
- return copy_to_user_std(size, ptr, x);
- return copy_to_user_pt(size, ptr, x);
-}
-
-static size_t copy_in_user_std(size_t size, void __user *to,
- const void __user *from)
-{
- unsigned long tmp1;
-
- asm volatile(
- " "AHI" %0,-1\n"
- " jo 5f\n"
- " sacf 256\n"
- " bras %3,3f\n"
- "0:"AHI" %0,257\n"
- "1: mvc 0(1,%1),0(%2)\n"
- " la %1,1(%1)\n"
- " la %2,1(%2)\n"
- " "AHI" %0,-1\n"
- " jnz 1b\n"
- " j 5f\n"
- "2: mvc 0(256,%1),0(%2)\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "3:"AHI" %0,-256\n"
- " jnm 2b\n"
- "4: ex %0,1b-0b(%3)\n"
- " sacf 0\n"
- "5: "SLR" %0,%0\n"
- "6:\n"
- EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
- : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
- : : "cc", "memory");
- return size;
-}
-
-static size_t clear_user_std(size_t size, void __user *to)
-{
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " "AHI" %0,-1\n"
- " jo 5f\n"
- " sacf 256\n"
- " bras %3,3f\n"
- " xc 0(1,%1),0(%1)\n"
- "0:"AHI" %0,257\n"
- " la %2,255(%1)\n" /* %2 = ptr + 255 */
- " srl %2,12\n"
- " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
- " "SLR" %2,%1\n"
- " "CLR" %0,%2\n" /* clear crosses next page boundary? */
- " jnh 5f\n"
- " "AHI" %2,-1\n"
- "1: ex %2,0(%3)\n"
- " "AHI" %2,1\n"
- " "SLR" %0,%2\n"
- " j 5f\n"
- "2: xc 0(256,%1),0(%1)\n"
- " la %1,256(%1)\n"
- "3:"AHI" %0,-256\n"
- " jnm 2b\n"
- "4: ex %0,0(%3)\n"
- " sacf 0\n"
- "5: "SLR" %0,%0\n"
- "6:\n"
- EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
- : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-size_t strnlen_user_std(size_t size, const char __user *src)
-{
- register unsigned long reg0 asm("0") = 0UL;
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " la %2,0(%1)\n"
- " la %3,0(%0,%1)\n"
- " "SLR" %0,%0\n"
- " sacf 256\n"
- "0: srst %3,%2\n"
- " jo 0b\n"
- " la %0,1(%3)\n" /* strnlen_user results includes \0 */
- " "SLR" %0,%1\n"
- "1: sacf 0\n"
- EX_TABLE(0b,1b)
- : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
-{
- register unsigned long reg0 asm("0") = 0UL;
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " la %3,0(%1)\n"
- " la %4,0(%0,%1)\n"
- " sacf 256\n"
- "0: srst %4,%3\n"
- " jo 0b\n"
- " sacf 0\n"
- " la %0,0(%4)\n"
- " jh 1f\n" /* found \0 in string ? */
- " "AHI" %4,1\n" /* include \0 in copy */
- "1:"SLR" %0,%1\n" /* %0 = return length (without \0) */
- " "SLR" %4,%1\n" /* %4 = copy length (including \0) */
- "2: mvcp 0(%4,%2),0(%1),%5\n"
- " jz 9f\n"
- "3:"AHI" %4,-256\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "4: mvcp 0(%4,%2),0(%1),%5\n"
- " jnz 3b\n"
- " j 9f\n"
- "7: sacf 0\n"
- "8:"LHI" %0,%6\n"
- "9:\n"
- EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b)
- : "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2)
- : "d" (reg0), "K" (-EFAULT) : "cc", "memory");
- return size;
-}
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile( \
- " sacf 256\n" \
- "0: l %1,0(%6)\n" \
- "1:"insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4: sacf 0\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc");
-
-int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
-{
- int oldval = 0, newval, ret;
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- *old = oldval;
- return ret;
-}
-
-int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval)
-{
- int ret;
-
- asm volatile(
- " sacf 256\n"
- "0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
- "2: sacf 0\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
- : "cc", "memory" );
- return ret;
-}
-
-struct uaccess_ops uaccess_std = {
- .copy_from_user = copy_from_user_std_check,
- .copy_from_user_small = copy_from_user_std,
- .copy_to_user = copy_to_user_std_check,
- .copy_to_user_small = copy_to_user_std,
- .copy_in_user = copy_in_user_std,
- .clear_user = clear_user_std,
- .strnlen_user = strnlen_user_std,
- .strncpy_from_user = strncpy_from_user_std,
- .futex_atomic_op = futex_atomic_op_std,
- .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std,
-};
diff --git a/arch/s390/lib/ucmpdi2.c b/arch/s390/lib/ucmpdi2.c
new file mode 100644
index 00000000000..3e05ff53258
--- /dev/null
+++ b/arch/s390/lib/ucmpdi2.c
@@ -0,0 +1,26 @@
+#include <linux/module.h>
+
+union ull_union { /* overlays one unsigned long long with two unsigned int halves */
+ unsigned long long ull;
+ struct {
+ unsigned int high; /* first word in memory; this is the upper half only where the most significant word is stored first */
+ unsigned int low; /* second word in memory */
+ } ui;
+};
+
+int __ucmpdi2(unsigned long long a, unsigned long long b) /* three-way unsigned compare: returns 0 if a < b, 1 if a == b, 2 if a > b */
+{
+ union ull_union au = {.ull = a};
+ union ull_union bu = {.ull = b};
+
+ if (au.ui.high < bu.ui.high) /* the high words decide unless they are equal */
+ return 0;
+ else if (au.ui.high > bu.ui.high)
+ return 2;
+ if (au.ui.low < bu.ui.low) /* high words equal: the low words decide */
+ return 0;
+ else if (au.ui.low > bu.ui.low)
+ return 2;
+ return 1; /* both halves equal */
+}
+EXPORT_SYMBOL(__ucmpdi2);
diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile
index c8489034105..51d399549f6 100644
--- a/arch/s390/math-emu/Makefile
+++ b/arch/s390/math-emu/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_MATHEMU) := math.o
-EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w
+ccflags-y := -I$(src) -Iinclude/math-emu -w
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
index 3ee78ccb617..a6ba0d72433 100644
--- a/arch/s390/math-emu/math.c
+++ b/arch/s390/math-emu/math.c
@@ -1,8 +1,6 @@
/*
- * arch/s390/math-emu/math.c
- *
* S390 version
- * Copyright (C) 1999-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999, 2001
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
*
* 'math.c' emulates IEEE instructions on a S390 processor
@@ -21,6 +19,8 @@
#include <math-emu/double.h>
#include <math-emu/quad.h>
+#define FPC_VALID_MASK 0xF8F8FF03
+
/*
* I miss a macro to round a floating point number to the
* nearest integer in the same floating point format.
@@ -2088,7 +2088,7 @@ int math_emu_ldr(__u8 *opcode) {
__u16 opc = *((__u16 *) opcode);
if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */
- /* we got an exception therfore ry can't be in {0,2,4,6} */
+ /* we got an exception therefore ry can't be in {0,2,4,6} */
asm volatile( /* load rx from fp_regs.fprs[ry] */
" bras 1,0f\n"
" ld 0,0(%1)\n"
@@ -2118,7 +2118,7 @@ int math_emu_ler(__u8 *opcode) {
__u16 opc = *((__u16 *) opcode);
if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */
- /* we got an exception therfore ry can't be in {0,2,4,6} */
+ /* we got an exception therefore ry can't be in {0,2,4,6} */
asm volatile( /* load rx from fp_regs.fprs[ry] */
" bras 1,0f\n"
" le 0,0(%1)\n"
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 2a745813454..839592ca265 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,7 +2,9 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
-obj-$(CONFIG_CMM) += cmm.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_PAGE_STATES) += page-states.o
+obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
+obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o
+
+obj-$(CONFIG_CMM) += cmm.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 413c240cbca..79ddd580d60 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -1,29 +1,32 @@
/*
- * arch/s390/mm/cmm.c
+ * Collaborative memory management interface.
*
- * S390 version
- * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ * Copyright IBM Corp 2003, 2010
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
*
- * Collaborative memory management interface.
*/
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/oom.h>
+#include <linux/suspend.h>
+#include <linux/uaccess.h>
#include <asm/pgalloc.h>
-#include <asm/uaccess.h>
#include <asm/diag.h>
-static char *sender = "VMRMSVM";
+#ifdef CONFIG_CMM_IUCV
+static char *cmm_default_sender = "VMRMSVM";
+#endif
+static char *sender;
module_param(sender, charp, 0400);
MODULE_PARM_DESC(sender,
"Guest name that may send SMSG messages (default VMRMSVM)");
@@ -44,20 +47,21 @@ static volatile long cmm_pages_target;
static volatile long cmm_timed_pages_target;
static long cmm_timeout_pages;
static long cmm_timeout_seconds;
+static int cmm_suspended;
static struct cmm_page_array *cmm_page_list;
static struct cmm_page_array *cmm_timed_page_list;
static DEFINE_SPINLOCK(cmm_lock);
static struct task_struct *cmm_thread_ptr;
-static wait_queue_head_t cmm_thread_wait;
-static struct timer_list cmm_timer;
+static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait);
+static DEFINE_TIMER(cmm_timer, NULL, 0, 0);
static void cmm_timer_fn(unsigned long);
static void cmm_set_timer(void);
-static long
-cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
+static long cmm_alloc_pages(long nr, long *counter,
+ struct cmm_page_array **list)
{
struct cmm_page_array *pa, *npa;
unsigned long addr;
@@ -87,7 +91,7 @@ cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
} else
free_page((unsigned long) npa);
}
- diag10(addr);
+ diag10_range(addr >> PAGE_SHIFT, 1);
pa->pages[pa->index++] = addr;
(*counter)++;
spin_unlock(&cmm_lock);
@@ -96,8 +100,7 @@ cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list)
return nr;
}
-static long
-cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
{
struct cmm_page_array *pa;
unsigned long addr;
@@ -137,19 +140,18 @@ static int cmm_oom_notify(struct notifier_block *self,
}
static struct notifier_block cmm_oom_nb = {
- .notifier_call = cmm_oom_notify
+ .notifier_call = cmm_oom_notify,
};
-static int
-cmm_thread(void *dummy)
+static int cmm_thread(void *dummy)
{
int rc;
while (1) {
rc = wait_event_interruptible(cmm_thread_wait,
- (cmm_pages != cmm_pages_target ||
- cmm_timed_pages != cmm_timed_pages_target ||
- kthread_should_stop()));
+ (!cmm_suspended && (cmm_pages != cmm_pages_target ||
+ cmm_timed_pages != cmm_timed_pages_target)) ||
+ kthread_should_stop());
if (kthread_should_stop() || rc == -ERESTARTSYS) {
cmm_pages_target = cmm_pages;
cmm_timed_pages_target = cmm_timed_pages;
@@ -167,7 +169,7 @@ cmm_thread(void *dummy)
cmm_timed_pages_target = cmm_timed_pages;
} else if (cmm_timed_pages_target < cmm_timed_pages) {
cmm_free_pages(1, &cmm_timed_pages,
- &cmm_timed_page_list);
+ &cmm_timed_page_list);
}
if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer))
cmm_set_timer();
@@ -175,14 +177,12 @@ cmm_thread(void *dummy)
return 0;
}
-static void
-cmm_kick_thread(void)
+static void cmm_kick_thread(void)
{
wake_up(&cmm_thread_wait);
}
-static void
-cmm_set_timer(void)
+static void cmm_set_timer(void)
{
if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
if (timer_pending(&cmm_timer))
@@ -199,8 +199,7 @@ cmm_set_timer(void)
add_timer(&cmm_timer);
}
-static void
-cmm_timer_fn(unsigned long ignored)
+static void cmm_timer_fn(unsigned long ignored)
{
long nr;
@@ -213,61 +212,53 @@ cmm_timer_fn(unsigned long ignored)
cmm_set_timer();
}
-void
-cmm_set_pages(long nr)
+static void cmm_set_pages(long nr)
{
cmm_pages_target = nr;
cmm_kick_thread();
}
-long
-cmm_get_pages(void)
+static long cmm_get_pages(void)
{
return cmm_pages;
}
-void
-cmm_add_timed_pages(long nr)
+static void cmm_add_timed_pages(long nr)
{
cmm_timed_pages_target += nr;
cmm_kick_thread();
}
-long
-cmm_get_timed_pages(void)
+static long cmm_get_timed_pages(void)
{
return cmm_timed_pages;
}
-void
-cmm_set_timeout(long nr, long seconds)
+static void cmm_set_timeout(long nr, long seconds)
{
cmm_timeout_pages = nr;
cmm_timeout_seconds = seconds;
cmm_set_timer();
}
-static int
-cmm_skip_blanks(char *cp, char **endp)
+static int cmm_skip_blanks(char *cp, char **endp)
{
char *str;
- for (str = cp; *str == ' ' || *str == '\t'; str++);
+ for (str = cp; *str == ' ' || *str == '\t'; str++)
+ ;
*endp = str;
return str != cp;
}
-#ifdef CONFIG_CMM_PROC
-
static struct ctl_table cmm_table[];
-static int
-cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int cmm_pages_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[16], *p;
+ unsigned int len;
long nr;
- int len;
if (!*lenp || (*ppos && !write)) {
*lenp = 0;
@@ -302,13 +293,12 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
return 0;
}
-static int
-cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
long nr, seconds;
- int len;
+ unsigned int len;
if (!*lenp || (*ppos && !write)) {
*lenp = 0;
@@ -343,37 +333,34 @@ static struct ctl_table cmm_table[] = {
{
.procname = "cmm_pages",
.mode = 0644,
- .proc_handler = &cmm_pages_handler,
+ .proc_handler = cmm_pages_handler,
},
{
.procname = "cmm_timed_pages",
.mode = 0644,
- .proc_handler = &cmm_pages_handler,
+ .proc_handler = cmm_pages_handler,
},
{
.procname = "cmm_timeout",
.mode = 0644,
- .proc_handler = &cmm_timeout_handler,
+ .proc_handler = cmm_timeout_handler,
},
- { .ctl_name = 0 }
+ { }
};
static struct ctl_table cmm_dir_table[] = {
{
- .ctl_name = CTL_VM,
.procname = "vm",
.maxlen = 0,
.mode = 0555,
.child = cmm_table,
},
- { .ctl_name = 0 }
+ { }
};
-#endif
#ifdef CONFIG_CMM_IUCV
#define SMSG_PREFIX "CMM"
-static void
-cmm_smsg_target(char *from, char *msg)
+static void cmm_smsg_target(const char *from, char *msg)
{
long nr, seconds;
@@ -411,17 +398,55 @@ cmm_smsg_target(char *from, char *msg)
static struct ctl_table_header *cmm_sysctl_header;
-static int
-cmm_init (void)
+static int cmm_suspend(void) /* called for PM_HIBERNATION_PREPARE: sets the suspended flag and hands both page lists to cmm_free_pages(); always returns 0 */
+{
+ cmm_suspended = 1; /* while set, cmm_thread()'s wait condition ignores differences between page counts and targets */
+ cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); /* request release of as many pages as are currently counted */
+ cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); /* same for the timed list */
+ return 0;
+}
+
+static int cmm_resume(void) /* called for PM_POST_HIBERNATION: clears the suspended flag and wakes the CMM thread; always returns 0 */
+{
+ cmm_suspended = 0;
+ cmm_kick_thread(); /* wake the waiter so it re-evaluates the page targets now that the flag is clear */
+ return 0;
+}
+
+static int cmm_power_event(struct notifier_block *this, /* notifier callback registered through cmm_power_notifier */
+ unsigned long event, void *ptr) /* only event is examined; this and ptr are unused */
+{
+ switch (event) {
+ case PM_POST_HIBERNATION:
+ return cmm_resume(); /* passes on the 0 returned by cmm_resume() */
+ case PM_HIBERNATION_PREPARE:
+ return cmm_suspend(); /* passes on the 0 returned by cmm_suspend() */
+ default:
+ return NOTIFY_DONE; /* every other event is ignored */
+ }
+}
+
+static struct notifier_block cmm_power_notifier = { /* passed to register_pm_notifier() in cmm_init() and unregister_pm_notifier() in cmm_exit() */
+ .notifier_call = cmm_power_event,
+};
+
+static int __init cmm_init(void)
{
int rc = -ENOMEM;
-#ifdef CONFIG_CMM_PROC
cmm_sysctl_header = register_sysctl_table(cmm_dir_table);
if (!cmm_sysctl_header)
- goto out;
-#endif
+ goto out_sysctl;
#ifdef CONFIG_CMM_IUCV
+ /* convert sender to uppercase characters */
+ if (sender) {
+ int len = strlen(sender);
+ while (len--)
+ sender[len] = toupper(sender[len]);
+ } else {
+ sender = cmm_default_sender;
+ }
+
rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
if (rc < 0)
goto out_smsg;
@@ -429,51 +454,42 @@ cmm_init (void)
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
goto out_oom_notify;
- init_waitqueue_head(&cmm_thread_wait);
- init_timer(&cmm_timer);
+ rc = register_pm_notifier(&cmm_power_notifier);
+ if (rc)
+ goto out_pm;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
- rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0;
- if (!rc)
- goto out;
- /*
- * kthread_create failed. undo all the stuff from above again.
- */
- unregister_oom_notifier(&cmm_oom_nb);
+ if (!IS_ERR(cmm_thread_ptr))
+ return 0;
+ rc = PTR_ERR(cmm_thread_ptr);
+ unregister_pm_notifier(&cmm_power_notifier);
+out_pm:
+ unregister_oom_notifier(&cmm_oom_nb);
out_oom_notify:
#ifdef CONFIG_CMM_IUCV
smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
out_smsg:
#endif
-#ifdef CONFIG_CMM_PROC
unregister_sysctl_table(cmm_sysctl_header);
-#endif
-out:
+out_sysctl:
+ del_timer_sync(&cmm_timer);
return rc;
}
+module_init(cmm_init);
-static void
-cmm_exit(void)
+static void __exit cmm_exit(void)
{
- kthread_stop(cmm_thread_ptr);
- unregister_oom_notifier(&cmm_oom_nb);
- cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
- cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
-#ifdef CONFIG_CMM_PROC
unregister_sysctl_table(cmm_sysctl_header);
-#endif
#ifdef CONFIG_CMM_IUCV
smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
#endif
+ unregister_pm_notifier(&cmm_power_notifier);
+ unregister_oom_notifier(&cmm_oom_nb);
+ kthread_stop(cmm_thread_ptr);
+ del_timer_sync(&cmm_timer);
+ cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+ cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
}
-
-module_init(cmm_init);
module_exit(cmm_exit);
-EXPORT_SYMBOL(cmm_set_pages);
-EXPORT_SYMBOL(cmm_get_pages);
-EXPORT_SYMBOL(cmm_add_timed_pages);
-EXPORT_SYMBOL(cmm_get_timed_pages);
-EXPORT_SYMBOL(cmm_set_timeout);
-
MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
new file mode 100644
index 00000000000..46d517c3c76
--- /dev/null
+++ b/arch/s390/mm/dump_pagetables.c
@@ -0,0 +1,246 @@
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+
+static unsigned long max_addr;
+
+struct addr_marker { /* names the address at which one section of the dump begins */
+ unsigned long start_address; /* first address belonging to the section */
+ const char *name; /* text printed as "---[ name ]---" by note_page() */
+};
+
+enum address_markers_idx { /* indices into address_markers[] */
+ IDENTITY_NR = 0,
+ KERNEL_START_NR,
+ KERNEL_END_NR,
+ VMEMMAP_NR,
+ VMALLOC_NR,
+#ifdef CONFIG_64BIT /* the modules marker exists only in 64-bit builds */
+ MODULES_NR,
+#endif
+};
+
+static struct addr_marker address_markers[] = { /* section markers; note_page() steps through them with marker[1], so it relies on ascending start addresses */
+ [IDENTITY_NR] = {0, "Identity Mapping"},
+ [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
+ [VMEMMAP_NR] = {0, "vmemmap Area"}, /* placeholder 0; real address is set in pt_dump_init() */
+ [VMALLOC_NR] = {0, "vmalloc Area"}, /* placeholder 0; real address is set in pt_dump_init() */
+#ifdef CONFIG_64BIT
+ [MODULES_NR] = {0, "Modules Area"}, /* placeholder 0; real address is set in pt_dump_init() */
+#endif
+ { -1, NULL } /* terminator: -1 converts to the largest unsigned long */
+};
+
+struct pg_state { /* walker state describing the range that is currently being accumulated */
+ int level; /* level of the current range; 0 means nothing has been noted yet */
+ unsigned int current_prot; /* protection bits shared by the current range */
+ unsigned long start_address; /* where the current range began */
+ unsigned long current_address; /* address of the entry being visited; set by the walkers before each note_page() call */
+ const struct addr_marker *marker; /* section of address_markers[] the walk is currently in */
+};
+
+static void print_prot(struct seq_file *m, unsigned int pr, int level) /* prints the level name and the flags in pr for one range and ends the output line */
+{
+ static const char * const level_name[] =
+ { "ASCE", "PGD", "PUD", "PMD", "PTE" }; /* indexed by level, so level must be 0..4 */
+
+ seq_printf(m, "%s ", level_name[level]);
+ if (pr & _PAGE_INVALID) { /* invalid ranges print just "I"; the other flags are not shown */
+ seq_printf(m, "I\n");
+ return;
+ }
+ seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
+ seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); /* NOTE(review): callers also pass segment/region flag bits here; presumably they share bit positions with _PAGE_PROTECT/_PAGE_CO - confirm */
+ seq_putc(m, '\n');
+}
+
+static void note_page(struct seq_file *m, struct pg_state *st, /* merges consecutive entries with equal prot and level into one printed range */
+ unsigned int new_prot, int level) /* new_prot/level describe the entry at st->current_address */
+{
+ static const char units[] = "KMGTPE"; /* size suffixes, each 1024 times the previous one */
+ int width = sizeof(unsigned long) * 2; /* hex digits needed to print a full address */
+ const char *unit = units;
+ unsigned int prot, cur;
+ unsigned long delta;
+
+ /*
+ * If we have a "break" in the series, we need to flush the state
+ * that we have now. "break" is either changing perms, levels or
+ * address space marker.
+ */
+ prot = new_prot;
+ cur = st->current_prot;
+
+ if (!st->level) { /* tests the stored level, not the level argument */
+ /* First entry */
+ st->current_prot = new_prot;
+ st->level = level;
+ st->marker = address_markers;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != cur || level != st->level ||
+ st->current_address >= st->marker[1].start_address) { /* third case: the walk reached the next marker */
+ /* Print the actual finished series */
+ seq_printf(m, "0x%0*lx-0x%0*lx",
+ width, st->start_address,
+ width, st->current_address);
+ delta = (st->current_address - st->start_address) >> 10; /* range size in units of 1024 bytes */
+ while (!(delta & 0x3ff) && unit[1]) { /* move to the next larger unit while the size is a multiple of 1024 and one exists */
+ delta >>= 10;
+ unit++;
+ }
+ seq_printf(m, "%9lu%c ", delta, *unit);
+ print_prot(m, st->current_prot, st->level); /* describes the finished range, not the new entry */
+ if (st->current_address >= st->marker[1].start_address) {
+ st->marker++; /* advances by one marker per call */
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ }
+ st->start_address = st->current_address; /* the new entry opens the next range */
+ st->current_prot = new_prot;
+ st->level = level;
+ }
+}
+
+/*
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
+ */
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, /* reports each page table entry reached through pmd, starting at addr */
+ pmd_t *pmd, unsigned long addr)
+{
+ unsigned int prot;
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { /* stops early once addr reaches max_addr */
+ st->current_address = addr; /* note_page() reads the position from st */
+ pte = pte_offset_kernel(pmd, addr);
+ prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); /* only these two flags are passed on */
+ note_page(m, st, prot, 4); /* 4 selects "PTE" in print_prot() */
+ addr += PAGE_SIZE;
+ }
+}
+
+#ifdef CONFIG_64BIT
+#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
+#else
+#define _PMD_PROT_MASK 0
+#endif
+
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, /* reports each pmd entry reached through pud, descending into page tables where present */
+ pud_t *pud, unsigned long addr)
+{
+ unsigned int prot;
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { /* stops early once addr reaches max_addr */
+ st->current_address = addr;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ if (pmd_large(*pmd)) { /* large entry: reported at this level, no page table walk */
+ prot = pmd_val(*pmd) & _PMD_PROT_MASK; /* the mask is 0 without CONFIG_64BIT */
+ note_page(m, st, prot, 3); /* 3 selects "PMD" in print_prot() */
+ } else
+ walk_pte_level(m, st, pmd, addr);
+ } else
+ note_page(m, st, _PAGE_INVALID, 3); /* empty entry is reported as invalid */
+ addr += PMD_SIZE;
+ }
+}
+
+#ifdef CONFIG_64BIT
+#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO)
+#else
+#define _PUD_PROT_MASK 0
+#endif
+
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, /* reports each pud entry reached through pgd, descending into pmd tables where present */
+ pgd_t *pgd, unsigned long addr)
+{
+ unsigned int prot;
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { /* stops early once addr reaches max_addr */
+ st->current_address = addr;
+ pud = pud_offset(pgd, addr);
+ if (!pud_none(*pud)) /* unbraced: the first else below pairs with the inner if, the last else with this one */
+ if (pud_large(*pud)) { /* large entry: reported at this level, no pmd walk */
+ prot = pud_val(*pud) & _PUD_PROT_MASK; /* the mask is 0 without CONFIG_64BIT */
+ note_page(m, st, prot, 2); /* 2 selects "PUD" in print_prot() */
+ } else
+ walk_pmd_level(m, st, pud, addr);
+ else
+ note_page(m, st, _PAGE_INVALID, 2); /* empty entry is reported as invalid */
+ addr += PUD_SIZE;
+ }
+}
+
+static void walk_pgd_level(struct seq_file *m) /* walks the kernel page tables from address 0 up to max_addr and prints every range to m */
+{
+ unsigned long addr = 0;
+ struct pg_state st;
+ pgd_t *pgd;
+ int i;
+
+ memset(&st, 0, sizeof(st)); /* level 0 makes note_page() treat its first call as the first entry */
+ for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
+ st.current_address = addr;
+ pgd = pgd_offset_k(addr);
+ if (!pgd_none(*pgd))
+ walk_pud_level(m, &st, pgd, addr);
+ else
+ note_page(m, &st, _PAGE_INVALID, 1); /* 1 selects "PGD" in print_prot() */
+ addr += PGDIR_SIZE;
+ }
+ /* Flush out the last page */
+ st.current_address = max_addr; /* end address of the final range */
+ note_page(m, &st, 0, 0); /* level 0 differs from every level noted above, which forces the print */
+}
+
+static int ptdump_show(struct seq_file *m, void *v) /* show callback given to single_open(): dumps the page tables into m; v is unused; always returns 0 */
+{
+ walk_pgd_level(m);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp) /* open handler: binds ptdump_show to the file and returns single_open()'s result */
+{
+ return single_open(filp, ptdump_show, NULL); /* NULL: no private data is handed to the show callback */
+}
+
+static const struct file_operations ptdump_fops = { /* operations for the debugfs file created in pt_dump_init() */
+ .open = ptdump_open,
+ .read = seq_read, /* read, llseek and release use the stock seq_file helpers */
+ .llseek = seq_lseek,
+ .release = single_release, /* counterpart of the single_open() call in ptdump_open() */
+};
+
+static int pt_dump_init(void) /* initcall: computes max_addr, fills in the run-time marker addresses and creates the debugfs file; always returns 0 */
+{
+ /*
+ * Figure out the maximum virtual address being accessible with the
+ * kernel ASCE. We need this to keep the page table walker functions
+ * from accessing non-existent entries.
+ */
+#ifdef CONFIG_32BIT
+ max_addr = 1UL << 31;
+#else
+ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; /* temporarily holds the masked type field of the kernel ASCE */
+ max_addr = 1UL << (max_addr * 11 + 31); /* each step of the type value adds 11 address bits; NOTE(review): a type value of 3 would give a shift count of 64 - confirm the kernel ASCE never has it */
+ address_markers[MODULES_NR].start_address = MODULES_VADDR;
+#endif
+ address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; /* replaces the placeholder 0 from the static initializer */
+ address_markers[VMALLOC_NR].start_address = VMALLOC_START; /* replaces the placeholder 0 from the static initializer */
+ debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); /* mode 0400, NULL parent; the return value is not checked */
+ return 0;
+}
+device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
new file mode 100644
index 00000000000..4d1ee88864e
--- /dev/null
+++ b/arch/s390/mm/extable.c
@@ -0,0 +1,81 @@
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/uaccess.h>
+
+/*
+ * Search one exception table for an entry corresponding to the
+ * given instruction address, and return the address of the entry,
+ * or NULL if none is found.
+ * We use a binary search, and thus we assume that the table is
+ * already sorted.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+	       const struct exception_table_entry *last,
+	       unsigned long value)
+{
+	/*
+	 * Bisect the closed range [first, last]; both bounds are inclusive,
+	 * so the range is exhausted once first has moved past last.
+	 */
+	while (first <= last) {
+		const struct exception_table_entry *probe;
+		unsigned long insn_addr;
+
+		probe = first + ((last - first) >> 1);
+		insn_addr = extable_insn(probe);
+		if (insn_addr == value)
+			return probe;
+		if (insn_addr < value)
+			first = probe + 1;	/* continue in the upper half */
+		else
+			last = probe - 1;	/* continue in the lower half */
+	}
+	return NULL;
+}
+
+/*
+ * The exception table needs to be sorted so that the binary
+ * search that we use to find entries in it works properly.
+ * This is used both for the kernel exception table and for
+ * the exception tables of modules that get loaded.
+ *
+ */
+/*
+ * Comparison callback for sort(): orders two exception table entries by
+ * their insn field. Returns a negative value, zero or a positive value when
+ * @a sorts before, equal to or after @b.
+ */
+static int cmp_ex(const void *a, const void *b)
+{
+	const struct exception_table_entry *x = a, *y = b;
+
+	/*
+	 * This compare is only valid after normalization.
+	 *
+	 * Compare explicitly instead of returning the difference: the
+	 * subtraction could overflow (or be truncated to int) when the two
+	 * values are far apart, which would report the wrong order.
+	 */
+	if (x->insn < y->insn)
+		return -1;
+	if (x->insn > y->insn)
+		return 1;
+	return 0;
+}
+
+/*
+ * Sort the exception table [start, finish) by instruction address.
+ *
+ * Before sorting, a per-entry offset (0, 8, 16, ...) is added to each insn
+ * field so that cmp_ex() compares values sharing one base; the same offsets
+ * are subtracted again afterwards, by position in the sorted table.
+ * NOTE(review): the stride of 8 presumably equals
+ * sizeof(struct exception_table_entry) - confirm against the header.
+ */
+void sort_extable(struct exception_table_entry *start,
+		  struct exception_table_entry *finish)
+{
+	struct exception_table_entry *entry;
+	int offset;
+
+	/* Normalize entries to being relative to the start of the section */
+	offset = 0;
+	for (entry = start; entry < finish; entry++) {
+		entry->insn += offset;
+		offset += 8;
+	}
+
+	sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
+
+	/* Denormalize all entries */
+	offset = 0;
+	for (entry = start; entry < finish; entry++) {
+		entry->insn -= offset;
+		offset += 8;
+	}
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * If the exception table is sorted, any entries referring to the module
+ * init section will be at the beginning or the end.
+ */
+void trim_init_extable(struct module *m)
+{
+	/* Trim the beginning: advance past leading entries in module init */
+	for (; m->num_exentries; m->extable++, m->num_exentries--) {
+		if (!within_module_init(extable_insn(m->extable), m))
+			break;
+	}
+	/* Trim the end: shorten the table while its last entry is in init */
+	for (; m->num_exentries; m->num_exentries--) {
+		if (!within_module_init(
+			extable_insn(m->extable + m->num_exentries - 1), m))
+			break;
+	}
+}
+#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index f231f5ec74b..519bba716cc 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -1,12 +1,14 @@
/*
- * File...........: arch/s390/mm/extmem.c
* Author(s)......: Carsten Otte <cotte@de.ibm.com>
* Rob M van der Heij <rvdheij@nl.ibm.com>
* Steven Shultz <shultzss@us.ibm.com>
* Bugreports.to..: <Linux390@de.ibm.com>
- * (C) IBM Corporation 2002-2004
+ * Copyright IBM Corp. 2002, 2004
*/
+#define KMSG_COMPONENT "extmem"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/spinlock.h>
@@ -24,39 +26,46 @@
#include <asm/cpcmd.h>
#include <asm/setup.h>
-#define DCSS_DEBUG /* Debug messages on/off */
-
-#define DCSS_NAME "extmem"
-#ifdef DCSS_DEBUG
-#define PRINT_DEBUG(x...) printk(KERN_DEBUG DCSS_NAME " debug:" x)
-#else
-#define PRINT_DEBUG(x...) do {} while (0)
-#endif
-#define PRINT_INFO(x...) printk(KERN_INFO DCSS_NAME " info:" x)
-#define PRINT_WARN(x...) printk(KERN_WARNING DCSS_NAME " warning:" x)
-#define PRINT_ERR(x...) printk(KERN_ERR DCSS_NAME " error:" x)
-
-
#define DCSS_LOADSHR 0x00
#define DCSS_LOADNSR 0x04
#define DCSS_PURGESEG 0x08
#define DCSS_FINDSEG 0x0c
#define DCSS_LOADNOLY 0x10
#define DCSS_SEGEXT 0x18
+#define DCSS_LOADSHRX 0x20
+#define DCSS_LOADNSRX 0x24
+#define DCSS_FINDSEGX 0x2c
+#define DCSS_SEGEXTX 0x38
#define DCSS_FINDSEGA 0x0c
struct qrange {
- unsigned int start; // 3byte start address, 1 byte type
- unsigned int end; // 3byte end address, 1 byte reserved
+ unsigned long start; /* last byte type */
+ unsigned long end; /* last byte reserved */
};
struct qout64 {
+ unsigned long segstart;
+ unsigned long segend;
+ int segcnt;
+ int segrcnt;
+ struct qrange range[6];
+};
+
+#ifdef CONFIG_64BIT
+struct qrange_old {
+ unsigned int start; /* last byte type */
+ unsigned int end; /* last byte reserved */
+};
+
+/* output area format for the Diag x'64' old subcode x'18' */
+struct qout64_old {
int segstart;
int segend;
int segcnt;
int segrcnt;
- struct qrange range[6];
+ struct qrange_old range[6];
};
+#endif
struct qin64 {
char qopcode;
@@ -86,6 +95,55 @@ static DEFINE_MUTEX(dcss_lock);
static LIST_HEAD(dcss_list);
static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
"EW/EN-MIXED" };
+static int loadshr_scode, loadnsr_scode, findseg_scode;
+static int segext_scode, purgeseg_scode;
+static int scode_set;
+
+/*
+ * Select the Diag x'64' subcodes stored in the *_scode variables.
+ *
+ * With CONFIG_64BIT a probe Diag x'64' using subcode DCSS_FINDSEGX is issued
+ * for the name "dummy". Unless the probe ends with rc == 3, the extended
+ * (...X) subcodes are selected. In every other case, including kernels built
+ * without CONFIG_64BIT, the old subcodes are selected.
+ *
+ * Returns 0, or -ENOMEM if the probe buffer cannot be allocated.
+ */
+static int
+dcss_set_subcodes(void)
+{
+#ifdef CONFIG_64BIT
+ char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
+ unsigned long rx, ry;
+ int rc;
+
+ if (name == NULL)
+ return -ENOMEM;
+
+ rx = (unsigned long) name;
+ ry = DCSS_FINDSEGX;
+
+ strcpy(name, "dummy");
+ /*
+ * Issue the probe. On the normal path ipm/srl leave the condition
+ * code in rc and the "la" is jumped over. EX_TABLE(0b, 1b) presumably
+ * redirects an exception reported at label 0 to label 1, where rc is
+ * loaded with 3 - so rc == 3 covers both condition code 3 and a
+ * faulting diag.
+ */
+ asm volatile(
+ " diag %0,%1,0x64\n"
+ "0: ipm %2\n"
+ " srl %2,28\n"
+ " j 2f\n"
+ "1: la %2,3\n"
+ "2:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+
+ kfree(name);
+ /* Diag x'64' new subcodes are supported, set to new subcodes */
+ if (rc != 3) {
+ loadshr_scode = DCSS_LOADSHRX;
+ loadnsr_scode = DCSS_LOADNSRX;
+ /* the purge subcode is the same in both sets */
+ purgeseg_scode = DCSS_PURGESEG;
+ findseg_scode = DCSS_FINDSEGX;
+ segext_scode = DCSS_SEGEXTX;
+ return 0;
+ }
+#endif
+ /* Diag x'64' new subcodes are not supported, set to old subcodes */
+ /* shared loads use DCSS_LOADNOLY here, not DCSS_LOADSHR */
+ loadshr_scode = DCSS_LOADNOLY;
+ loadnsr_scode = DCSS_LOADNSR;
+ purgeseg_scode = DCSS_PURGESEG;
+ findseg_scode = DCSS_FINDSEG;
+ segext_scode = DCSS_SEGEXT;
+ return 0;
+}
/*
* Create the 8 bytes, ebcdic VM segment name from
@@ -135,25 +193,45 @@ segment_by_name (char *name)
* Perform a function on a dcss segment.
*/
static inline int
-dcss_diag (__u8 func, void *parameter,
+dcss_diag(int *func, void *parameter,
unsigned long *ret1, unsigned long *ret2)
{
unsigned long rx, ry;
int rc;
+ if (scode_set == 0) {
+ rc = dcss_set_subcodes();
+ if (rc < 0)
+ return rc;
+ scode_set = 1;
+ }
rx = (unsigned long) parameter;
- ry = (unsigned long) func;
- asm volatile(
+ ry = (unsigned long) *func;
+
#ifdef CONFIG_64BIT
- " sam31\n"
- " diag %0,%1,0x64\n"
- " sam64\n"
+ /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+ if (*func > DCSS_SEGEXT)
+ asm volatile(
+ " diag %0,%1,0x64\n"
+ " ipm %2\n"
+ " srl %2,28\n"
+ : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+ /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */
+ else
+ asm volatile(
+ " sam31\n"
+ " diag %0,%1,0x64\n"
+ " sam64\n"
+ " ipm %2\n"
+ " srl %2,28\n"
+ : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
#else
+ asm volatile(
" diag %0,%1,0x64\n"
-#endif
" ipm %2\n"
" srl %2,28\n"
: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+#endif
*ret1 = rx;
*ret2 = ry;
return rc;
@@ -173,12 +251,13 @@ dcss_diag_translate_rc (int vm_rc) {
static int
query_segment_type (struct dcss_segment *seg)
{
- struct qin64 *qin = kmalloc (sizeof(struct qin64), GFP_DMA);
- struct qout64 *qout = kmalloc (sizeof(struct qout64), GFP_DMA);
-
- int diag_cc, rc, i;
unsigned long dummy, vmrc;
+ int diag_cc, rc, i;
+ struct qout64 *qout;
+ struct qin64 *qin;
+ qin = kmalloc(sizeof(*qin), GFP_KERNEL | GFP_DMA);
+ qout = kmalloc(sizeof(*qout), GFP_KERNEL | GFP_DMA);
if ((qin == NULL) || (qout == NULL)) {
rc = -ENOMEM;
goto out_free;
@@ -190,16 +269,47 @@ query_segment_type (struct dcss_segment *seg)
qin->qoutlen = sizeof(struct qout64);
memcpy (qin->qname, seg->dcss_name, 8);
- diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc);
+ diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc);
+ if (diag_cc < 0) {
+ rc = diag_cc;
+ goto out_free;
+ }
if (diag_cc > 1) {
- PRINT_WARN ("segment_type: diag returned error %ld\n", vmrc);
+ pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc);
rc = dcss_diag_translate_rc (vmrc);
goto out_free;
}
+#ifdef CONFIG_64BIT
+ /* Only old format of output area of Diagnose x'64' is supported,
+ copy data for the new format. */
+ if (segext_scode == DCSS_SEGEXT) {
+ struct qout64_old *qout_old;
+ qout_old = kzalloc(sizeof(*qout_old), GFP_KERNEL | GFP_DMA);
+ if (qout_old == NULL) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+ memcpy(qout_old, qout, sizeof(struct qout64_old));
+ qout->segstart = (unsigned long) qout_old->segstart;
+ qout->segend = (unsigned long) qout_old->segend;
+ qout->segcnt = qout_old->segcnt;
+ qout->segrcnt = qout_old->segrcnt;
+
+ if (qout->segcnt > 6)
+ qout->segrcnt = 6;
+ for (i = 0; i < qout->segrcnt; i++) {
+ qout->range[i].start =
+ (unsigned long) qout_old->range[i].start;
+ qout->range[i].end =
+ (unsigned long) qout_old->range[i].end;
+ }
+ kfree(qout_old);
+ }
+#endif
if (qout->segcnt > 6) {
- rc = -ENOTSUPP;
+ rc = -EOPNOTSUPP;
goto out_free;
}
@@ -214,11 +324,11 @@ query_segment_type (struct dcss_segment *seg)
for (i=0; i<qout->segcnt; i++) {
if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) &&
((qout->range[i].start & 0xff) != SEG_TYPE_EN)) {
- rc = -ENOTSUPP;
+ rc = -EOPNOTSUPP;
goto out_free;
}
if (start != qout->range[i].start >> PAGE_SHIFT) {
- rc = -ENOTSUPP;
+ rc = -EOPNOTSUPP;
goto out_free;
}
start = (qout->range[i].end >> PAGE_SHIFT) + 1;
@@ -247,8 +357,7 @@ query_segment_type (struct dcss_segment *seg)
* -ENOSYS : we are not running on VM
* -EIO : could not perform query diagnose
* -ENOENT : no such segment
- * -ENOTSUPP: multi-part segment cannot be used with linux
- * -ENOSPC : segment cannot be used (overlaps with storage)
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
* -ENOMEM : out of memory
* 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h
*/
@@ -269,15 +378,41 @@ segment_type (char* name)
}
/*
+ * check if segment collides with other segments that are currently loaded
+ * returns 1 if this is the case, 0 if no collision was found
+ */
+static int
+segment_overlaps_others (struct dcss_segment *seg)
+{
+	struct dcss_segment *other;
+	struct list_head *pos;
+
+	/* the list may only be walked with dcss_lock held */
+	BUG_ON(!mutex_is_locked(&dcss_lock));
+	list_for_each(pos, &dcss_list) {
+		other = list_entry(pos, struct dcss_segment, list);
+		if (other == seg)
+			continue;
+		/*
+		 * Addresses are compared after shifting right by 20 bits,
+		 * i.e. in units of 1 MB: two ranges collide when neither
+		 * one lies completely below the other.
+		 */
+		if ((other->start_addr >> 20) <= (seg->end >> 20) &&
+		    (other->end >> 20) >= (seg->start_addr >> 20))
+			return 1;
+	}
+	return 0;
+}
+
+/*
* real segment loading function, called from segment_load
*/
static int
__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
{
- struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment),
- GFP_DMA);
- int dcss_command, rc, diag_cc;
+ unsigned long start_addr, end_addr, dummy;
+ struct dcss_segment *seg;
+ int rc, diag_cc;
+ start_addr = end_addr = 0;
+ seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
if (seg == NULL) {
rc = -ENOMEM;
goto out;
@@ -287,6 +422,13 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
if (rc < 0)
goto out_free;
+ if (loadshr_scode == DCSS_LOADSHRX) {
+ if (segment_overlaps_others(seg)) {
+ rc = -EBUSY;
+ goto out_free;
+ }
+ }
+
rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
if (rc)
@@ -316,35 +458,40 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
}
if (do_nonshared)
- dcss_command = DCSS_LOADNSR;
+ diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
else
- dcss_command = DCSS_LOADNOLY;
-
- diag_cc = dcss_diag(dcss_command, seg->dcss_name,
- &seg->start_addr, &seg->end);
+ diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ if (diag_cc < 0) {
+ dcss_diag(&purgeseg_scode, seg->dcss_name,
+ &dummy, &dummy);
+ rc = diag_cc;
+ goto out_resource;
+ }
if (diag_cc > 1) {
- PRINT_WARN ("segment_load: could not load segment %s - "
- "diag returned error (%ld)\n",name,seg->end);
- rc = dcss_diag_translate_rc (seg->end);
- dcss_diag(DCSS_PURGESEG, seg->dcss_name,
- &seg->start_addr, &seg->end);
+ pr_warning("Loading DCSS %s failed with rc=%ld\n", name,
+ end_addr);
+ rc = dcss_diag_translate_rc(end_addr);
+ dcss_diag(&purgeseg_scode, seg->dcss_name,
+ &dummy, &dummy);
goto out_resource;
}
+ seg->start_addr = start_addr;
+ seg->end = end_addr;
seg->do_nonshared = do_nonshared;
atomic_set(&seg->ref_count, 1);
list_add(&seg->list, &dcss_list);
*addr = seg->start_addr;
*end = seg->end;
if (do_nonshared)
- PRINT_INFO ("segment_load: loaded segment %s range %p .. %p "
- "type %s in non-shared mode\n", name,
- (void*)seg->start_addr, (void*)seg->end,
- segtype_string[seg->vm_segtype]);
+ pr_info("DCSS %s of range %p to %p and type %s loaded as "
+ "exclusive-writable\n", name, (void*) seg->start_addr,
+ (void*) seg->end, segtype_string[seg->vm_segtype]);
else {
- PRINT_INFO ("segment_load: loaded segment %s range %p .. %p "
- "type %s in shared mode\n", name,
- (void*)seg->start_addr, (void*)seg->end,
- segtype_string[seg->vm_segtype]);
+ pr_info("DCSS %s of range %p to %p and type %s loaded in "
+ "shared access mode\n", name, (void*) seg->start_addr,
+ (void*) seg->end, segtype_string[seg->vm_segtype]);
}
goto out;
out_resource:
@@ -369,7 +516,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
* -ENOSYS : we are not running on VM
* -EIO : could not perform query or load diagnose
* -ENOENT : no such segment
- * -ENOTSUPP: multi-part segment cannot be used with linux
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
* -ENOSPC : segment cannot be used (overlaps with storage)
* -EBUSY : segment can temporarily not be used (overlaps with dcss)
* -ERANGE : segment cannot be used (exceeds kernel mapping range)
@@ -423,9 +570,10 @@ int
segment_modify_shared (char *name, int do_nonshared)
{
struct dcss_segment *seg;
- unsigned long dummy;
- int dcss_command, rc, diag_cc;
+ unsigned long start_addr, end_addr, dummy;
+ int rc, diag_cc;
+ start_addr = end_addr = 0;
mutex_lock(&dcss_lock);
seg = segment_by_name (name);
if (seg == NULL) {
@@ -433,50 +581,62 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_unlock;
}
if (do_nonshared == seg->do_nonshared) {
- PRINT_INFO ("segment_modify_shared: not reloading segment %s"
- " - already in requested mode\n",name);
+ pr_info("DCSS %s is already in the requested access "
+ "mode\n", name);
rc = 0;
goto out_unlock;
}
if (atomic_read (&seg->ref_count) != 1) {
- PRINT_WARN ("segment_modify_shared: not reloading segment %s - "
- "segment is in use by other driver(s)\n",name);
+ pr_warning("DCSS %s is in use and cannot be reloaded\n",
+ name);
rc = -EAGAIN;
goto out_unlock;
}
release_resource(seg->res);
- if (do_nonshared) {
- dcss_command = DCSS_LOADNSR;
+ if (do_nonshared)
seg->res->flags &= ~IORESOURCE_READONLY;
- } else {
- dcss_command = DCSS_LOADNOLY;
+ else
if (seg->vm_segtype == SEG_TYPE_SR ||
seg->vm_segtype == SEG_TYPE_ER)
seg->res->flags |= IORESOURCE_READONLY;
- }
+
if (request_resource(&iomem_resource, seg->res)) {
- PRINT_WARN("segment_modify_shared: could not reload segment %s"
- " - overlapping resources\n", name);
+ pr_warning("DCSS %s overlaps with used memory resources "
+ "and cannot be reloaded\n", name);
rc = -EBUSY;
kfree(seg->res);
- goto out_del;
+ goto out_del_mem;
+ }
+
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ if (do_nonshared)
+ diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ else
+ diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ if (diag_cc < 0) {
+ rc = diag_cc;
+ goto out_del_res;
}
- dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
- diag_cc = dcss_diag(dcss_command, seg->dcss_name,
- &seg->start_addr, &seg->end);
if (diag_cc > 1) {
- PRINT_WARN ("segment_modify_shared: could not reload segment %s"
- " - diag returned error (%ld)\n",name,seg->end);
- rc = dcss_diag_translate_rc (seg->end);
- goto out_del;
+ pr_warning("Reloading DCSS %s failed with rc=%ld\n", name,
+ end_addr);
+ rc = dcss_diag_translate_rc(end_addr);
+ goto out_del_res;
}
+ seg->start_addr = start_addr;
+ seg->end = end_addr;
seg->do_nonshared = do_nonshared;
rc = 0;
goto out_unlock;
- out_del:
+ out_del_res:
+ release_resource(seg->res);
+ kfree(seg->res);
+ out_del_mem:
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
@@ -500,8 +660,7 @@ segment_unload(char *name)
mutex_lock(&dcss_lock);
seg = segment_by_name (name);
if (seg == NULL) {
- PRINT_ERR ("could not find segment %s in segment_unload, "
- "please report to linux390@de.ibm.com\n",name);
+ pr_err("Unloading unknown DCSS %s failed\n", name);
goto out_unlock;
}
if (atomic_dec_return(&seg->ref_count) != 0)
@@ -510,7 +669,7 @@ segment_unload(char *name)
kfree(seg->res);
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
@@ -523,8 +682,6 @@ void
segment_save(char *name)
{
struct dcss_segment *seg;
- int startpfn = 0;
- int endpfn = 0;
char cmd1[160];
char cmd2[80];
int i, response;
@@ -536,16 +693,13 @@ segment_save(char *name)
seg = segment_by_name (name);
if (seg == NULL) {
- PRINT_ERR("could not find segment %s in segment_save, please "
- "report to linux390@de.ibm.com\n", name);
+ pr_err("Saving unknown DCSS %s failed\n", name);
goto out;
}
- startpfn = seg->start_addr >> PAGE_SHIFT;
- endpfn = (seg->end) >> PAGE_SHIFT;
sprintf(cmd1, "DEFSEG %s", name);
for (i=0; i<seg->segcnt; i++) {
- sprintf(cmd1+strlen(cmd1), " %X-%X %s",
+ sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
seg->range[i].start >> PAGE_SHIFT,
seg->range[i].end >> PAGE_SHIFT,
segtype_string[seg->range[i].start & 0xff]);
@@ -554,14 +708,14 @@ segment_save(char *name)
response = 0;
cpcmd(cmd1, NULL, 0, &response);
if (response) {
- PRINT_ERR("segment_save: DEFSEG failed with response code %i\n",
- response);
+ pr_err("Saving a DCSS failed with DEFSEG response code "
+ "%i\n", response);
goto out;
}
cpcmd(cmd2, NULL, 0, &response);
if (response) {
- PRINT_ERR("segment_save: SAVESEG failed with response code %i\n",
- response);
+ pr_err("Saving a DCSS failed with SAVESEG response code "
+ "%i\n", response);
goto out;
}
out:
@@ -576,44 +730,41 @@ void segment_warning(int rc, char *seg_name)
{
switch (rc) {
case -ENOENT:
- PRINT_WARN("cannot load/query segment %s, "
- "does not exist\n", seg_name);
+ pr_err("DCSS %s cannot be loaded or queried\n", seg_name);
break;
case -ENOSYS:
- PRINT_WARN("cannot load/query segment %s, "
- "not running on VM\n", seg_name);
+ pr_err("DCSS %s cannot be loaded or queried without "
+ "z/VM\n", seg_name);
break;
case -EIO:
- PRINT_WARN("cannot load/query segment %s, "
- "hardware error\n", seg_name);
+ pr_err("Loading or querying DCSS %s resulted in a "
+ "hardware error\n", seg_name);
break;
- case -ENOTSUPP:
- PRINT_WARN("cannot load/query segment %s, "
- "is a multi-part segment\n", seg_name);
+ case -EOPNOTSUPP:
+ pr_err("DCSS %s has multiple page ranges and cannot be "
+ "loaded or queried\n", seg_name);
break;
case -ENOSPC:
- PRINT_WARN("cannot load/query segment %s, "
- "overlaps with storage\n", seg_name);
+ pr_err("DCSS %s overlaps with used storage and cannot "
+ "be loaded\n", seg_name);
break;
case -EBUSY:
- PRINT_WARN("cannot load/query segment %s, "
- "overlaps with already loaded dcss\n", seg_name);
+ pr_err("%s needs used memory resources and cannot be "
+ "loaded or queried\n", seg_name);
break;
case -EPERM:
- PRINT_WARN("cannot load/query segment %s, "
- "already loaded in incompatible mode\n", seg_name);
+ pr_err("DCSS %s is already loaded in a different access "
+ "mode\n", seg_name);
break;
case -ENOMEM:
- PRINT_WARN("cannot load/query segment %s, "
- "out of memory\n", seg_name);
+ pr_err("There is not enough memory to load or query "
+ "DCSS %s\n", seg_name);
break;
case -ERANGE:
- PRINT_WARN("cannot load/query segment %s, "
- "exceeds kernel mapping range\n", seg_name);
+ pr_err("DCSS %s exceeds the kernel mapping range (%lu) "
+ "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS);
break;
default:
- PRINT_WARN("cannot load/query segment %s, "
- "return value %i\n", seg_name, rc);
break;
}
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4d537205e83..3f3b35403d0 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -1,8 +1,6 @@
/*
- * arch/s390/mm/fault.c
- *
* S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999
* Author(s): Hartmut Penner (hp@de.ibm.com)
* Ulrich Weigand (uweigand@de.ibm.com)
*
@@ -10,6 +8,8 @@
* Copyright (C) 1995 Linus Torvalds
*/
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -19,9 +19,9 @@
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
-#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/module.h>
@@ -29,49 +29,54 @@
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
-#include <asm/system.h>
+#include <asm/asm-offsets.h>
#include <asm/pgtable.h>
-#include <asm/s390_ext.h>
+#include <asm/irq.h>
#include <asm/mmu_context.h>
+#include <asm/facility.h>
#include "../kernel/entry.h"
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
-#define __FIXUP_MASK 0x7fffffff
#define __SUBCODE_MASK 0x0200
#define __PF_RES_FIELD 0ULL
#else /* CONFIG_64BIT */
#define __FAIL_ADDR_MASK -4096L
-#define __FIXUP_MASK ~0L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
#endif /* CONFIG_64BIT */
-#ifdef CONFIG_SYSCTL
-extern int sysctl_userprocess_debug;
+#define VM_FAULT_BADCONTEXT 0x010000
+#define VM_FAULT_BADMAP 0x020000
+#define VM_FAULT_BADACCESS 0x040000
+#define VM_FAULT_SIGNAL 0x080000
+#define VM_FAULT_PFAULT 0x100000
+
+static unsigned long store_indication __read_mostly;
+
+#ifdef CONFIG_64BIT
+/*
+ * Initcall: sets store_indication to 0xc00 when test_facility(75) reports
+ * the facility as available; otherwise store_indication keeps its initial
+ * value of 0. Always returns 0.
+ * NOTE(review): 0xc00 presumably selects the fetch/store indication bits of
+ * the translation exception code - confirm against the users of
+ * store_indication.
+ */
+static int __init fault_init(void)
+{
+ if (test_facility(75))
+ store_indication = 0xc00;
+ return 0;
+}
+early_initcall(fault_init);
#endif
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, long err)
+static inline int notify_page_fault(struct pt_regs *regs)
{
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
+ if (kprobes_built_in() && !user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
preempt_enable();
}
-
return ret;
}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, long err)
-{
- return 0;
-}
-#endif
/*
@@ -99,76 +104,197 @@ void bust_spinlocks(int yes)
/*
* Returns the address space associated with the fault.
- * Returns 0 for kernel space, 1 for user space and
- * 2 for code execution in user space with noexec=on.
+ * Returns 0 for kernel space and 1 for user space.
*/
-static inline int check_space(struct task_struct *tsk)
+static inline int user_space_fault(struct pt_regs *regs)
{
+ unsigned long trans_exc_code;
+
/*
- * The lowest two bits of S390_lowcore.trans_exc_code
- * indicate which paging table was used.
+ * The lowest two bits of the translation exception
+ * identification indicate which paging table was used.
*/
- int desc = S390_lowcore.trans_exc_code & 3;
-
- if (desc == 3) /* Home Segment Table Descriptor */
- return switch_amode == 0;
- if (desc == 2) /* Secondary Segment Table Descriptor */
- return tsk->thread.mm_segment.ar4;
-#ifdef CONFIG_S390_SWITCH_AMODE
- if (unlikely(desc == 1)) { /* STD determined via access register */
- /* %a0 always indicates primary space. */
- if (S390_lowcore.exc_access_id != 0) {
- save_access_regs(tsk->thread.acrs);
- /*
- * An alet of 0 indicates primary space.
- * An alet of 1 indicates secondary space.
- * Any other alet values generate an
- * alen-translation exception.
- */
- if (tsk->thread.acrs[S390_lowcore.exc_access_id])
- return tsk->thread.mm_segment.ar4;
- }
+ trans_exc_code = regs->int_parm_long & 3;
+ if (trans_exc_code == 3) /* home space -> kernel */
+ return 0;
+ if (user_mode(regs))
+ return 1;
+ if (trans_exc_code == 2) /* secondary space -> set_fs */
+ return current->thread.mm_segment.ar4;
+ if (current->flags & PF_VCPU)
+ return 1;
+ return 0;
+}
+
+/*
+ * Check whether the unsigned long at @p can be read, by handing it to
+ * probe_kernel_address(). The value read goes into a throw-away local;
+ * the helper's return value is passed through unchanged.
+ */
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Print the chain of translation table entries used for @address, starting
+ * with the table that @asce designates.
+ *
+ * The ASCE type selects the level at which the walk begins; each level then
+ * falls through to the next lower one. Per level the index is taken from
+ * @address (11 bits at shifts 53, 42, 31 and 20 for the region/segment
+ * tables, 8 bits at shift 12 for the page table) and the entry is printed.
+ * The walk stops early at an invalid entry, or at a large region-third or
+ * segment entry. An entry that cannot be read ends the line with "BAD".
+ */
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+	unsigned long *table = __va(asce & PAGE_MASK);
+
+	pr_alert("AS:%016lx ", asce);
+	switch (asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		table = table + ((address >> 53) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R1:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_REGION2:
+		table = table + ((address >> 42) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R2:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_REGION3:
+		table = table + ((address >> 31) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R3:%016lx ", *table);
+		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_SEGMENT:
+		table = table + ((address >> 20) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		/* pr_cont() already supplies the continuation prefix */
+		pr_cont("S:%016lx ", *table);
+		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+			goto out;
+		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+	}
+	table = table + ((address >> 12) & 0xff);
+	if (bad_address(table))
+		goto bad;
+	pr_cont("P:%016lx ", *table);
+out:
+	pr_cont("\n");
+	return;
+bad:
+	pr_cont("BAD\n");
+}
+
+#else /* CONFIG_64BIT */
+
+/*
+ * Print the segment table entry and, if that entry is valid, the page table
+ * entry used for @address, starting at the table designated by @asce.
+ * An entry that cannot be read ends the line with "BAD".
+ */
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+	unsigned long *entry = __va(asce & PAGE_MASK);
+
+	pr_alert("AS:%08lx ", asce);
+
+	/* segment table: 11 index bits starting at bit 20 */
+	entry += (address >> 20) & 0x7ff;
+	if (bad_address(entry))
+		goto bad;
+	pr_cont("S:%08lx ", *entry);
+
+	if (!(*entry & _SEGMENT_ENTRY_INVALID)) {
+		/* page table: 8 index bits starting at bit 12 */
+		entry = (unsigned long *)(*entry & _SEGMENT_ENTRY_ORIGIN);
+		entry += (address >> 12) & 0xff;
+		if (bad_address(entry))
+			goto bad;
+		pr_cont("P:%08lx ", *entry);
+	}
+	pr_cont("\n");
+	return;
+bad:
+	pr_cont("BAD\n");
+}
+
+#endif /* CONFIG_64BIT */
+
+/*
+ * Print a description of the fault recorded in @regs.
+ *
+ * The lowest two bits of regs->int_parm_long pick the wording of the first
+ * part of the message (home space, secondary space, access register or
+ * primary space). The ASCE to dump is then chosen: the kernel ASCE when
+ * user_space_fault() returns 0; with CONFIG_PGSTE the gmap ASCE when the
+ * task has PF_VCPU set and S390_lowcore.gmap is non-zero; otherwise the
+ * user ASCE. Finally dump_pagetable() is called for the failing address
+ * (int_parm_long masked with __FAIL_ADDR_MASK).
+ */
+static void dump_fault_info(struct pt_regs *regs)
+{
+ unsigned long asce;
+
+ pr_alert("Fault in ");
+ switch (regs->int_parm_long & 3) {
+ case 3:
+ pr_cont("home space ");
+ break;
+ case 2:
+ pr_cont("secondary space ");
+ break;
+ case 1:
+ pr_cont("access register ");
+ break;
+ case 0:
+ pr_cont("primary space ");
+ break;
+ }
+ pr_cont("mode while using ");
+ if (!user_space_fault(regs)) {
+ asce = S390_lowcore.kernel_asce;
+ pr_cont("kernel ");
+ }
+#ifdef CONFIG_PGSTE
+ else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ asce = gmap->asce;
+ pr_cont("gmap ");
}
#endif
- /* Primary Segment Table Descriptor */
- return switch_amode << s390_noexec;
+ else {
+ asce = S390_lowcore.user_asce;
+ pr_cont("user ");
+ }
+ pr_cont("ASCE.\n");
+ dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
+/*
+ * Log a user process fault that is about to be signalled with @signr:
+ * interruption code, faulting location, failing address and TEID, the
+ * fault/page table information and the registers.
+ *
+ * Nothing is printed when the task's pid is above 1 and
+ * show_unhandled_signals is off, when unhandled_signal() returns 0 for
+ * @signr, or when printk_ratelimit() refuses the message.
+ */
+static inline void report_user_fault(struct pt_regs *regs, long signr)
+{
+	/* evaluated left to right, so the rate limiter is only asked last */
+	if (((task_pid_nr(current) > 1) && !show_unhandled_signals) ||
+	    !unhandled_signal(current, signr) ||
+	    !printk_ratelimit())
+		return;
+
+	printk(KERN_ALERT "User process fault: interruption code 0x%X ",
+	       regs->int_code);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+	printk(KERN_CONT "\n");
+	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+	dump_fault_info(regs);
+	show_regs(regs);
+}
/*
* Send SIGSEGV to task. This is an external routine
* to keep the stack usage of do_page_fault small.
*/
-static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
- int si_code, unsigned long address)
+static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
struct siginfo si;
-#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
-#if defined(CONFIG_SYSCTL)
- if (sysctl_userprocess_debug)
-#endif
- {
- printk("User process fault: interruption code 0x%lX\n",
- error_code);
- printk("failing address: %lX\n", address);
- show_regs(regs);
- }
-#endif
+ report_user_fault(regs, SIGSEGV);
si.si_signo = SIGSEGV;
si.si_code = si_code;
- si.si_addr = (void __user *) address;
+ si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
force_sig_info(SIGSEGV, &si, current);
}
-static void do_no_context(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+static noinline void do_no_context(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
+ unsigned long address;
/* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
+ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
if (fixup) {
- regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+ regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
return;
}
@@ -176,198 +302,184 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code,
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- if (check_space(current) == 0)
+ address = regs->int_parm_long & __FAIL_ADDR_MASK;
+ if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
- " at virtual kernel address %p\n", (void *)address);
+ " in virtual kernel address space\n");
else
printk(KERN_ALERT "Unable to handle kernel paging request"
- " at virtual user address %p\n", (void *)address);
-
- die("Oops", regs, error_code);
+ " in virtual user address space\n");
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
+ die(regs, "Oops");
do_exit(SIGKILL);
}
-static void do_low_address(struct pt_regs *regs, unsigned long error_code)
+static noinline void do_low_address(struct pt_regs *regs)
{
/* Low-address protection hit in kernel mode means
NULL pointer write access in kernel mode. */
if (regs->psw.mask & PSW_MASK_PSTATE) {
/* Low-address protection hit in user mode 'cannot happen'. */
- die ("Low-address protection", regs, error_code);
+ die (regs, "Low-address protection");
do_exit(SIGKILL);
}
- do_no_context(regs, error_code, 0);
+ do_no_context(regs);
}
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+static noinline void do_sigbus(struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
-
- up_read(&mm->mmap_sem);
- if (is_global_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- return 1;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (regs->psw.mask & PSW_MASK_PSTATE)
- do_group_exit(SIGKILL);
- do_no_context(regs, error_code, address);
- return 0;
-}
-
-static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
+ struct siginfo si;
- up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- tsk->thread.prot_addr = address;
- tsk->thread.trap_no = error_code;
- force_sig(SIGBUS, tsk);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(regs->psw.mask & PSW_MASK_PSTATE))
- do_no_context(regs, error_code, address);
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_ADRERR;
+ si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
+ force_sig_info(SIGBUS, &si, tsk);
}
-#ifdef CONFIG_S390_EXEC_PROTECT
-static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
- unsigned long address, unsigned long error_code)
+static noinline void do_fault_error(struct pt_regs *regs, int fault)
{
- u16 instruction;
- int rc;
-#ifdef CONFIG_COMPAT
- int compat;
-#endif
-
- pagefault_disable();
- rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
- pagefault_enable();
- if (rc)
- return -EFAULT;
+ int si_code;
- up_read(&mm->mmap_sem);
- clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
-#ifdef CONFIG_COMPAT
- compat = test_tsk_thread_flag(current, TIF_31BIT);
- if (compat && instruction == 0x0a77)
- sys32_sigreturn();
- else if (compat && instruction == 0x0aad)
- sys32_rt_sigreturn();
- else
-#endif
- if (instruction == 0x0a77)
- sys_sigreturn();
- else if (instruction == 0x0aad)
- sys_rt_sigreturn();
- else {
- current->thread.prot_addr = address;
- current->thread.trap_no = error_code;
- do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+ switch (fault) {
+ case VM_FAULT_BADACCESS:
+ case VM_FAULT_BADMAP:
+ /* Bad memory access. Check if it is kernel or user space. */
+ if (user_mode(regs)) {
+ /* User mode accesses just cause a SIGSEGV */
+ si_code = (fault == VM_FAULT_BADMAP) ?
+ SEGV_MAPERR : SEGV_ACCERR;
+ do_sigsegv(regs, si_code);
+ return;
+ }
+ case VM_FAULT_BADCONTEXT:
+ case VM_FAULT_PFAULT:
+ do_no_context(regs);
+ break;
+ case VM_FAULT_SIGNAL:
+ if (!user_mode(regs))
+ do_no_context(regs);
+ break;
+ default: /* fault & VM_FAULT_ERROR */
+ if (fault & VM_FAULT_OOM) {
+ if (!user_mode(regs))
+ do_no_context(regs);
+ else
+ pagefault_out_of_memory();
+ } else if (fault & VM_FAULT_SIGBUS) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs))
+ do_no_context(regs);
+ else
+ do_sigbus(regs);
+ } else
+ BUG();
+ break;
}
- return 0;
}
-#endif /* CONFIG_S390_EXEC_PROTECT */
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
*
- * error_code:
+ * interruption code (int_code):
* 04 Protection -> Write-Protection (suprression)
* 10 Segment translation -> Not present (nullification)
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
*/
-static inline void
-do_exception(struct pt_regs *regs, unsigned long error_code, int write)
+static inline int do_exception(struct pt_regs *regs, int access)
{
+#ifdef CONFIG_PGSTE
+ struct gmap *gmap;
+#endif
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ unsigned long trans_exc_code;
unsigned long address;
- int space;
- int si_code;
+ unsigned int flags;
int fault;
- if (notify_page_fault(regs, error_code))
- return;
-
tsk = current;
- mm = tsk->mm;
+ /*
+ * The instruction that caused the program check has
+ * been nullified. Don't signal single step via SIGTRAP.
+ */
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+
+ if (notify_page_fault(regs))
+ return 0;
- /* get the failing address and the affected space */
- address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
- space = check_space(tsk);
+ mm = tsk->mm;
+ trans_exc_code = regs->int_parm_long;
/*
* Verify that the fault happened in user space, that
* we are not in an interrupt and that there is a
* user context.
*/
- if (unlikely(space == 0 || in_atomic() || !mm))
- goto no_context;
-
- /*
- * When we get here, the fault happened in the current
- * task's user address space, so we can switch on the
- * interrupts again and then search the VMAs
- */
- local_irq_enable();
-
+ fault = VM_FAULT_BADCONTEXT;
+ if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+ goto out;
+
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
+ flags |= FAULT_FLAG_WRITE;
down_read(&mm->mmap_sem);
- si_code = SEGV_MAPERR;
+#ifdef CONFIG_PGSTE
+ gmap = (struct gmap *)
+ ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+ if (gmap) {
+ address = __gmap_fault(address, gmap);
+ if (address == -EFAULT) {
+ fault = VM_FAULT_BADMAP;
+ goto out_up;
+ }
+ if (address == -ENOMEM) {
+ fault = VM_FAULT_OOM;
+ goto out_up;
+ }
+ if (gmap->pfault_enabled)
+ flags |= FAULT_FLAG_RETRY_NOWAIT;
+ }
+#endif
+
+retry:
+ fault = VM_FAULT_BADMAP;
vma = find_vma(mm, address);
if (!vma)
- goto bad_area;
-
-#ifdef CONFIG_S390_EXEC_PROTECT
- if (unlikely((space == 2) && !(vma->vm_flags & VM_EXEC)))
- if (!signal_return(mm, regs, address, error_code))
- /*
- * signal_return() has done an up_read(&mm->mmap_sem)
- * if it returns 0.
- */
- return;
-#endif
+ goto out_up;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- si_code = SEGV_ACCERR;
- if (!write) {
- /* page not present, check vm flags */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- } else {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
+ if (unlikely(vma->vm_start > address)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto out_up;
+ if (expand_stack(vma, address))
+ goto out_up;
}
-survive:
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+ fault = VM_FAULT_BADACCESS;
+ if (unlikely(!(vma->vm_flags & access)))
+ goto out_up;
+
if (is_vm_hugetlb_page(vma))
address &= HPAGE_MASK;
/*
@@ -375,117 +487,98 @@ survive:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, write);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM) {
- if (do_out_of_memory(regs, error_code, address))
- goto survive;
- return;
- } else if (fault & VM_FAULT_SIGBUS) {
- do_sigbus(regs, error_code, address);
- return;
- }
- BUG();
+ fault = handle_mm_fault(mm, vma, address, flags);
+ /* No reason to continue if interrupted by SIGKILL. */
+ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ fault = VM_FAULT_SIGNAL;
+ goto out;
}
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
+ if (unlikely(fault & VM_FAULT_ERROR))
+ goto out_up;
- up_read(&mm->mmap_sem);
/*
- * The instruction that caused the program check will
- * be repeated. Don't signal single step via SIGTRAP.
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
*/
- clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
- /* User mode accesses just cause a SIGSEGV */
- if (regs->psw.mask & PSW_MASK_PSTATE) {
- tsk->thread.prot_addr = address;
- tsk->thread.trap_no = error_code;
- do_sigsegv(regs, error_code, si_code, address);
- return;
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+ if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /* FAULT_FLAG_RETRY_NOWAIT has been set,
+ * mmap_sem has not been released */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
+ }
+#endif
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT);
+ flags |= FAULT_FLAG_TRIED;
+ down_read(&mm->mmap_sem);
+ goto retry;
+ }
}
-
-no_context:
- do_no_context(regs, error_code, address);
+ fault = 0;
+out_up:
+ up_read(&mm->mmap_sem);
+out:
+ return fault;
}
-void __kprobes do_protection_exception(struct pt_regs *regs,
- long error_code)
+void __kprobes do_protection_exception(struct pt_regs *regs)
{
- /* Protection exception is supressing, decrement psw address. */
- regs->psw.addr -= (error_code >> 16);
+ unsigned long trans_exc_code;
+ int fault;
+
+ trans_exc_code = regs->int_parm_long;
+ /*
+ * Protection exceptions are suppressing, decrement psw address.
+ * The exception to this rule are aborted transactions, for these
+ * the PSW already points to the correct location.
+ */
+ if (!(regs->int_code & 0x200))
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
/*
* Check for low-address protection. This needs to be treated
* as a special case because the translation exception code
* field is not guaranteed to contain valid data in this case.
*/
- if (unlikely(!(S390_lowcore.trans_exc_code & 4))) {
- do_low_address(regs, error_code);
+ if (unlikely(!(trans_exc_code & 4))) {
+ do_low_address(regs);
return;
}
- do_exception(regs, 4, 1);
+ fault = do_exception(regs, VM_WRITE);
+ if (unlikely(fault))
+ do_fault_error(regs, fault);
}
-void __kprobes do_dat_exception(struct pt_regs *regs, long error_code)
+void __kprobes do_dat_exception(struct pt_regs *regs)
{
- do_exception(regs, error_code & 0xff, 0);
-}
-
-#ifdef CONFIG_64BIT
-void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
-{
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- unsigned long address;
- int space;
-
- mm = current->mm;
- address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
- space = check_space(current);
+ int access, fault;
- if (unlikely(space == 0 || in_atomic() || !mm))
- goto no_context;
-
- local_irq_enable();
-
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, address);
- up_read(&mm->mmap_sem);
-
- if (vma) {
- update_mm(mm, current);
- return;
- }
-
- /* User mode accesses just cause a SIGSEGV */
- if (regs->psw.mask & PSW_MASK_PSTATE) {
- current->thread.prot_addr = address;
- current->thread.trap_no = error_code;
- do_sigsegv(regs, error_code, SEGV_MAPERR, address);
- return;
- }
-
-no_context:
- do_no_context(regs, error_code, address);
+ access = VM_READ | VM_EXEC | VM_WRITE;
+ fault = do_exception(regs, access);
+ if (unlikely(fault))
+ do_fault_error(regs, fault);
}
-#endif
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
*/
-static ext_int_info_t ext_int_pfault;
-static int pfault_disable = 0;
+static int pfault_disable;
static int __init nopfault(char *str)
{
@@ -495,25 +588,31 @@ static int __init nopfault(char *str)
__setup("nopfault", nopfault);
-typedef struct {
- __u16 refdiagc;
- __u16 reffcode;
- __u16 refdwlen;
- __u16 refversn;
- __u64 refgaddr;
- __u64 refselmk;
- __u64 refcmpmk;
- __u64 reserved;
-} __attribute__ ((packed, aligned(8))) pfault_refbk_t;
+struct pfault_refbk {
+ u16 refdiagc;
+ u16 reffcode;
+ u16 refdwlen;
+ u16 refversn;
+ u64 refgaddr;
+ u64 refselmk;
+ u64 refcmpmk;
+ u64 reserved;
+} __attribute__ ((packed, aligned(8)));
int pfault_init(void)
{
- pfault_refbk_t refbk =
- { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
- __PF_RES_FIELD };
+ struct pfault_refbk refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 0,
+ .refdwlen = 5,
+ .refversn = 2,
+ .refgaddr = __LC_CURRENT_PID,
+ .refselmk = 1ULL << 48,
+ .refcmpmk = 1ULL << 48,
+ .reserved = __PF_RES_FIELD };
int rc;
- if (!MACHINE_IS_VM || pfault_disable)
+ if (pfault_disable)
return -1;
asm volatile(
" diag %1,%0,0x258\n"
@@ -522,18 +621,20 @@ int pfault_init(void)
"2:\n"
EX_TABLE(0b,1b)
: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
- __ctl_set_bit(0, 9);
return rc;
}
void pfault_fini(void)
{
- pfault_refbk_t refbk =
- { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
-
- if (!MACHINE_IS_VM || pfault_disable)
+ struct pfault_refbk refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 1,
+ .refdwlen = 5,
+ .refversn = 2,
+ };
+
+ if (pfault_disable)
return;
- __ctl_clear_bit(0,9);
asm volatile(
" diag %0,0,0x258\n"
"0:\n"
@@ -541,10 +642,15 @@ void pfault_fini(void)
: : "a" (&refbk), "m" (refbk) : "cc");
}
-static void pfault_interrupt(__u16 error_code)
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+static void pfault_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
{
struct task_struct *tsk;
__u16 subcode;
+ pid_t pid;
/*
* Get the external interruption subcode & pfault
@@ -552,63 +658,118 @@ static void pfault_interrupt(__u16 error_code)
* in the 'cpu address' field associated with the
* external interrupt.
*/
- subcode = S390_lowcore.cpu_addr;
+ subcode = ext_code.subcode;
if ((subcode & 0xff00) != __SUBCODE_MASK)
return;
-
- /*
- * Get the token (= address of the task structure of the affected task).
- */
- tsk = *(struct task_struct **) __LC_PFAULT_INTPARM;
-
+ inc_irq_stat(IRQEXT_PFL);
+ /* Get the token (= pid of the affected task). */
+ pid = sizeof(void *) == 4 ? param32 : param64;
+ rcu_read_lock();
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (tsk)
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (!tsk)
+ return;
+ spin_lock(&pfault_lock);
if (subcode & 0x0080) {
/* signal bit is set -> a page has been swapped in by VM */
- if (xchg(&tsk->thread.pfault_wait, -1) != 0) {
+ if (tsk->thread.pfault_wait == 1) {
/* Initial interrupt was faster than the completion
* interrupt. pfault_wait is valid. Set pfault_wait
* back to zero and wake up the process. This can
* safely be done because the task is still sleeping
* and can't produce new pfaults. */
tsk->thread.pfault_wait = 0;
+ list_del(&tsk->thread.list);
wake_up_process(tsk);
put_task_struct(tsk);
+ } else {
+ /* Completion interrupt was faster than initial
+ * interrupt. Set pfault_wait to -1 so the initial
+ * interrupt doesn't put the task to sleep.
+ * If the task is not running, ignore the completion
+ * interrupt since it must be a leftover of a PFAULT
+ * CANCEL operation which didn't remove all pending
+ * completion interrupts. */
+ if (tsk->state == TASK_RUNNING)
+ tsk->thread.pfault_wait = -1;
}
} else {
/* signal bit not set -> a real page is missing. */
- get_task_struct(tsk);
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
+ if (WARN_ON_ONCE(tsk != current))
+ goto out;
+ if (tsk->thread.pfault_wait == 1) {
+ /* Already on the list with a reference: put to sleep */
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_tsk_need_resched(tsk);
+ } else if (tsk->thread.pfault_wait == -1) {
/* Completion interrupt was faster than the initial
- * interrupt (swapped in a -1 for pfault_wait). Set
- * pfault_wait back to zero and exit. This can be
- * done safely because tsk is running in kernel
- * mode and can't produce new pfaults. */
+ * interrupt (pfault_wait == -1). Set pfault_wait
+ * back to zero and exit. */
tsk->thread.pfault_wait = 0;
- set_task_state(tsk, TASK_RUNNING);
- put_task_struct(tsk);
- } else
+ } else {
+ /* Initial interrupt arrived before completion
+ * interrupt. Let the task sleep.
+ * An extra task reference is needed since a different
+ * cpu may set the task state to TASK_RUNNING again
+ * before the scheduler is reached. */
+ get_task_struct(tsk);
+ tsk->thread.pfault_wait = 1;
+ list_add(&tsk->thread.list, &pfault_list);
+ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
+ }
}
+out:
+ spin_unlock(&pfault_lock);
+ put_task_struct(tsk);
}
-void __init pfault_irq_init(void)
+static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
- if (!MACHINE_IS_VM)
- return;
+ struct thread_struct *thread, *next;
+ struct task_struct *tsk;
- /*
- * Try to get pfault pseudo page faults going.
- */
- if (register_early_external_interrupt(0x2603, pfault_interrupt,
- &ext_int_pfault) != 0)
- panic("Couldn't request external interrupt 0x2603");
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DEAD:
+ spin_lock_irq(&pfault_lock);
+ list_for_each_entry_safe(thread, next, &pfault_list, list) {
+ thread->pfault_wait = 0;
+ list_del(&thread->list);
+ tsk = container_of(thread, struct task_struct, thread);
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ }
+ spin_unlock_irq(&pfault_lock);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
- if (pfault_init() == 0)
- return;
+static int __init pfault_irq_init(void)
+{
+ int rc;
- /* Tough luck, no pfault. */
+ rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+ if (rc)
+ goto out_extint;
+ rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+ if (rc)
+ goto out_pfault;
+ irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+ hotcpu_notifier(pfault_cpu_notify, 0);
+ return 0;
+
+out_pfault:
+ unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
pfault_disable = 1;
- unregister_early_external_interrupt(0x2603, pfault_interrupt,
- &ext_int_pfault);
+ return rc;
}
-#endif
+early_initcall(pfault_irq_init);
+
+#endif /* CONFIG_PFAULT */
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
new file mode 100644
index 00000000000..639fce46400
--- /dev/null
+++ b/arch/s390/mm/gup.c
@@ -0,0 +1,246 @@
+/*
+ * Lockless get_user_pages_fast for s390
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long mask;
+ pte_t *ptep, pte;
+ struct page *page;
+
+ mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+
+ ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
+ do {
+ pte = *ptep;
+ barrier();
+ if ((pte_val(pte) & mask) != 0)
+ return 0;
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+ if (!page_cache_get_speculative(page))
+ return 0;
+ if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+ put_page(page);
+ return 0;
+ }
+ pages[*nr] = page;
+ (*nr)++;
+
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+
+ return 1;
+}
+
+static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long mask, result;
+ struct page *head, *page, *tail;
+ int refs;
+
+ result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
+ mask = result | _SEGMENT_ENTRY_INVALID;
+ if ((pmd_val(pmd) & mask) != result)
+ return 0;
+ VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
+
+ refs = 0;
+ head = pmd_page(pmd);
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ /*
+ * Any tail pages need their mapcount reference taken before we
+ * return.
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
+ }
+
+ return 1;
+}
+
+
+static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pmd_t *pmdp, pmd;
+
+ pmdp = (pmd_t *) pudp;
+#ifdef CONFIG_64BIT
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pmdp = (pmd_t *) pud_deref(pud);
+ pmdp += pmd_index(addr);
+#endif
+ do {
+ pmd = *pmdp;
+ barrier();
+ next = pmd_addr_end(addr, end);
+ /*
+ * The pmd_trans_splitting() check below explains why
+ * pmdp_splitting_flush() has to serialize with
+ * smp_call_function() against our disabled IRQs, to stop
+ * this gup-fast code from running while we set the
+ * splitting bit in the pmd. Returning zero will take
+ * the slow path that will call wait_split_huge_page()
+ * if the pmd is still in splitting state.
+ */
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+ return 0;
+ if (unlikely(pmd_large(pmd))) {
+ if (!gup_huge_pmd(pmdp, pmd, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (!gup_pte_range(pmdp, pmd, addr, next,
+ write, pages, nr))
+ return 0;
+ } while (pmdp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pud_t *pudp, pud;
+
+ pudp = (pud_t *) pgdp;
+#ifdef CONFIG_64BIT
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pudp = (pud_t *) pgd_deref(pgd);
+ pudp += pud_index(addr);
+#endif
+ do {
+ pud = *pudp;
+ barrier();
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ return 0;
+ if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
+ return 0;
+ } while (pudp++, addr = next, addr != end);
+
+ return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next, flags;
+ pgd_t *pgdp, pgd;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if ((end <= start) || (end > TASK_SIZE))
+ return 0;
+ /*
+ * local_irq_save() doesn't prevent pagetable teardown, but does
+ * prevent the pagetables from being freed on s390.
+ *
+ * So long as we atomically load page table pointers versus teardown,
+ * we can follow the address down to the page and take a ref on it.
+ */
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd = *pgdp;
+ barrier();
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @write: whether pages will be written to
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ int nr, ret;
+
+ start &= PAGE_MASK;
+ nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ if (nr == nr_pages)
+ return nr;
+
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, start,
+ nr_pages - nr, write, 0, pages, NULL);
+ up_read(&mm->mmap_sem);
+ /* Have to be a bit careful with return values */
+ if (nr > 0)
+ ret = (ret < 0) ? nr : ret + nr;
+ return ret;
+}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f4b6124fdb7..0ff66a7e29b 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -1,38 +1,131 @@
/*
* IBM System z Huge TLB Page Support for Kernel.
*
- * Copyright 2007 IBM Corp.
+ * Copyright IBM Corp. 2007
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
#include <linux/mm.h>
#include <linux/hugetlb.h>
+static inline pmd_t __pte_to_pmd(pte_t pte)
+{
+ int none, young, prot;
+ pmd_t pmd;
+
+ /*
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp ..R...I...y.
+ * empty .10...000000 -> ..0...1...0.
+ * prot-none, clean, old .11...000001 -> ..0...1...1.
+ * prot-none, clean, young .11...000101 -> ..1...1...1.
+ * prot-none, dirty, old .10...001001 -> ..0...1...1.
+ * prot-none, dirty, young .10...001101 -> ..1...1...1.
+ * read-only, clean, old .11...010001 -> ..1...1...0.
+ * read-only, clean, young .01...010101 -> ..1...0...1.
+ * read-only, dirty, old .11...011001 -> ..1...1...0.
+ * read-only, dirty, young .01...011101 -> ..1...0...1.
+ * read-write, clean, old .11...110001 -> ..0...1...0.
+ * read-write, clean, young .01...110101 -> ..0...0...1.
+ * read-write, dirty, old .10...111001 -> ..0...1...0.
+ * read-write, dirty, young .00...111101 -> ..0...0...1.
+ * Huge ptes are dirty by definition, a clean pte is made dirty
+ * by the conversion.
+ */
+ if (pte_present(pte)) {
+ pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
+ if (pte_val(pte) & _PAGE_INVALID)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ none = (pte_val(pte) & _PAGE_PRESENT) &&
+ !(pte_val(pte) & _PAGE_READ) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ prot = (pte_val(pte) & _PAGE_PROTECT) &&
+ !(pte_val(pte) & _PAGE_WRITE);
+ young = pte_val(pte) & _PAGE_YOUNG;
+ if (none || young)
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (prot || (none && young))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ } else
+ pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
+ return pmd;
+}
+
+static inline pte_t __pmd_to_pte(pmd_t pmd)
+{
+ pte_t pte;
+
+ /*
+ * Convert encoding pmd bits pte bits
+ * ..R...I...y. .IR...wrdytp
+ * empty ..0...1...0. -> .10...000000
+ * prot-none, old ..0...1...1. -> .10...001001
+ * prot-none, young ..1...1...1. -> .10...001101
+ * read-only, old ..1...1...0. -> .11...011001
+ * read-only, young ..1...0...1. -> .01...011101
+ * read-write, old ..0...1...0. -> .10...111001
+ * read-write, young ..0...0...1. -> .00...111101
+ * Huge ptes are dirty by definition
+ */
+ if (pmd_present(pmd)) {
+ pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
+ (pmd_val(pmd) & PAGE_MASK);
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
+ pte_val(pte) |= _PAGE_INVALID;
+ if (pmd_prot_none(pmd)) {
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_YOUNG;
+ } else {
+ pte_val(pte) |= _PAGE_READ;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
+ pte_val(pte) |= _PAGE_YOUNG;
+ }
+ } else
+ pte_val(pte) = _PAGE_INVALID;
+ return pte;
+}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *pteptr, pte_t pteval)
+ pte_t *ptep, pte_t pte)
{
- pmd_t *pmdp = (pmd_t *) pteptr;
- pte_t shadow_pteval = pteval;
- unsigned long mask;
+ pmd_t pmd;
+ pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
- pteptr = (pte_t *) pte_page(pteval)[1].index;
- mask = pte_val(pteval) &
- (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
- pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
- if (mm->context.noexec) {
- pteptr += PTRS_PER_PTE;
- pte_val(shadow_pteval) =
- (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
- }
- }
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= pte_page(pte)[1].index;
+ } else
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
+ *(pmd_t *) ptep = pmd;
+}
- pmd_val(*pmdp) = pte_val(pteval);
- if (mm->context.noexec) {
- pmdp = get_shadow_table(pmdp);
- pmd_val(*pmdp) = pte_val(shadow_pteval);
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ unsigned long origin;
+ pmd_t pmd;
+
+ pmd = *(pmd_t *) ptep;
+ if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
+ origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= *(unsigned long *) origin;
}
+ return __pmd_to_pte(pmd);
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pmd_t *pmdp = (pmd_t *) ptep;
+ pte_t pte = huge_ptep_get(ptep);
+
+ pmdp_flush_direct(mm, addr, pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ return pte;
}
int arch_prepare_hugepage(struct page *page)
@@ -45,11 +138,11 @@ int arch_prepare_hugepage(struct page *page)
if (MACHINE_HAS_HPAGE)
return 0;
- ptep = (pte_t *) pte_alloc_one(&init_mm, address);
+ ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
if (!ptep)
return -ENOMEM;
- pte = mk_pte(page, PAGE_RW);
+ pte_val(pte) = addr;
for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
pte_val(pte) += PAGE_SIZE;
@@ -68,11 +161,14 @@ void arch_release_hugepage(struct page *page)
ptep = (pte_t *) page[1].index;
if (!ptep)
return;
- pte_free(&init_mm, ptep);
+ clear_table((unsigned long *) ptep, _PAGE_INVALID,
+ PTRS_PER_PTE * sizeof(pte_t));
+ page_table_free(&init_mm, (unsigned long *) ptep);
page[1].index = 0;
}
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
pud_t *pudp;
@@ -119,6 +215,11 @@ int pmd_huge(pmd_t pmd)
return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
}
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmdp, int write)
{
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 05598649b32..0c1073ed1e8 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -1,8 +1,6 @@
/*
- * arch/s390/mm/init.c
- *
* S390 version
- * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 1999
* Author(s): Hartmut Penner (hp@de.ibm.com)
*
* Derived from "arch/i386/mm/init.c"
@@ -23,11 +21,13 @@
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
+#include <linux/memory.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/initrd.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -36,42 +36,60 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+#include <asm/ctl_reg.h>
+#include <asm/sclp.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
-char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
-void show_mem(void)
+unsigned long empty_zero_page, zero_page_mask;
+EXPORT_SYMBOL(empty_zero_page);
+
+static void __init setup_zero_pages(void)
{
- unsigned long i, total = 0, reserved = 0;
- unsigned long shared = 0, cached = 0;
- unsigned long flags;
+ struct cpuid cpu_id;
+ unsigned int order;
struct page *page;
- pg_data_t *pgdat;
-
- printk("Mem-info:\n");
- show_free_areas();
- for_each_online_pgdat(pgdat) {
- pgdat_resize_lock(pgdat, &flags);
- for (i = 0; i < pgdat->node_spanned_pages; i++) {
- if (!pfn_valid(pgdat->node_start_pfn + i))
- continue;
- page = pfn_to_page(pgdat->node_start_pfn + i);
- total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
- }
- pgdat_resize_unlock(pgdat, &flags);
+ int i;
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x9672: /* g5 */
+ case 0x2064: /* z900 */
+ case 0x2066: /* z900 */
+ case 0x2084: /* z990 */
+ case 0x2086: /* z990 */
+ case 0x2094: /* z9-109 */
+ case 0x2096: /* z9-109 */
+ order = 0;
+ break;
+ case 0x2097: /* z10 */
+ case 0x2098: /* z10 */
+ case 0x2817: /* z196 */
+ case 0x2818: /* z196 */
+ order = 2;
+ break;
+ case 0x2827: /* zEC12 */
+ case 0x2828: /* zEC12 */
+ default:
+ order = 5;
+ break;
+ }
+ /* Limit number of empty zero pages for small memory sizes */
+ if (order > 2 && totalram_pages <= 16384)
+ order = 2;
+
+ empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!empty_zero_page)
+ panic("Out of memory in setup_zero_pages");
+
+ page = virt_to_page((void *) empty_zero_page);
+ split_page(page, order);
+ for (i = 1 << order; i > 0; i--) {
+ mark_page_reserved(page);
+ page++;
}
- printk("%ld pages of RAM\n", total);
- printk("%ld reserved pages\n", reserved);
- printk("%ld pages shared\n", shared);
- printk("%ld pages swap cached\n", cached);
+
+ zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
/*
@@ -79,20 +97,23 @@ void show_mem(void)
*/
void __init paging_init(void)
{
- static const int ssm_mask = 0x04000000L;
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long pgd_type;
+ unsigned long pgd_type, asce_bits;
init_mm.pgd = swapper_pg_dir;
- S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
#ifdef CONFIG_64BIT
- /* A three level page table (4TB) is enough for the kernel space. */
- S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION3_ENTRY_EMPTY;
+ if (VMALLOC_END > (1UL << 42)) {
+ asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ pgd_type = _REGION2_ENTRY_EMPTY;
+ } else {
+ asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ pgd_type = _REGION3_ENTRY_EMPTY;
+ }
#else
- S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
+ asce_bits = _ASCE_TABLE_LENGTH;
pgd_type = _SEGMENT_ENTRY_EMPTY;
#endif
+ S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
clear_table((unsigned long *) init_mm.pgd, pgd_type,
sizeof(unsigned long)*2048);
vmem_map_init();
@@ -101,104 +122,110 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
- __raw_local_irq_ssm(ssm_mask);
+ arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
-#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
free_area_init_nodes(max_zone_pfns);
}
void __init mem_init(void)
{
- unsigned long codesize, reservedpages, datasize, initsize;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(0, mm_cpumask(&init_mm));
+ atomic_set(&init_mm.context.attach_count, 1);
- max_mapnr = num_physpages = max_low_pfn;
+ max_mapnr = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
- /* clear the zero-page */
- memset(empty_zero_page, 0, PAGE_SIZE);
-
/* Setup guest page hinting */
cmma_init();
/* this will put all low memory onto the freelists */
- totalram_pages += free_all_bootmem();
-
- reservedpages = 0;
-
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
- printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- max_mapnr << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >>10,
- initsize >> 10);
+ free_all_bootmem();
+ setup_zero_pages(); /* Setup zeroed pages. */
+
+ mem_init_print_info(NULL);
printk("Write protected kernel read-only data: %#lx - %#lx\n",
(unsigned long)&_stext,
PFN_ALIGN((unsigned long)&_eshared) - 1);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void kernel_map_pages(struct page *page, int numpages, int enable)
+void free_initmem(void)
+{
+ free_initmem_default(POISON_FREE_INITMEM);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long address;
- int i;
+ free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+ "initrd");
+}
+#endif
- for (i = 0; i < numpages; i++) {
- address = page_to_phys(page + i);
- pgd = pgd_offset_k(address);
- pud = pud_offset(pgd, address);
- pmd = pmd_offset(pud, address);
- pte = pte_offset_kernel(pmd, address);
- if (!enable) {
- ptep_invalidate(&init_mm, address, pte);
- continue;
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+ unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long size_pages = PFN_DOWN(size);
+ struct zone *zone;
+ int rc;
+
+ rc = vmem_add_mapping(start, size);
+ if (rc)
+ return rc;
+ for_each_zone(zone) {
+ if (zone_idx(zone) != ZONE_MOVABLE) {
+ /* Add range within existing zone limits */
+ zone_start_pfn = zone->zone_start_pfn;
+ zone_end_pfn = zone->zone_start_pfn +
+ zone->spanned_pages;
+ } else {
+ /* Add remaining range to ZONE_MOVABLE */
+ zone_start_pfn = start_pfn;
+ zone_end_pfn = start_pfn + size_pages;
}
- *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
- /* Flush cpu write queue. */
- mb();
+ if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
+ continue;
+ nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
+ zone_end_pfn - start_pfn : size_pages;
+ rc = __add_pages(nid, zone, start_pfn, nr_pages);
+ if (rc)
+ break;
+ start_pfn += nr_pages;
+ size_pages -= nr_pages;
+ if (!size_pages)
+ break;
}
+ if (rc)
+ vmem_remove_mapping(start, size);
+ return rc;
}
-#endif
-void free_initmem(void)
+unsigned long memory_block_size_bytes(void)
{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk ("Freeing unused kernel memory: %ldk freed\n",
- ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10);
+ /*
+ * Make sure the memory block size is always greater
+ * or equal than the memory increment size.
+ */
+ return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
}
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
{
- if (start < end)
- printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ /*
+ * There is no hardware or firmware interface which could trigger a
+ * hot memory remove on s390. So there is nothing that needs to be
+ * implemented.
+ */
+ return -EBUSY;
}
#endif
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
new file mode 100644
index 00000000000..2a2e35416d2
--- /dev/null
+++ b/arch/s390/mm/maccess.c
@@ -0,0 +1,204 @@
+/*
+ * Access kernel memory without faulting -- s390 specific implementation.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/cpu.h>
+#include <asm/ctl_reg.h>
+#include <asm/io.h>
+
+/*
+ * This function writes to kernel memory bypassing DAT and possible
+ * write protection. It copies one to four bytes from src to dst
+ * using the stura instruction.
+ * Returns the number of bytes copied or -EFAULT.
+ */
+static long probe_kernel_write_odd(void *dst, const void *src, size_t size)
+{
+ unsigned long count, aligned;
+ int offset, mask;
+ int rc = -EFAULT;
+
+ aligned = (unsigned long) dst & ~3UL;
+ offset = (unsigned long) dst & 3;
+ count = min_t(unsigned long, 4 - offset, size);
+ mask = (0xf << (4 - count)) & 0xf;
+ mask >>= offset;
+ asm volatile(
+ " bras 1,0f\n"
+ " icm 0,0,0(%3)\n"
+ "0: l 0,0(%1)\n"
+ " lra %1,0(%1)\n"
+ "1: ex %2,0(1)\n"
+ "2: stura 0,%1\n"
+ " la %0,0\n"
+ "3:\n"
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) EX_TABLE(2b,3b)
+ : "+d" (rc), "+a" (aligned)
+ : "a" (mask), "a" (src) : "cc", "memory", "0", "1");
+ return rc ? rc : count;
+}
+
+long probe_kernel_write(void *dst, const void *src, size_t size)
+{
+ long copied = 0;
+
+ while (size) {
+ copied = probe_kernel_write_odd(dst, src, size);
+ if (copied < 0)
+ break;
+ dst += copied;
+ src += copied;
+ size -= copied;
+ }
+ return copied < 0 ? -EFAULT : 0;
+}
+
+static int __memcpy_real(void *dest, void *src, size_t count)
+{
+ register unsigned long _dest asm("2") = (unsigned long) dest;
+ register unsigned long _len1 asm("3") = (unsigned long) count;
+ register unsigned long _src asm("4") = (unsigned long) src;
+ register unsigned long _len2 asm("5") = (unsigned long) count;
+ int rc = -EFAULT;
+
+ asm volatile (
+ "0: mvcle %1,%2,0x0\n"
+ "1: jo 0b\n"
+ " lhi %0,0x0\n"
+ "2:\n"
+ EX_TABLE(1b,2b)
+ : "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1),
+ "+d" (_len2), "=m" (*((long *) dest))
+ : "m" (*((long *) src))
+ : "cc", "memory");
+ return rc;
+}
+
+/*
+ * Copy memory in real mode (kernel to kernel)
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+ unsigned long flags;
+ int rc;
+
+ if (!count)
+ return 0;
+ local_irq_save(flags);
+ __arch_local_irq_stnsm(0xfbUL);
+ rc = __memcpy_real(dest, src, count);
+ local_irq_restore(flags);
+ return rc;
+}
+
+/*
+ * Copy memory in absolute mode (kernel to kernel)
+ */
+void memcpy_absolute(void *dest, void *src, size_t count)
+{
+ unsigned long cr0, flags, prefix;
+
+ flags = arch_local_irq_save();
+ __ctl_store(cr0, 0, 0);
+ __ctl_clear_bit(0, 28); /* disable lowcore protection */
+ prefix = store_prefix();
+ if (prefix) {
+ local_mcck_disable();
+ set_prefix(0);
+ memcpy(dest, src, count);
+ set_prefix(prefix);
+ local_mcck_enable();
+ } else {
+ memcpy(dest, src, count);
+ }
+ __ctl_load(cr0, 0, 0);
+ arch_local_irq_restore(flags);
+}
+
+/*
+ * Copy memory from kernel (real) to user (virtual)
+ */
+int copy_to_user_real(void __user *dest, void *src, unsigned long count)
+{
+ int offs = 0, size, rc;
+ char *buf;
+
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ rc = -EFAULT;
+ while (offs < count) {
+ size = min(PAGE_SIZE, count - offs);
+ if (memcpy_real(buf, src + offs, size))
+ goto out;
+ if (copy_to_user(dest + offs, buf, size))
+ goto out;
+ offs += size;
+ }
+ rc = 0;
+out:
+ free_page((unsigned long) buf);
+ return rc;
+}
+
+/*
+ * Check if physical address is within prefix or zero page
+ */
+static int is_swapped(unsigned long addr)
+{
+ unsigned long lc;
+ int cpu;
+
+ if (addr < sizeof(struct _lowcore))
+ return 1;
+ for_each_online_cpu(cpu) {
+ lc = (unsigned long) lowcore_ptr[cpu];
+ if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Convert a physical pointer for /dev/mem access
+ *
+ * For swapped prefix pages a new buffer is returned that contains a copy of
+ * the absolute memory. The buffer size is maximum one page large.
+ */
+void *xlate_dev_mem_ptr(unsigned long addr)
+{
+ void *bounce = (void *) addr;
+ unsigned long size;
+
+ get_online_cpus();
+ preempt_disable();
+ if (is_swapped(addr)) {
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ bounce = (void *) __get_free_page(GFP_ATOMIC);
+ if (bounce)
+ memcpy_absolute(bounce, (void *) addr, size);
+ }
+ preempt_enable();
+ put_online_cpus();
+ return bounce;
+}
+
+/*
+ * Free converted buffer for /dev/mem access (if necessary)
+ */
+void unxlate_dev_mem_ptr(unsigned long addr, void *buf)
+{
+ if ((void *) addr != buf)
+ free_page((unsigned long) buf);
+}
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
new file mode 100644
index 00000000000..5535cfe0ee1
--- /dev/null
+++ b/arch/s390/mm/mem_detect.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+#define ADDR2G (1ULL << 31)
+
+#define CHUNK_READ_WRITE 0
+#define CHUNK_READ_ONLY 1
+
+static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
+{
+ memblock_add_range(&memblock.memory, start, size, 0, 0);
+ memblock_add_range(&memblock.physmem, start, size, 0, 0);
+}
+
+void __init detect_memory_memblock(void)
+{
+ unsigned long long memsize, rnmax, rzm;
+ unsigned long addr, size;
+ int type;
+
+ rzm = sclp_get_rzm();
+ rnmax = sclp_get_rnmax();
+ memsize = rzm * rnmax;
+ if (!rzm)
+ rzm = 1ULL << 17;
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ rzm = min(ADDR2G, rzm);
+ memsize = min(ADDR2G, memsize);
+ }
+ max_physmem_end = memsize;
+ addr = 0;
+ /* keep memblock lists close to the kernel */
+ memblock_set_bottom_up(true);
+ do {
+ size = 0;
+ type = tprot(addr);
+ do {
+ size += rzm;
+ if (max_physmem_end && addr + size >= max_physmem_end)
+ break;
+ } while (type == tprot(addr + size));
+ if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
+ if (max_physmem_end && (addr + size > max_physmem_end))
+ size = max_physmem_end - addr;
+ memblock_physmem_add(addr, size);
+ }
+ addr += size;
+ } while (addr < max_physmem_end);
+ memblock_set_bottom_up(false);
+ if (!max_physmem_end)
+ max_physmem_end = memblock_end_of_DRAM();
+}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 5932a824547..9b436c21195 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -1,6 +1,4 @@
/*
- * linux/arch/s390/mm/mmap.c
- *
* flexible mmap layout support
*
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
@@ -26,41 +24,61 @@
#include <linux/personality.h>
#include <linux/mm.h>
+#include <linux/mman.h>
#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/compat.h>
#include <asm/pgalloc.h>
+static unsigned long stack_maxrandom_size(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ if (current->personality & ADDR_NO_RANDOMIZE)
+ return 0;
+ return STACK_RND_MASK << PAGE_SHIFT;
+}
+
/*
* Top of mmap area (just below the process stack).
*
- * Leave an at least ~128 MB hole.
+ * Leave at least a ~32 MB hole.
*/
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
+#define MIN_GAP (32*1024*1024)
+#define MAX_GAP (STACK_TOP/6*5)
+
+static inline int mmap_is_legacy(void)
+{
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+ if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ return 1;
+ return sysctl_legacy_va_layout;
+}
+
+static unsigned long mmap_rnd(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ /* 8MB randomization for mmap_base */
+ return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
+}
+
+static unsigned long mmap_base_legacy(void)
+{
+ return TASK_UNMAPPED_BASE + mmap_rnd();
+}
static inline unsigned long mmap_base(void)
{
- unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+ unsigned long gap = rlimit(RLIMIT_STACK);
if (gap < MIN_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
-
- return TASK_SIZE - (gap & PAGE_MASK);
-}
-
-static inline int mmap_is_legacy(void)
-{
-#ifdef CONFIG_64BIT
- /*
- * Force standard allocation for 64 bit programs.
- */
- if (!test_thread_flag(TIF_31BIT))
- return 1;
-#endif
- return sysctl_legacy_va_layout ||
- (current->personality & ADDR_COMPAT_LAYOUT) ||
- current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
+ gap &= PAGE_MASK;
+ return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap;
}
#ifndef CONFIG_64BIT
@@ -76,55 +94,69 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->mmap_base = mmap_base_legacy();
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
-EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
#else
+int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
+{
+ if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+ return 0;
+ if (!(flags & MAP_FIXED))
+ addr = 0;
+ if ((addr + len) >= TASK_SIZE)
+ return crst_table_upgrade(current->mm, 1UL << 53);
+ return 0;
+}
+
static unsigned long
s390_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ unsigned long area;
int rc;
- addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
- if (addr & ~PAGE_MASK)
- return addr;
- if (unlikely(mm->context.asce_limit < addr + len)) {
- rc = crst_table_upgrade(mm, addr + len);
+ area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+ if (!(area & ~PAGE_MASK))
+ return area;
+ if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+ /* Upgrade the page table to 4 levels and retry. */
+ rc = crst_table_upgrade(mm, 1UL << 53);
if (rc)
return (unsigned long) rc;
+ area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
}
- return addr;
+ return area;
}
static unsigned long
-s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
struct mm_struct *mm = current->mm;
- unsigned long addr = addr0;
+ unsigned long area;
int rc;
- addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
- if (addr & ~PAGE_MASK)
- return addr;
- if (unlikely(mm->context.asce_limit < addr + len)) {
- rc = crst_table_upgrade(mm, addr + len);
+ area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+ if (!(area & ~PAGE_MASK))
+ return area;
+ if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+ /* Upgrade the page table to 4 levels and retry. */
+ rc = crst_table_upgrade(mm, 1UL << 53);
if (rc)
return (unsigned long) rc;
+ area = arch_get_unmapped_area_topdown(filp, addr, len,
+ pgoff, flags);
}
- return addr;
+ return area;
}
/*
* This function, called very early during the creation of a new
@@ -137,15 +169,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* bit is set, or if the expected stack growth is unlimited:
*/
if (mmap_is_legacy()) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->mmap_base = mmap_base_legacy();
mm->get_unmapped_area = s390_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
-EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
#endif
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index fc0ad73ffd9..a90d45e9dfb 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -1,6 +1,4 @@
/*
- * arch/s390/mm/page-states.c
- *
* Copyright IBM Corp. 2008
*
* Guest page hinting for unused pages.
@@ -12,16 +10,18 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/gfp.h>
#include <linux/init.h>
#define ESSA_SET_STABLE 1
#define ESSA_SET_UNUSED 2
-static int cmma_flag;
+static int cmma_flag = 1;
static int __init cmma(char *str)
{
char *parm;
+
parm = strstrip(str);
if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) {
cmma_flag = 1;
@@ -32,7 +32,6 @@ static int __init cmma(char *str)
return 1;
return 0;
}
-
__setup("cmma=", cmma);
void __init cmma_init(void)
@@ -52,28 +51,64 @@ void __init cmma_init(void)
cmma_flag = 0;
}
-void arch_free_page(struct page *page, int order)
+static inline void set_page_unstable(struct page *page, int order)
{
int i, rc;
- if (!cmma_flag)
- return;
for (i = 0; i < (1 << order); i++)
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
: "=&d" (rc)
- : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
+ : "a" (page_to_phys(page + i)),
"i" (ESSA_SET_UNUSED));
}
-void arch_alloc_page(struct page *page, int order)
+void arch_free_page(struct page *page, int order)
{
- int i, rc;
-
if (!cmma_flag)
return;
+ set_page_unstable(page, order);
+}
+
+static inline void set_page_stable(struct page *page, int order)
+{
+ int i, rc;
+
for (i = 0; i < (1 << order); i++)
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
: "=&d" (rc)
- : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
+ : "a" (page_to_phys(page + i)),
"i" (ESSA_SET_STABLE));
}
+
+void arch_alloc_page(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_stable(page, order);
+}
+
+void arch_set_page_states(int make_stable)
+{
+ unsigned long flags, order, t;
+ struct list_head *l;
+ struct page *page;
+ struct zone *zone;
+
+ if (!cmma_flag)
+ return;
+ if (make_stable)
+ drain_local_pages(NULL);
+ for_each_populated_zone(zone) {
+ spin_lock_irqsave(&zone->lock, flags);
+ for_each_migratetype_order(order, t) {
+ list_for_each(l, &zone->free_area[order].free_list[t]) {
+ page = list_entry(l, struct page, lru);
+ if (make_stable)
+ set_page_stable(page, order);
+ else
+ set_page_unstable(page, order);
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 00000000000..8400f494623
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+#if PAGE_DEFAULT_KEY
+static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
+{
+ asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
+ : [addr] "+a" (addr) : [skey] "d" (skey));
+ return addr;
+}
+
+void __storage_key_init_range(unsigned long start, unsigned long end)
+{
+ unsigned long boundary, size;
+
+ while (start < end) {
+ if (MACHINE_HAS_EDAT1) {
+ /* set storage keys for a 1MB frame */
+ size = 1UL << 20;
+ boundary = (start + size) & ~(size - 1);
+ if (boundary <= end) {
+ do {
+ start = sske_frame(start, PAGE_DEFAULT_KEY);
+ } while (start < boundary);
+ continue;
+ }
+ }
+ page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
+ start += PAGE_SIZE;
+ }
+}
+#endif
+
+static pte_t *walk_page_table(unsigned long addr)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(*pgdp))
+ return NULL;
+ pudp = pud_offset(pgdp, addr);
+ if (pud_none(*pudp) || pud_large(*pudp))
+ return NULL;
+ pmdp = pmd_offset(pudp, addr);
+ if (pmd_none(*pmdp) || pmd_large(*pmdp))
+ return NULL;
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (pte_none(*ptep))
+ return NULL;
+ return ptep;
+}
+
+static void change_page_attr(unsigned long addr, int numpages,
+ pte_t (*set) (pte_t))
+{
+ pte_t *ptep, pte;
+ int i;
+
+ for (i = 0; i < numpages; i++) {
+ ptep = walk_page_table(addr);
+ if (WARN_ON_ONCE(!ptep))
+ break;
+ pte = *ptep;
+ pte = set(pte);
+ __ptep_ipte(addr, ptep);
+ *ptep = pte;
+ addr += PAGE_SIZE;
+ }
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ change_page_attr(addr, numpages, pte_wrprotect);
+ return 0;
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ change_page_attr(addr, numpages, pte_mkwrite);
+ return 0;
+}
+
+/* not possible */
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return 0;
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return 0;
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ unsigned long address;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < numpages; i++) {
+ address = page_to_phys(page + i);
+ pgd = pgd_offset_k(address);
+ pud = pud_offset(pgd, address);
+ pmd = pmd_offset(pud, address);
+ pte = pte_offset_kernel(pmd, address);
+ if (!enable) {
+ __ptep_ipte(address, pte);
+ pte_val(*pte) = _PAGE_INVALID;
+ continue;
+ }
+ pte_val(*pte) = __pa(address);
+ }
+}
+
+#ifdef CONFIG_HIBERNATION
+bool kernel_page_present(struct page *page)
+{
+ unsigned long addr;
+ int cc;
+
+ addr = page_to_phys(page);
+ asm volatile(
+ " lra %1,0(%1)\n"
+ " ipm %0\n"
+ " srl %0,28"
+ : "=d" (cc), "+a" (addr) : : "cc");
+ return cc == 0;
+}
+#endif /* CONFIG_HIBERNATION */
+
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3d98ba82ea6..37b8241ec78 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,24 +1,24 @@
/*
- * arch/s390/mm/pgtable.c
- *
- * Copyright IBM Corp. 2007
+ * Copyright IBM Corp. 2007, 2011
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
-#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -27,78 +27,52 @@
#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
-#define TABLES_PER_PAGE 4
-#define FRAG_MASK 15UL
-#define SECOND_HALVES 10UL
-
-void clear_table_pgstes(unsigned long *table)
-{
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
- memset(table + 256, 0, PAGE_SIZE/4);
- clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
- memset(table + 768, 0, PAGE_SIZE/4);
-}
-
+#define FRAG_MASK 0x0f
#else
#define ALLOC_ORDER 2
-#define TABLES_PER_PAGE 2
-#define FRAG_MASK 3UL
-#define SECOND_HALVES 2UL
-
-void clear_table_pgstes(unsigned long *table)
-{
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
- memset(table + 256, 0, PAGE_SIZE/2);
-}
-
+#define FRAG_MASK 0x03
#endif
-unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
if (!page)
return NULL;
- page->index = 0;
- if (noexec) {
- struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
- if (!shadow) {
- __free_pages(page, ALLOC_ORDER);
- return NULL;
- }
- page->index = page_to_phys(shadow);
- }
- spin_lock(&mm->page_table_lock);
- list_add(&page->lru, &mm->context.crst_list);
- spin_unlock(&mm->page_table_lock);
return (unsigned long *) page_to_phys(page);
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- unsigned long *shadow = get_shadow_table(table);
- struct page *page = virt_to_page(table);
-
- spin_lock(&mm->page_table_lock);
- list_del(&page->lru);
- spin_unlock(&mm->page_table_lock);
- if (shadow)
- free_pages((unsigned long) shadow, ALLOC_ORDER);
free_pages((unsigned long) table, ALLOC_ORDER);
}
#ifdef CONFIG_64BIT
+static void __crst_table_upgrade(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ set_user_asce(mm);
+ }
+ __tlb_flush_local();
+}
+
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
unsigned long *table, *pgd;
unsigned long entry;
+ int flush;
BUG_ON(limit > (1UL << 53));
+ flush = 0;
repeat:
- table = crst_table_alloc(mm, mm->context.noexec);
+ table = crst_table_alloc(mm);
if (!table)
return -ENOMEM;
- spin_lock(&mm->page_table_lock);
+ spin_lock_bh(&mm->page_table_lock);
if (mm->context.asce_limit < limit) {
pgd = (unsigned long *) mm->pgd;
if (mm->context.asce_limit <= (1UL << 31)) {
@@ -117,14 +91,17 @@ repeat:
crst_table_init(table, entry);
pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
+ mm->task_size = mm->context.asce_limit;
table = NULL;
+ flush = 1;
}
- spin_unlock(&mm->page_table_lock);
+ spin_unlock_bh(&mm->page_table_lock);
if (table)
crst_table_free(mm, table);
if (mm->context.asce_limit < limit)
goto repeat;
- update_mm(mm, current);
+ if (flush)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
return 0;
}
@@ -132,9 +109,10 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
pgd_t *pgd;
- if (mm->context.asce_limit <= limit)
- return;
- __tlb_flush_mm(mm);
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ __tlb_flush_mm(mm);
+ }
while (mm->context.asce_limit > limit) {
pgd = mm->pgd;
switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
@@ -154,98 +132,1231 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
BUG();
}
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ mm->task_size = mm->context.asce_limit;
crst_table_free(mm, (unsigned long *) pgd);
}
- update_mm(mm, current);
+ if (current->active_mm == mm)
+ set_user_asce(mm);
}
#endif
+#ifdef CONFIG_PGSTE
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm)
+{
+ struct gmap *gmap;
+ struct page *page;
+ unsigned long *table;
+
+ gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+ if (!gmap)
+ goto out;
+ INIT_LIST_HEAD(&gmap->crst_list);
+ gmap->mm = mm;
+ page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ if (!page)
+ goto out_free;
+ list_add(&page->lru, &gmap->crst_list);
+ table = (unsigned long *) page_to_phys(page);
+ crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ gmap->table = table;
+ gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ list_add(&gmap->list, &mm->context.gmap_list);
+ return gmap;
+
+out_free:
+ kfree(gmap);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
+{
+ struct gmap_pgtable *mp;
+ struct gmap_rmap *rmap;
+ struct page *page;
+
+ if (*table & _SEGMENT_ENTRY_INVALID)
+ return 0;
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ list_for_each_entry(rmap, &mp->mapper, list) {
+ if (rmap->entry != table)
+ continue;
+ list_del(&rmap->list);
+ kfree(rmap);
+ break;
+ }
+ *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
+ return 1;
+}
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
+ _ASCE_TYPE_REGION1);
+ else
+ __tlb_flush_global();
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_free(struct gmap *gmap)
+{
+ struct page *page, *next;
+ unsigned long *table;
+ int i;
+
+
+ /* Flush tlb. */
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
+ _ASCE_TYPE_REGION1);
+ else
+ __tlb_flush_global();
+
+ /* Free all segment & region tables. */
+ down_read(&gmap->mm->mmap_sem);
+ spin_lock(&gmap->mm->page_table_lock);
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
+ table = (unsigned long *) page_to_phys(page);
+ if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
+ /* Remove gmap rmap structures for segment table. */
+ for (i = 0; i < PTRS_PER_PMD; i++, table++)
+ gmap_unlink_segment(gmap, table);
+ __free_pages(page, ALLOC_ORDER);
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ list_del(&gmap->list);
+ kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
 /*
- * page table entry allocation/free routines.
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ *
+ * Allocates a new table, initializes it with @init via crst_table_init()
+ * and installs it in *@table if that entry is still marked invalid.  The
+ * page_table_lock is dropped around the allocation, so *@table is checked
+ * again afterwards; if it was populated in the meantime the new pages are
+ * freed again.  Returns 0 on success and -ENOMEM if the allocation failed.
 */
-unsigned long *page_table_alloc(struct mm_struct *mm)
+static int gmap_alloc_table(struct gmap *gmap,
+ unsigned long *table, unsigned long init)
+ __releases(&gmap->mm->page_table_lock)
+ __acquires(&gmap->mm->page_table_lock)
 {
 struct page *page;
+ unsigned long *new;
+
+ /* since we don't free the gmap table until gmap_free we can unlock */
+ spin_unlock(&gmap->mm->page_table_lock);
+ page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ spin_lock(&gmap->mm->page_table_lock);
+ if (!page)
+ return -ENOMEM;
+ new = (unsigned long *) page_to_phys(page);
+ crst_table_init(new, init);
+ /* Re-check under the lock: only install if the entry is still empty. */
+ if (*table & _REGION_ENTRY_INVALID) {
+ list_add(&page->lru, &gmap->crst_list);
+ *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ (*table & _REGION_ENTRY_TYPE_MASK);
+ } else
+ __free_pages(page, ALLOC_ORDER);
+ return 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns -EINVAL if @to or @len is not a multiple of PMD_SIZE, if @len is
+ * zero or if @to + @len wraps around; returns 0 otherwise.  The walk stops
+ * at the first segment whose region tables are missing and still returns 0.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
 unsigned long *table;
- unsigned long bits;
+ unsigned long off;
+ int flush;
+
+ if ((to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_read(&gmap->mm->mmap_sem);
+ spin_lock(&gmap->mm->page_table_lock);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Walk the guest addr space page table */
+ table = gmap->table + (((to + off) >> 53) & 0x7ff);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 42) & 0x7ff);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 31) & 0x7ff);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 20) & 0x7ff);
+
+ /* Clear segment table entry in guest address space. */
+ flush |= gmap_unlink_segment(gmap, table);
+ *table = _SEGMENT_ENTRY_INVALID;
+ }
+out:
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ /* Flush only if gmap_unlink_segment() reported something to flush. */
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ * -EINVAL is returned if @from, @to or @len is not a multiple of PMD_SIZE,
+ * if @len is zero, if @from + @len exceeds TASK_MAX_SIZE or if either range
+ * wraps around.  On -ENOMEM the range [@to, @to + @len) is unmapped again.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len)
+{
+ unsigned long *table;
+ unsigned long off;
+ int flush;
+
+ if ((from | to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || from + len > TASK_MAX_SIZE ||
+ from + len < from || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_read(&gmap->mm->mmap_sem);
+ spin_lock(&gmap->mm->page_table_lock);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Walk the gmap address space page table */
+ table = gmap->table + (((to + off) >> 53) & 0x7ff);
+ /* Missing levels are allocated on the way down. */
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
+ goto out_unmap;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 42) & 0x7ff);
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
+ goto out_unmap;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 31) & 0x7ff);
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
+ goto out_unmap;
+ table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
+ table = table + (((to + off) >> 20) & 0x7ff);
+
+ /* Store 'from' address in an invalid segment table entry. */
+ flush |= gmap_unlink_segment(gmap, table);
+ *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+
+out_unmap:
+ /* Table allocation failed: drop the locks and undo the whole range. */
+ spin_unlock(&gmap->mm->page_table_lock);
+ up_read(&gmap->mm->mmap_sem);
+ gmap_unmap_segment(gmap, to, len);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/*
+ * Descend the three region-table levels of @gmap for @address and return a
+ * pointer to the segment table entry.  ERR_PTR(-EFAULT) is returned as soon
+ * as a region entry is marked invalid.
+ */
+static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
+{
+	static const unsigned int region_shift[] = { 53, 42, 31 };
+	unsigned long *level = gmap->table;
+	unsigned long *entry;
+	unsigned int i;
+
+	for (i = 0; i < sizeof(region_shift) / sizeof(region_shift[0]); i++) {
+		entry = level + ((address >> region_shift[i]) & 0x7ff);
+		if (unlikely(*entry & _REGION_ENTRY_INVALID))
+			return ERR_PTR(-EFAULT);
+		level = (unsigned long *)(*entry & _REGION_ENTRY_ORIGIN);
+	}
+	/* @level now is the segment table; index it with bits 20..30. */
+	return level + ((address >> 20) & 0x7ff);
+}
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @address: guest address
+ * @gmap: pointer to guest mapping meta data structure
+ *
+ * Returns the user space address that corresponds to the guest address, or
+ * -EFAULT if no such mapping exists.  Missing page table entries are not
+ * established.  The caller must hold the mmap_sem of the mm that belongs
+ * to the address space.
+ */
+unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *entry, segment, offset;
+	struct gmap_pgtable *mp;
+	struct page *page;
+
+	current->thread.gmap_addr = address;
+	entry = gmap_table_walk(address, gmap);
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	/* Convert the gmap address to an mm address. */
+	segment = *entry;
+	offset = address & ~PMD_MASK;
+	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
+		/* Valid entry: take vmaddr from the page table's gmap_pgtable. */
+		page = pfn_to_page(segment >> PAGE_SHIFT);
+		mp = (struct gmap_pgtable *) page->index;
+		return mp->vmaddr | offset;
+	}
+	if (segment & _SEGMENT_ENTRY_PROTECT) {
+		/* Invalid but protected: the entry itself holds the vmaddr. */
+		return (segment & _SEGMENT_ENTRY_ORIGIN) | offset;
+	}
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @address: guest address
+ * @gmap: pointer to guest mapping meta data structure
+ *
+ * Wrapper around __gmap_translate() that takes the mmap_sem for reading.
+ * Returns the user space address that corresponds to the guest address, or
+ * -EFAULT if no such mapping exists.  Missing page table entries are not
+ * established.
+ */
+unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
+{
+	struct mm_struct *mm = gmap->mm;
+	unsigned long vmaddr;
+
+	down_read(&mm->mmap_sem);
+	vmaddr = __gmap_translate(address, gmap);
+	up_read(&mm->mmap_sem);
+	return vmaddr;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/*
+ * Connect the gmap segment table entry at @segment_ptr, whose current value
+ * @segment holds a parent address, to the page table of the parent mm that
+ * maps this address.  The page table is allocated if it is not present.
+ * Returns 0 on success (also if *@segment_ptr changed in the meantime and
+ * nothing was linked), -EFAULT if no vma covers the address or the pmd is
+ * large, and -ENOMEM if an allocation failed.
+ */
+static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
+ unsigned long *segment_ptr, struct gmap *gmap)
+{
+ unsigned long vmaddr;
+ struct vm_area_struct *vma;
+ struct gmap_pgtable *mp;
+ struct gmap_rmap *rmap;
+ struct mm_struct *mm;
+ struct page *page;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
- bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
+ mm = gmap->mm;
+ vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+ vma = find_vma(mm, vmaddr);
+ if (!vma || vma->vm_start > vmaddr)
+ return -EFAULT;
+ /* Walk the parent mm page table */
+ pgd = pgd_offset(mm, vmaddr);
+ pud = pud_alloc(mm, pgd, vmaddr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = pmd_alloc(mm, pud, vmaddr);
+ if (!pmd)
+ return -ENOMEM;
+ if (!pmd_present(*pmd) &&
+ __pte_alloc(mm, vma, pmd, vmaddr))
+ return -ENOMEM;
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
+ /* pmd now points to a valid segment table entry. */
+ rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
+ if (!rmap)
+ return -ENOMEM;
+ /* Link gmap segment table entry location to page table. */
+ page = pmd_page(*pmd);
+ mp = (struct gmap_pgtable *) page->index;
+ rmap->gmap = gmap;
+ rmap->entry = segment_ptr;
+ rmap->vmaddr = address & PMD_MASK;
 spin_lock(&mm->page_table_lock);
- page = NULL;
+ /* Only link if the entry still has the value the caller looked at. */
+ if (*segment_ptr == segment) {
+ list_add(&rmap->list, &mp->mapper);
+ /* Set gmap segment table entry to page table. */
+ *segment_ptr = pmd_val(*pmd) & PAGE_MASK;
+ rmap = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ /* rmap is NULL if it was linked above; otherwise it is unused. */
+ kfree(rmap);
+ return 0;
+}
+
+/*
+ * Disconnect the page table at @table from every gmap that maps it: each
+ * linked gmap segment entry is reset to an invalid, protected entry that
+ * carries mp->vmaddr, and the rmap structure is freed.  A global TLB flush
+ * is done if at least one entry was reset.
+ */
+static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
+{
+ struct gmap_rmap *rmap, *next;
+ struct gmap_pgtable *mp;
+ struct page *page;
+ int flush;
+
+ flush = 0;
+ spin_lock(&mm->page_table_lock);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ mp = (struct gmap_pgtable *) page->index;
+ list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
+ *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+ _SEGMENT_ENTRY_PROTECT);
+ list_del(&rmap->list);
+ kfree(rmap);
+ flush = 1;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (flush)
+ __tlb_flush_global();
+}
+
+/*
+ * Resolve a guest address to a parent address, connecting the gmap segment
+ * entry to the parent page table if that has not happened yet.  Returns the
+ * parent address, -EFAULT if nothing is mapped at @address, or the error
+ * reported by gmap_connect_pgtable().
+ *
+ * this function is assumed to be called with mmap_sem held
+ */
+unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *entry, segment;
+	struct gmap_pgtable *mp;
+	struct page *page;
+	int rc;
+
+	current->thread.gmap_addr = address;
+	entry = gmap_table_walk(address, gmap);
+	if (IS_ERR(entry))
+		return -EFAULT;
+
+	/* Retry until the entry is valid; it is re-read after each connect. */
+	for (segment = *entry; segment & _SEGMENT_ENTRY_INVALID;
+	     segment = *entry) {
+		/* Nothing mapped in the gmap address space. */
+		if (!(segment & _SEGMENT_ENTRY_PROTECT))
+			return -EFAULT;
+		rc = gmap_connect_pgtable(address, segment, entry, gmap);
+		if (rc)
+			return rc;
+	}
+
+	/* Page table is present: convert the gmap address to an mm address. */
+	page = pfn_to_page(segment >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	return mp->vmaddr | (address & ~PMD_MASK);
+}
+
+/*
+ * Locked variant of __gmap_fault(): takes the mmap_sem of the gmap's mm
+ * for reading around the call and passes the result through.
+ */
+unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
+{
+	struct mm_struct *mm = gmap->mm;
+	unsigned long vmaddr;
+
+	down_read(&mm->mmap_sem);
+	vmaddr = __gmap_fault(address, gmap);
+	up_read(&mm->mmap_sem);
+
+	return vmaddr;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+/*
+ * Adjust the mm counters for the swap entry that is about to be dropped and
+ * release the entry with free_swap_and_cache().
+ */
+static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+{
+	if (non_swap_entry(entry)) {
+		if (is_migration_entry(entry)) {
+			struct page *page = migration_entry_to_page(entry);
+
+			dec_mm_counter(mm, PageAnon(page) ? MM_ANONPAGES :
+							    MM_FILEPAGES);
+		}
+	} else {
+		dec_mm_counter(mm, MM_SWAPENTS);
+	}
+	free_swap_and_cache(entry);
+}
+
+/*
+ * Drop the swap entry behind @address if the pgste marks the page as unused
+ * or as logically zero.  Nothing is done if no pte can be looked up or the
+ * pte is not a swap pte.
+ *
+ * The mm->mmap_sem lock must be held
+ */
+static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
+{
+	unsigned long ptev, pgstev;
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep, pte;
+
+	ptep = get_locked_pte(mm, address, &ptl);
+	if (unlikely(!ptep))
+		return;
+	pte = *ptep;
+	if (!pte_swap(pte))
+		goto out_pte;
+	/* Zap unused and logically-zero pages */
+	pgste = pgste_get_lock(ptep);
+	pgstev = pgste_val(pgste);
+	ptev = pte_val(pte);
+	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
+	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
+		gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
+		pte_clear(mm, address, ptep);
+	}
+	pgste_set_unlock(ptep, pgste);
+out_pte:
+	/* pte_unmap_unlock() takes the pte pointer, not the pte value. */
+	pte_unmap_unlock(ptep, ptl);
+}
+
+/*
+ * Look at the pte and pgste behind a guest address without taking the pte
+ * lock and hand the page to gmap_zap_unused() if it looks unused or
+ * logically zero.  Does nothing if the segment is not connected.
+ *
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *segment_ptr, *pte_slot;
+	unsigned long segment, pgstev, ptev, vmaddr;
+	struct gmap_pgtable *mp;
+	struct page *page;
+	bool unused, logical_zero;
+
+	segment_ptr = gmap_table_walk(address, gmap);
+	if (IS_ERR(segment_ptr))
+		return;
+	segment = *segment_ptr;
+	if (segment & _SEGMENT_ENTRY_INVALID)
+		return;
+
+	/* Page table is present */
+	page = pfn_to_page(segment >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	vmaddr = mp->vmaddr | (address & ~PMD_MASK);
+	pte_slot = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
+	pte_slot += (vmaddr >> 12) & 0xff;
+	/* The pgste sits PTRS_PER_PTE entries behind its pte. */
+	pgstev = pte_slot[PTRS_PER_PTE];
+	ptev = pte_slot[0];
+
+	/* quick check, checked again with locks held */
+	unused = (pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED;
+	logical_zero = (pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID);
+	if (unused || logical_zero)
+		gmap_zap_unused(gmap->mm, vmaddr);
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+/*
+ * Zap the parent mappings behind the guest address range [@from, @to).
+ * The range is processed segment by segment; segments that are not
+ * connected to a page table are skipped.
+ */
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+{
+	unsigned long *table, address, size;
+	struct vm_area_struct *vma;
+	struct gmap_pgtable *mp;
+	struct page *page;
+
+	down_read(&gmap->mm->mmap_sem);
+	for (address = from; address < to;
+	     address = (address + PMD_SIZE) & PMD_MASK) {
+		/* Walk the gmap address space page table */
+		table = gmap_table_walk(address, gmap);
+		if (IS_ERR(table))
+			continue;
+		if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
+			continue;
+		page = pfn_to_page(*table >> PAGE_SHIFT);
+		mp = (struct gmap_pgtable *) page->index;
+		/* NOTE(review): find_vma() result is used unchecked - confirm. */
+		vma = find_vma(gmap->mm, mp->vmaddr);
+		/* Clamp to the end of the range or of this segment. */
+		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
+		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
+			       size, NULL);
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ *
+ * Adds @nb to the notifier list under gmap_notifier_lock.
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_add(&nb->list, &gmap_notifier_list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ *
+ * Removes @nb from the notifier list under gmap_notifier_lock and
+ * reinitializes its list head.
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_del_init(&nb->list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. -EINVAL is returned if @start or @len is not page
+ * aligned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+{
+	unsigned long addr;
+	spinlock_t *ptl;
+	pte_t *ptep, entry;
+	pgste_t pgste;
+	int rc = 0;
+
+	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+		return -EINVAL;
+	down_read(&gmap->mm->mmap_sem);
+	while (len) {
+		/* Convert gmap address and connect the page tables */
+		addr = __gmap_fault(start, gmap);
+		if (IS_ERR_VALUE(addr)) {
+			rc = addr;
+			break;
+		}
+		/* Get the page mapped */
+		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
+			rc = -EFAULT;
+			break;
+		}
+		/* Walk the process page table, lock and get pte pointer */
+		ptep = get_locked_pte(gmap->mm, addr, &ptl);
+		if (unlikely(!ptep))
+			continue;
+		/*
+		 * Set notification bit in the pgste of the pte.  If the pte
+		 * is invalid or protected again, the same page is retried.
+		 */
+		entry = *ptep;
+		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+			pgste = pgste_get_lock(ptep);
+			pgste_val(pgste) |= PGSTE_IN_BIT;
+			pgste_set_unlock(ptep, pgste);
+			start += PAGE_SIZE;
+			len -= PAGE_SIZE;
+		}
+		/* Release what get_locked_pte() acquired: mapping and lock. */
+		pte_unmap_unlock(ptep, ptl);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @pte: pointer to the page table entry
+ *
+ * Every registered notifier is called once for each gmap that is linked to
+ * the page table containing @pte.
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
+{
+	struct gmap_pgtable *mp;
+	struct gmap_notifier *nb;
+	struct gmap_rmap *rmap;
+	struct page *page;
+	unsigned long offset;
+
+	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+
+	/* Byte offset of the pte in its table, scaled to an address offset. */
+	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+	offset *= 4096 / sizeof(pte_t);
+
+	spin_lock(&gmap_notifier_lock);
+	list_for_each_entry(rmap, &mp->mapper, list) {
+		list_for_each_entry(nb, &gmap_notifier_list, list) {
+			nb->notifier_call(rmap->gmap, rmap->vmaddr + offset);
+		}
+	}
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+
+/*
+ * Returns nonzero if page->_mapcount reads 0, which is the value
+ * page_table_alloc_pgste() stores for page tables with pgstes.
+ */
+static inline int page_table_with_pgste(struct page *page)
+{
+	int mapcount = atomic_read(&page->_mapcount);
+
+	return mapcount == 0;
+}
+
+/*
+ * Allocate a full page as page table with pgstes together with its
+ * gmap_pgtable bookkeeping structure.  The first half of the page is
+ * cleared with _PAGE_INVALID, the second half with 0.  Returns the table
+ * or NULL if any of the allocation steps failed.
+ */
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+						    unsigned long vmaddr)
+{
+	struct gmap_pgtable *mp;
+	unsigned long *table;
+	struct page *page;
+
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
+	if (!mp)
+		goto out_free_page;
+	if (!pgtable_page_ctor(page))
+		goto out_free_mp;
+
+	mp->vmaddr = vmaddr & PMD_MASK;
+	INIT_LIST_HEAD(&mp->mapper);
+	page->index = (unsigned long) mp;
+	/* _mapcount 0 is what page_table_with_pgste() tests for. */
+	atomic_set(&page->_mapcount, 0);
+	table = (unsigned long *) page_to_phys(page);
+	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	return table;
+
+out_free_mp:
+	kfree(mp);
+out_free_page:
+	__free_page(page);
+	return NULL;
+}
+
+/*
+ * Free a page table with pgstes and its gmap_pgtable structure.  It is a
+ * BUG if a gmap is still linked to the page table.
+ */
+static inline void page_table_free_pgste(unsigned long *table)
+{
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	struct gmap_pgtable *mp = (struct gmap_pgtable *) page->index;
+
+	BUG_ON(!list_empty(&mp->mapper));
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	kfree(mp);
+	__free_page(page);
+}
+
+/*
+ * Reset the pgstes of all ptes below @pmd in the range [@addr, @end): the
+ * usage state bits are cleared, and with @init_skey the ACC, FP, GR and GC
+ * bits are cleared as well and the storage key of every valid, writable
+ * page is set to PAGE_DEFAULT_KEY.  Returns the address the walk ended at.
+ */
+static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, bool init_skey)
+{
+ pte_t *start_pte, *pte;
+ spinlock_t *ptl;
+ pgste_t pgste;
+
+ start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte = start_pte;
+ do {
+ pgste = pgste_get_lock(pte);
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+ if (init_skey) {
+ unsigned long address;
+
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+ PGSTE_GR_BIT | PGSTE_GC_BIT);
+
+ /* skip invalid and not writable pages */
+ if (pte_val(*pte) & _PAGE_INVALID ||
+ !(pte_val(*pte) & _PAGE_WRITE)) {
+ pgste_set_unlock(pte, pgste);
+ /* continue still runs the loop condition, i.e. advances pte */
+ continue;
+ }
+
+ address = pte_val(*pte) & PAGE_MASK;
+ page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
+ }
+ pgste_set_unlock(pte, pgste);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap_unlock(start_pte, ptl);
+
+ return addr;
+}
+
+/*
+ * Walk the pmd entries below @pud for [@addr, @end) and reset the pgstes of
+ * every populated page table.  Returns the address the walk ended at.
+ */
+static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, bool init_skey)
+{
+ unsigned long next;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
+ } while (pmd++, addr = next, addr != end);
+
+ return addr;
+}
+
+/*
+ * Walk the pud entries below @pgd for [@addr, @end) and descend into every
+ * populated entry.  Returns the address the walk ended at.
+ */
+static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, bool init_skey)
+{
+ unsigned long next;
+ pud_t *pud;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
+ } while (pud++, addr = next, addr != end);
+
+ return addr;
+}
+
+/*
+ * Reset the pgstes of @mm in the range [@start, @end) with the mmap_sem
+ * held for writing.  With @init_skey the storage keys are initialized as
+ * well and storage key handling is switched on for @mm afterwards; nothing
+ * is done in that case if @mm already uses storage keys.
+ */
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool init_skey)
+{
+	unsigned long addr, next;
+	pgd_t *pgd;
+
+	down_write(&mm->mmap_sem);
+	if (init_skey && mm_use_skey(mm))
+		goto out_up;
+	addr = start;
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
+	} while (pgd++, addr = next, addr != end);
+	/* Flag the mm that was walked, not whatever current->mm happens to be. */
+	if (init_skey)
+		mm->context.use_skey = 1;
+out_up:
+	up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL(page_table_reset_pgste);
+
+/*
+ * Set the guest storage key for the page at @addr in @mm.  The key is
+ * recorded in the pgste; if the pte is valid the ACC and FP bits are also
+ * written to the real storage key and the host changed/referenced bits are
+ * merged into the pgste.  @nq is passed on inverted to
+ * page_set_storage_key().  Returns 0 on success and -EFAULT if no pte
+ * could be looked up for @addr.
+ */
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned long key, bool nq)
+{
+	spinlock_t *ptl;
+	pgste_t old, new;
+	pte_t *ptep;
+
+	down_read(&mm->mmap_sem);
+	/* Look the pte up in the mm whose mmap_sem is held. */
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+
+	new = old = pgste_get_lock(ptep);
+	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+		unsigned long address, bits, skey;
+
+		address = pte_val(*ptep) & PAGE_MASK;
+		skey = (unsigned long) page_get_storage_key(address);
+		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+		/* Set storage key ACC and FP */
+		page_set_storage_key(address, skey, !nq);
+		/* Merge host changed & referenced into pgste */
+		pgste_val(new) |= bits << 52;
+	}
+	/* changing the guest storage key is considered a change of the page */
+	if ((pgste_val(new) ^ pgste_val(old)) &
+	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+		pgste_val(new) |= PGSTE_UC_BIT;
+
+	pgste_set_unlock(ptep, new);
+	/* pte_unmap_unlock() takes the pte pointer, not the pte value. */
+	pte_unmap_unlock(ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
+#else /* CONFIG_PGSTE */
+
+/*
+ * Stubs for the branch without CONFIG_PGSTE (per the #else/#endif
+ * annotations): no page table has pgstes and none can be allocated.
+ */
+static inline int page_table_with_pgste(struct page *page)
+{
+ return 0;
+}
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
+ unsigned long vmaddr)
+{
+ return NULL;
+}
+
+/* Nothing to reset in this branch. */
+void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool init_skey)
+{
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+}
+
+static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
+ unsigned long *table)
+{
+}
+
+#endif /* CONFIG_PGSTE */
+
+/*
+ * Atomically xor @bits into @v with a cmpxchg retry loop and return the
+ * resulting value.
+ */
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int seen, want;
+
+	for (;;) {
+		seen = atomic_read(v);
+		want = seen ^ bits;
+		if (atomic_cmpxchg(v, seen, want) == seen)
+			return want;
+	}
+}
+
+/*
+ * page table entry allocation/free routines.
+ *
+ * page_table_alloc() returns a page table for @mm or NULL if the allocation
+ * failed.  For an mm with pgstes a full page is allocated through
+ * page_table_alloc_pgste().  Otherwise a fragment of a 4K page is handed
+ * out; page->_mapcount tracks the fragments of the page.  The bits shifted
+ * left by 4 are the ones page_table_free_rcu() sets, and they are treated
+ * here as still occupied.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *uninitialized_var(table);
+ struct page *uninitialized_var(page);
+ unsigned int mask, bit;
+
+ if (mm_has_pgste(mm))
+ return page_table_alloc_pgste(mm, vmaddr);
+ /* Allocate fragments of a 4K page as 1K/2K page table */
+ spin_lock_bh(&mm->context.list_lock);
+ /* FRAG_MASK means "no free fragment" if the list is empty. */
+ mask = FRAG_MASK;
 if (!list_empty(&mm->context.pgtable_list)) {
 page = list_first_entry(&mm->context.pgtable_list,
 struct page, lru);
- if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
- page = NULL;
+ table = (unsigned long *) page_to_phys(page);
+ mask = atomic_read(&page->_mapcount);
+ mask = mask | (mask >> 4);
 }
- if (!page) {
- spin_unlock(&mm->page_table_lock);
+ if ((mask & FRAG_MASK) == FRAG_MASK) {
+ spin_unlock_bh(&mm->context.list_lock);
 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 if (!page)
 return NULL;
- pgtable_page_ctor(page);
- page->flags &= ~FRAG_MASK;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Fresh page: the first fragment is the one handed out. */
+ atomic_set(&page->_mapcount, 1);
 table = (unsigned long *) page_to_phys(page);
- if (mm->context.pgstes)
- clear_table_pgstes(table);
- else
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
- spin_lock(&mm->page_table_lock);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
+ spin_lock_bh(&mm->context.list_lock);
 list_add(&page->lru, &mm->context.pgtable_list);
+ } else {
+ /* Advance to the first fragment whose bit is clear in mask. */
+ for (bit = 1; mask & bit; bit <<= 1)
+ table += PTRS_PER_PTE;
+ mask = atomic_xor_bits(&page->_mapcount, bit);
+ /* Page is full now: take it off the list of usable pages. */
+ if ((mask & FRAG_MASK) == FRAG_MASK)
+ list_del(&page->lru);
 }
- table = (unsigned long *) page_to_phys(page);
- while (page->flags & bits) {
- table += 256;
- bits <<= 1;
- }
- page->flags |= bits;
- if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
- list_move_tail(&page->lru, &mm->context.pgtable_list);
- spin_unlock(&mm->page_table_lock);
+ spin_unlock_bh(&mm->context.list_lock);
 return table;
 }
+/*
+ * Free a page table.  Tables with pgstes are disconnected from their gmaps
+ * and freed as a whole page; otherwise the fragment's bit is cleared in
+ * page->_mapcount and the page is freed once no bit is left.
+ */
 void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
 struct page *page;
- unsigned long bits;
+ unsigned int bit, mask;
- bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
- bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- spin_lock(&mm->page_table_lock);
- page->flags ^= bits;
- if (page->flags & FRAG_MASK) {
- /* Page now has some free pgtable fragments. */
- list_move(&page->lru, &mm->context.pgtable_list);
- page = NULL;
- } else
- /* All fragments of the 4K page have been freed. */
+ if (page_table_with_pgste(page)) {
+ gmap_disconnect_pgtable(mm, table);
+ return page_table_free_pgste(table);
+ }
+ /* Free 1K/2K page table fragment of a 4K page */
+ bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
+ spin_lock_bh(&mm->context.list_lock);
+ /* A page that is not full is on the list; unlink before updating. */
+ if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
 list_del(&page->lru);
- spin_unlock(&mm->page_table_lock);
- if (page) {
+ mask = atomic_xor_bits(&page->_mapcount, bit);
+ /* Re-add the page if fragments of it are still in use. */
+ if (mask & FRAG_MASK)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask == 0) {
 pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
 __free_page(page);
 }
 }
+/*
+ * Deferred part of page_table_free_rcu().  @bit is the tag that was stored
+ * in the low bits of the table pointer: FRAG_MASK for a table with pgstes,
+ * otherwise the _mapcount bit to clear for the fragment.
+ */
-void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
+static void __page_table_free_rcu(void *table, unsigned bit)
 {
 struct page *page;
- spin_lock(&mm->page_table_lock);
- /* Free shadow region and segment tables. */
- list_for_each_entry(page, &mm->context.crst_list, lru)
- if (page->index) {
- free_pages((unsigned long) page->index, ALLOC_ORDER);
- page->index = 0;
+ if (bit == FRAG_MASK)
+ return page_table_free_pgste(table);
+ /* Free 1K/2K page table fragment of a 4K page */
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ /* The page itself is freed once no bit is left in _mapcount. */
+ if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ }
+}
+
+/*
+ * Queue a page table for deferred freeing through tlb_remove_table().  The
+ * kind of table is encoded in the low bits of the pointer that is queued:
+ * FRAG_MASK for a table with pgstes, bit << 4 for a page table fragment.
+ * __tlb_remove_table() decodes the tag again.
+ */
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+{
+ struct mm_struct *mm;
+ struct page *page;
+ unsigned int bit, mask;
+
+ mm = tlb->mm;
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
+ gmap_disconnect_pgtable(mm, table);
+ table = (unsigned long *) (__pa(table) | FRAG_MASK);
+ tlb_remove_table(tlb, table);
+ return;
+ }
+ bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+ spin_lock_bh(&mm->context.list_lock);
+ if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+ list_del(&page->lru);
+ /* Clear the in-use bit and set the matching bit << 4 in one step. */
+ mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+ if (mask & FRAG_MASK)
+ list_add_tail(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.list_lock);
+ table = (unsigned long *) (__pa(table) | (bit << 4));
+ tlb_remove_table(tlb, table);
+}
+
+/*
+ * Free one queued table.  The low bits of @_table carry the tag added by
+ * page_table_free_rcu(); an untagged pointer is freed with free_pages()
+ * using ALLOC_ORDER.
+ */
+static void __tlb_remove_table(void *_table)
+{
+	const unsigned long tag_mask = (FRAG_MASK << 4) | FRAG_MASK;
+	unsigned long raw = (unsigned long) _table;
+	unsigned type = raw & tag_mask;
+	void *table = (void *)(raw & ~tag_mask);
+
+	if (!type) {
+		free_pages((unsigned long) table, ALLOC_ORDER);
+		return;
+	}
+	__page_table_free_rcu(table, type);
+}
+
+/* Empty callback for the smp_call_function() in tlb_remove_table_one(). */
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+/*
+ * Free a single table right away; tlb_remove_table() calls this when it
+ * cannot allocate a batch page.
+ */
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely
+ * on IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+/*
+ * RCU callback: free every table recorded in the batch that embeds @head,
+ * then the batch page itself.
+ */
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+	struct mmu_table_batch *batch =
+		container_of(head, struct mmu_table_batch, rcu);
+	int i = 0;
+
+	while (i < batch->nr) {
+		__tlb_remove_table(batch->tables[i]);
+		i++;
+	}
+
+	free_page((unsigned long)batch);
+}
+
+/*
+ * Hand the current table batch of @tlb, if there is one, to
+ * call_rcu_sched() and detach it from the mmu_gather.
+ */
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+	struct mmu_table_batch *pending = tlb->batch;
+
+	if (!pending)
+		return;
+	call_rcu_sched(&pending->rcu, tlb_remove_table_rcu);
+	tlb->batch = NULL;
+}
+
+/*
+ * Queue @table for deferred freeing.  Tables are collected in a batch page;
+ * if no batch page can be allocated the table is freed immediately through
+ * tlb_remove_table_one().  A full batch triggers tlb_flush_mmu().
+ */
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->mm->context.flush_mm = 1;
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (*batch == NULL) {
+ /* No batch page: flush lazily and free this table now. */
+ __tlb_flush_mm_lazy(tlb->mm);
+ tlb_remove_table_one(table);
+ return;
 }
- /* "Free" second halves of page tables. */
- list_for_each_entry(page, &mm->context.pgtable_list, lru)
- page->flags &= ~SECOND_HALVES;
- spin_unlock(&mm->page_table_lock);
- mm->context.noexec = 0;
- update_mm(mm, tsk);
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_flush_mmu(tlb);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* Call follow_page() with FOLL_SPLIT on every page address of @vma. */
+static inline void thp_split_vma(struct vm_area_struct *vma)
+{
+	unsigned long addr = vma->vm_start;
+
+	while (addr < vma->vm_end) {
+		follow_page(vma, addr, FOLL_SPLIT);
+		addr += PAGE_SIZE;
+	}
+}
+
+/*
+ * Split the huge pages of every vma of @mm and mark both the existing vmas
+ * and the mm's default flags with VM_NOHUGEPAGE.
+ */
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma = mm->mmap;
+
+	while (vma != NULL) {
+		thp_split_vma(vma);
+		vma->vm_flags &= ~VM_HUGEPAGE;
+		vma->vm_flags |= VM_NOHUGEPAGE;
+		vma = vma->vm_next;
+	}
+	mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+/* Without CONFIG_TRANSPARENT_HUGEPAGE there is nothing to split. */
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * Replace every page table below @pud in [@addr, @end) that has no pgstes
+ * by a newly allocated page table with pgstes.  The ptes are copied to the
+ * new table and the old one is queued for freeing on @tlb.  Returns the
+ * address the walk ended at, or -ENOMEM (as unsigned long) if a new page
+ * table could not be allocated.
+ */
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+ struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next, *table, *new;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+again:
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ table = (unsigned long *) pmd_deref(*pmd);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ /* Already a page table with pgstes: nothing to do. */
+ if (page_table_with_pgste(page))
+ continue;
+ /* Allocate new page table with pgstes */
+ new = page_table_alloc_pgste(mm, addr);
+ if (!new)
+ return -ENOMEM;
+
+ spin_lock(&mm->page_table_lock);
+ /* The pmd may have changed while the lock was not held. */
+ if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+ /* Nuke pmd entry pointing to the "short" page table */
+ pmdp_flush_lazy(mm, addr, pmd);
+ pmd_clear(pmd);
+ /* Copy ptes from old table to new table */
+ memcpy(new, table, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ /* Establish new table */
+ pmd_populate(mm, pmd, (pte_t *) new);
+ /* Free old table with rcu, there might be a walker! */
+ page_table_free_rcu(tlb, table);
+ new = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ /* new is still set if the pmd changed: drop it and retry this pmd. */
+ if (new) {
+ page_table_free_pgste(new);
+ goto again;
+ }
+ } while (pmd++, addr = next, addr != end);
+
+ return addr;
+}
+
+/*
+ * Walk the pud entries below @pgd for [@addr, @end) and reallocate the page
+ * tables of every populated entry.  Returns the address the walk ended at,
+ * or the error value reported by page_table_realloc_pmd().
+ */
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+ struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pud_t *pud;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+ if (unlikely(IS_ERR_VALUE(next)))
+ return next;
+ } while (pud++, addr = next, addr != end);
+
+ return addr;
+}
+
+/*
+ * Reallocate all page tables of @mm in [@addr, @end) as page tables with
+ * pgstes.  Returns 0 on success, or the error value reported by the lower
+ * levels of the walk.
+ */
+static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+ if (unlikely(IS_ERR_VALUE(next)))
+ return next;
+ } while (pgd++, addr = next, addr != end);
+
+ return 0;
 }
/*
@@ -254,46 +1365,132 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
int s390_enable_sie(void)
{
struct task_struct *tsk = current;
- struct mm_struct *mm, *old_mm;
+ struct mm_struct *mm = tsk->mm;
+ struct mmu_gather tlb;
/* Do we have pgstes? if yes, we are done */
- if (tsk->mm->context.pgstes)
+ if (mm_has_pgste(tsk->mm))
return 0;
- /* lets check if we are allowed to replace the mm */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
- tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
- task_unlock(tsk);
- return -EINVAL;
- }
- task_unlock(tsk);
+ down_write(&mm->mmap_sem);
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ /* Reallocate the page tables with pgstes */
+ tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+ if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
+ mm->context.has_pgste = 1;
+ tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+ up_write(&mm->mmap_sem);
+ return mm->context.has_pgste ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
- /* we copy the mm with pgstes enabled */
- tsk->mm->context.pgstes = 1;
- mm = dup_mm(tsk);
- tsk->mm->context.pgstes = 0;
- if (!mm)
- return -ENOMEM;
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+void s390_enable_skey(void)
+{
+ page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
- /* Now lets check again if somebody attached ptrace etc */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
- tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
- mmput(mm);
- task_unlock(tsk);
- return -EINVAL;
+/*
+ * Test and reset if a guest page is dirty
+ */
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+ bool dirty = false;
+
+ pte = get_locked_pte(gmap->mm, address, &ptl);
+ if (unlikely(!pte))
+ return false;
+
+ if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
+ dirty = true;
+
+ spin_unlock(ptl);
+ return dirty;
+}
+EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ /* No need to flush TLB
+ * On s390 reference bits are in storage key and never in TLB */
+ return pmdp_test_and_clear_young(vma, address, pmdp);
+}
+
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+ if (pmd_same(*pmdp, entry))
+ return 0;
+ pmdp_invalidate(vma, address, pmdp);
+ set_pmd_at(vma->vm_mm, address, pmdp, entry);
+ return 1;
+}
+
+static void pmdp_splitting_flush_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
+ (unsigned long *) pmdp)) {
+ /* need to serialize against gup-fast (IRQ disabled) */
+ smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
}
+}
- /* ok, we are alone. No ptrace, no threads, etc. */
- old_mm = tsk->mm;
- tsk->mm = tsk->active_mm = mm;
- preempt_disable();
- update_mm(mm, tsk);
- cpu_set(smp_processor_id(), mm->cpu_vm_mask);
- preempt_enable();
- task_unlock(tsk);
- mmput(old_mm);
- return 0;
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
-EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_INVALID;
+ ptep++;
+ pte_val(*ptep) = _PAGE_INVALID;
+ return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index e4868bfc672..fe9012a49aa 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -1,6 +1,4 @@
/*
- * arch/s390/mm/vmem.c
- *
* Copyright IBM Corp. 2006
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
@@ -11,6 +9,8 @@
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
@@ -60,17 +60,18 @@ static inline pmd_t *vmem_pmd_alloc(void)
return pmd;
}
-static pte_t __ref *vmem_pte_alloc(void)
+static pte_t __ref *vmem_pte_alloc(unsigned long address)
{
pte_t *pte;
if (slab_is_available())
- pte = (pte_t *) page_table_alloc(&init_mm);
+ pte = (pte_t *) page_table_alloc(&init_mm, address);
else
- pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
+ pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
+ PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ clear_table((unsigned long *) pte, _PAGE_INVALID,
PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -80,57 +81,65 @@ static pte_t __ref *vmem_pte_alloc(void)
*/
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
- unsigned long address;
+ unsigned long end = start + size;
+ unsigned long address = start;
pgd_t *pg_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
- pte_t pte;
int ret = -ENOMEM;
- for (address = start; address < start + size; address += PAGE_SIZE) {
+ while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
- pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
+ pgd_populate(&init_mm, pg_dir, pu_dir);
}
-
pu_dir = pud_offset(pg_dir, address);
+#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
+ if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
+ !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
+ pud_val(*pu_dir) = __pa(address) |
+ _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
+ (ro ? _REGION_ENTRY_PROTECT : 0);
+ address += PUD_SIZE;
+ continue;
+ }
+#endif
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
goto out;
- pud_populate_kernel(&init_mm, pu_dir, pm_dir);
+ pud_populate(&init_mm, pu_dir, pm_dir);
}
-
- pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
pm_dir = pmd_offset(pu_dir, address);
-
-#ifdef __s390x__
- if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
- (address + HPAGE_SIZE <= start + size) &&
- (address >= HPAGE_SIZE)) {
- pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
- pmd_val(*pm_dir) = pte_val(pte);
- address += HPAGE_SIZE - PAGE_SIZE;
+#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
+ if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
+ !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+ pmd_val(*pm_dir) = __pa(address) |
+ _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
+ _SEGMENT_ENTRY_YOUNG |
+ (ro ? _SEGMENT_ENTRY_PROTECT : 0);
+ address += PMD_SIZE;
continue;
}
#endif
if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc();
+ pt_dir = vmem_pte_alloc(address);
if (!pt_dir)
goto out;
- pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
+ pmd_populate(&init_mm, pm_dir, pt_dir);
}
pt_dir = pte_offset_kernel(pm_dir, address);
- *pt_dir = pte;
+ pte_val(*pt_dir) = __pa(address) |
+ pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+ address += PAGE_SIZE;
}
ret = 0;
out:
- flush_tlb_kernel_range(start, start + size);
return ret;
}
@@ -140,58 +149,67 @@ out:
*/
static void vmem_remove_range(unsigned long start, unsigned long size)
{
- unsigned long address;
+ unsigned long end = start + size;
+ unsigned long address = start;
pgd_t *pg_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
pte_t pte;
- pte_val(pte) = _PAGE_TYPE_EMPTY;
- for (address = start; address < start + size; address += PAGE_SIZE) {
+ pte_val(pte) = _PAGE_INVALID;
+ while (address < end) {
pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ address += PGDIR_SIZE;
+ continue;
+ }
pu_dir = pud_offset(pg_dir, address);
- if (pud_none(*pu_dir))
+ if (pud_none(*pu_dir)) {
+ address += PUD_SIZE;
+ continue;
+ }
+ if (pud_large(*pu_dir)) {
+ pud_clear(pu_dir);
+ address += PUD_SIZE;
continue;
+ }
pm_dir = pmd_offset(pu_dir, address);
- if (pmd_none(*pm_dir))
+ if (pmd_none(*pm_dir)) {
+ address += PMD_SIZE;
continue;
-
- if (pmd_huge(*pm_dir)) {
- pmd_clear_kernel(pm_dir);
- address += HPAGE_SIZE - PAGE_SIZE;
+ }
+ if (pmd_large(*pm_dir)) {
+ pmd_clear(pm_dir);
+ address += PMD_SIZE;
continue;
}
-
pt_dir = pte_offset_kernel(pm_dir, address);
*pt_dir = pte;
+ address += PAGE_SIZE;
}
- flush_tlb_kernel_range(start, start + size);
+ flush_tlb_kernel_range(start, end);
}
/*
* Add a backed mem_map array to the virtual mem_map array.
*/
-int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
- unsigned long address, start_addr, end_addr;
+ unsigned long address = start;
pgd_t *pg_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
- pte_t pte;
int ret = -ENOMEM;
- start_addr = (unsigned long) start;
- end_addr = (unsigned long) (start + nr);
-
- for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
+ for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
- pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
+ pgd_populate(&init_mm, pg_dir, pu_dir);
}
pu_dir = pud_offset(pg_dir, address);
@@ -199,15 +217,38 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
goto out;
- pud_populate_kernel(&init_mm, pu_dir, pm_dir);
+ pud_populate(&init_mm, pu_dir, pm_dir);
}
pm_dir = pmd_offset(pu_dir, address);
if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc();
+#ifdef CONFIG_64BIT
+ /* Use 1MB frames for vmemmap if available. We always
+ * use large frames even if they are only partially
+ * used.
+ * Otherwise we would have also page tables since
+ * vmemmap_populate gets called for each section
+ * separately. */
+ if (MACHINE_HAS_EDAT1) {
+ void *new_page;
+
+ new_page = vmemmap_alloc_block(PMD_SIZE, node);
+ if (!new_page)
+ goto out;
+ pmd_val(*pm_dir) = __pa(new_page) |
+ _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
+ _SEGMENT_ENTRY_CO;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+#endif
+ pt_dir = vmem_pte_alloc(address);
if (!pt_dir)
goto out;
- pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
+ pmd_populate(&init_mm, pm_dir, pt_dir);
+ } else if (pmd_large(*pm_dir)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
}
pt_dir = pte_offset_kernel(pm_dir, address);
@@ -217,17 +258,21 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
new_page =__pa(vmem_alloc_pages(0));
if (!new_page)
goto out;
- pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
- *pt_dir = pte;
+ pte_val(*pt_dir) =
+ __pa(new_page) | pgprot_val(PAGE_KERNEL);
}
+ address += PAGE_SIZE;
}
- memset(start, 0, nr * sizeof(struct page));
+ memset((void *)start, 0, end - start);
ret = 0;
out:
- flush_tlb_kernel_range(start_addr, end_addr);
return ret;
}
+void vmemmap_free(unsigned long start, unsigned long end)
+{
+}
+
/*
* Add memory segment to the segment list if it doesn't overlap with
* an already present segment.
@@ -328,17 +373,14 @@ out:
void __init vmem_map_init(void)
{
unsigned long ro_start, ro_end;
- unsigned long start, end;
- int i;
-
- INIT_LIST_HEAD(&init_mm.context.crst_list);
- INIT_LIST_HEAD(&init_mm.context.pgtable_list);
- init_mm.context.noexec = 0;
- ro_start = ((unsigned long)&_stext) & PAGE_MASK;
- ro_end = PFN_ALIGN((unsigned long)&_eshared);
- for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- start = memory_chunk[i].addr;
- end = memory_chunk[i].addr + memory_chunk[i].size;
+ struct memblock_region *reg;
+ phys_addr_t start, end; /* [start, end) of the current memblock region */
+
+ ro_start = PFN_ALIGN((unsigned long)&_stext);
+ ro_end = (unsigned long)&_eshared & PAGE_MASK;
+ for_each_memblock(memory, reg) {
+ start = reg->base;
+ end = reg->base + reg->size; /* exclusive end: "end - start" below is a size */
if (start >= ro_end || end <= ro_start)
vmem_add_mem(start, end - start, 0);
else if (start >= ro_start && end <= ro_end)
@@ -358,23 +400,21 @@ void __init vmem_map_init(void)
}
/*
- * Convert memory chunk array to a memory segment list so there is a single
- * list that contains both r/w memory and shared memory segments.
+ * Convert memblock.memory to a memory segment list so there is a single
+ * list that contains all memory segments.
*/
static int __init vmem_convert_memory_chunk(void)
{
+ struct memblock_region *reg;
struct memory_segment *seg;
- int i;
mutex_lock(&vmem_mutex);
- for (i = 0; i < MEMORY_CHUNKS; i++) {
- if (!memory_chunk[i].size)
- continue;
+ for_each_memblock(memory, reg) {
seg = kzalloc(sizeof(*seg), GFP_KERNEL);
if (!seg)
panic("Out of memory...\n");
- seg->start = memory_chunk[i].addr;
- seg->size = memory_chunk[i].size;
+ seg->start = reg->base;
+ seg->size = reg->size;
insert_memory_segment(seg);
}
mutex_unlock(&vmem_mutex);
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
new file mode 100644
index 00000000000..90568c33ddb
--- /dev/null
+++ b/arch/s390/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
new file mode 100644
index 00000000000..7e45d13816c
--- /dev/null
+++ b/arch/s390/net/bpf_jit.S
@@ -0,0 +1,130 @@
+/*
+ * BPF Jit compiler for s390, help functions.
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/linkage.h>
+
+/*
+ * Calling convention:
+ * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved
+ * %r2: skb pointer
+ * %r3: offset parameter
+ * %r5: BPF A accumulator
+ * %r8: return address
+ * %r9: save register for skb pointer
+ * %r10: skb->data
+ * %r11: skb->len - skb->data_len (headlen)
+ * %r12: BPF X accumulator
+ *
+ * skb_copy_bits takes 4 parameters:
+ * %r2 = skb pointer
+ * %r3 = offset into skb data
+ * %r4 = length to copy
+ * %r5 = pointer to temp buffer
+ */
+#define SKBDATA %r8
+
+ /* A = *(u32 *) (skb->data+K+X) */
+ENTRY(sk_load_word_ind)
+ ar %r3,%r12 # offset += X
+ bmr %r8 # < 0 -> return with cc
+
+ /* A = *(u32 *) (skb->data+K); cc == 0 on return means success */
+ENTRY(sk_load_word)
+ llgfr %r1,%r3 # extend offset
+ ahi %r3,4 # offset + 4
+ clr %r11,%r3 # hlen < offset + 4 ?
+ jl sk_load_word_slow # yes -> fetch via skb_copy_bits
+ l %r5,0(%r1,%r10) # get word from skb
+ xr %r1,%r1 # set cc to zero
+ br %r8
+
+sk_load_word_slow:
+ lgr %r9,%r2 # save %r2
+ lhi %r4,4 # 4 bytes
+ la %r5,160(%r15) # pointer to temp buffer
+ brasl %r14,skb_copy_bits # get data from skb
+ l %r5,160(%r15) # load result from temp buffer
+ ltgr %r2,%r2 # set cc to (%r2 != 0)
+ lgr %r2,%r9 # restore %r2
+ br %r8
+
+ /* A = *(u16 *) (skb->data+K+X) */
+ENTRY(sk_load_half_ind)
+ ar %r3,%r12 # offset += X
+ bmr %r8 # < 0 -> return with cc
+
+ /* A = *(u16 *) (skb->data+K); cc == 0 on return means success */
+ENTRY(sk_load_half)
+ llgfr %r1,%r3 # extend offset
+ ahi %r3,2 # offset + 2
+ clr %r11,%r3 # hlen < offset + 2 ?
+ jl sk_load_half_slow # yes -> fetch via skb_copy_bits
+ llgh %r5,0(%r1,%r10) # get half from skb
+ xr %r1,%r1 # set cc to zero
+ br %r8
+
+sk_load_half_slow:
+ lgr %r9,%r2 # save %r2
+ lhi %r4,2 # 2 bytes
+ la %r5,162(%r15) # pointer to temp buffer
+ brasl %r14,skb_copy_bits # get data from skb
+ xc 160(2,%r15),160(%r15)
+ l %r5,160(%r15) # load result from temp buffer
+ ltgr %r2,%r2 # set cc to (%r2 != 0)
+ lgr %r2,%r9 # restore %r2
+ br %r8
+
+ /* A = *(u8 *) (skb->data+K+X) */
+ENTRY(sk_load_byte_ind)
+ ar %r3,%r12 # offset += X
+ bmr %r8 # < 0 -> return with cc
+
+ /* A = *(u8 *) (skb->data+K); cc == 0 on return means success */
+ENTRY(sk_load_byte)
+ llgfr %r1,%r3 # extend offset
+ clr %r11,%r3 # hlen <= offset ?
+ jle sk_load_byte_slow # yes -> fetch via skb_copy_bits
+ lhi %r5,0
+ ic %r5,0(%r1,%r10) # get byte from skb
+ xr %r1,%r1 # set cc to zero
+ br %r8
+
+sk_load_byte_slow:
+ lgr %r9,%r2 # save %r2
+ lhi %r4,1 # 1 bytes
+ la %r5,163(%r15) # pointer to temp buffer
+ brasl %r14,skb_copy_bits # get data from skb
+ xc 160(3,%r15),160(%r15)
+ l %r5,160(%r15) # load result from temp buffer
+ ltgr %r2,%r2 # set cc to (%r2 != 0)
+ lgr %r2,%r9 # restore %r2
+ br %r8
+
+ /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
+ENTRY(sk_load_byte_msh)
+ llgfr %r1,%r3 # extend offset
+ clr %r11,%r3 # hlen <= offset ?
+ jle sk_load_byte_msh_slow # yes -> slow path that also sets X
+ lhi %r12,0
+ ic %r12,0(%r1,%r10) # get byte from skb
+ nill %r12,0x0f # keep the low nibble
+ sll %r12,2 # X = nibble << 2
+ xr %r1,%r1 # set cc to zero
+ br %r8
+
+sk_load_byte_msh_slow:
+ lgr %r9,%r2 # save %r2
+ lhi %r4,1 # 1 byte
+ la %r5,163(%r15) # pointer to temp buffer (last byte of the word)
+ brasl %r14,skb_copy_bits # get data from skb
+ xc 160(3,%r15),160(%r15) # clear the three bytes above the result
+ l %r12,160(%r15) # load result from temp buffer
+ nill %r12,0x0f # keep the low nibble
+ sll %r12,2 # X = nibble << 2
+ ltgr %r2,%r2 # set cc to (%r2 != 0)
+ lgr %r2,%r9 # restore %r2
+ br %r8
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
new file mode 100644
index 00000000000..a2cbd875543
--- /dev/null
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -0,0 +1,891 @@
+/*
+ * BPF Jit compiler for s390.
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/moduleloader.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/filter.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <asm/cacheflush.h>
+#include <asm/facility.h>
+#include <asm/dis.h>
+
+/*
+ * Conventions:
+ * %r2 = skb pointer
+ * %r3 = offset parameter
+ * %r4 = scratch register / length parameter
+ * %r5 = BPF A accumulator
+ * %r8 = return address
+ * %r9 = save register for skb pointer
+ * %r10 = skb->data
+ * %r11 = skb->len - skb->data_len (headlen)
+ * %r12 = BPF X accumulator
+ * %r13 = literal pool pointer
+ * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS
+ */
+int bpf_jit_enable __read_mostly;
+
+/*
+ * assembly code in arch/s390/net/bpf_jit.S
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+
+struct bpf_jit {
+ unsigned int seen; /* bitmask of SEEN_* flags */
+ u8 *start; /* base that the addrs[] offsets are added to */
+ u8 *prg; /* current instruction emit position (advanced by EMIT*) */
+ u8 *mid; /* EMIT* only store while prg + size <= mid */
+ u8 *lit; /* current literal emit position (advanced by EMIT_CONST) */
+ u8 *end; /* literals are only stored while lit + size <= end */
+ u8 *base_ip; /* address after "basr %r13,0"; literal offsets are relative to it */
+ u8 *ret0_ip; /* address of the "lghi %r2,0" return-0 sequence */
+ u8 *exit_ip; /* address of the register restore + "br %r14" sequence */
+ unsigned int off_load_word; /* literal offset of &sk_load_word */
+ unsigned int off_load_half; /* literal offset of &sk_load_half */
+ unsigned int off_load_byte; /* literal offset of &sk_load_byte */
+ unsigned int off_load_bmsh; /* literal offset of &sk_load_byte_msh */
+ unsigned int off_load_iword; /* literal offset of &sk_load_word_ind */
+ unsigned int off_load_ihalf; /* literal offset of &sk_load_half_ind */
+ unsigned int off_load_ibyte; /* literal offset of &sk_load_byte_ind */
+};
+
+#define BPF_SIZE_MAX 4096 /* Max size for program */
+
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG 2 /* %r12 (BPF X accumulator) is used */
+#define SEEN_MEM 4 /* use mem[] for temporary storage */
+#define SEEN_RET0 8 /* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL 16 /* code uses literals */
+#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */
+#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */
+#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */
+#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */
+#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */
+#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */
+#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */
+
+#define EMIT2(op) \
+({ \
+ if (jit->prg + 2 <= jit->mid) \
+ *(u16 *) jit->prg = op; \
+ jit->prg += 2; \
+})
+
+#define EMIT4(op) \
+({ \
+ if (jit->prg + 4 <= jit->mid) \
+ *(u32 *) jit->prg = op; \
+ jit->prg += 4; \
+})
+
+#define EMIT4_DISP(op, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ EMIT4(op | __disp); \
+})
+
+#define EMIT4_IMM(op, imm) \
+({ \
+ unsigned int __imm = (imm) & 0xffff; \
+ EMIT4(op | __imm); \
+})
+
+#define EMIT4_PCREL(op, pcrel) \
+({ \
+ long __pcrel = ((pcrel) >> 1) & 0xffff; \
+ EMIT4(op | __pcrel); \
+})
+
+#define EMIT6(op1, op2) \
+({ \
+ if (jit->prg + 6 <= jit->mid) { \
+ *(u32 *) jit->prg = op1; \
+ *(u16 *) (jit->prg + 4) = op2; \
+ } \
+ jit->prg += 6; \
+})
+
+#define EMIT6_DISP(op1, op2, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ EMIT6(op1 | __disp, op2); \
+})
+
+#define EMIT6_IMM(op, imm) \
+({ \
+ unsigned int __imm = (imm); \
+ EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+})
+
+#define EMIT_CONST(val) \
+({ \
+ unsigned int ret; \
+ ret = (unsigned int) (jit->lit - jit->base_ip); \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->lit + 4 <= jit->end) \
+ *(u32 *) jit->lit = val; \
+ jit->lit += 4; \
+ ret; \
+})
+
+#define EMIT_FN_CONST(bit, fn) \
+({ \
+ unsigned int ret; \
+ ret = (unsigned int) (jit->lit - jit->base_ip); \
+ if (jit->seen & bit) { \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->lit + 8 <= jit->end) \
+ *(void **) jit->lit = fn; \
+ jit->lit += 8; \
+ } \
+ ret; \
+})
+
+static void bpf_jit_prologue(struct bpf_jit *jit)
+{
+ /* Save registers and create stack frame if necessary */
+ if (jit->seen & SEEN_DATAREF) {
+ /* stmg %r8,%r15,88(%r15) */
+ EMIT6(0xeb8ff058, 0x0024);
+ /* lgr %r14,%r15 */
+ EMIT4(0xb90400ef);
+ /* aghi %r15,<offset> */
+ EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80);
+ /* stg %r14,152(%r15) */
+ EMIT6(0xe3e0f098, 0x0024);
+ } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
+ /* stmg %r12,%r13,120(%r15) */
+ EMIT6(0xebcdf078, 0x0024);
+ else if (jit->seen & SEEN_XREG)
+ /* stg %r12,120(%r15) */
+ EMIT6(0xe3c0f078, 0x0024);
+ else if (jit->seen & SEEN_LITERAL)
+ /* stg %r13,128(%r15) */
+ EMIT6(0xe3d0f080, 0x0024);
+
+ /* Setup literal pool */
+ if (jit->seen & SEEN_LITERAL) {
+ /* basr %r13,0 */
+ EMIT2(0x0dd0);
+ jit->base_ip = jit->prg;
+ }
+ jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word);
+ jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half);
+ jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte);
+ jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh);
+ jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind);
+ jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind);
+ jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind);
+
+ /* Filter needs to access skb data */
+ if (jit->seen & SEEN_DATAREF) {
+ /* l %r11,<len>(%r2) */
+ EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len));
+ /* s %r11,<data_len>(%r2) */
+ EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len));
+ /* lg %r10,<data>(%r2) */
+ EMIT6_DISP(0xe3a02000, 0x0004,
+ offsetof(struct sk_buff, data));
+ }
+}
+
+static void bpf_jit_epilogue(struct bpf_jit *jit)
+{
+ /* Return 0 */
+ if (jit->seen & SEEN_RET0) {
+ jit->ret0_ip = jit->prg;
+ /* lghi %r2,0 */
+ EMIT4(0xa7290000);
+ }
+ jit->exit_ip = jit->prg;
+ /* Restore registers */
+ if (jit->seen & SEEN_DATAREF)
+ /* lmg %r8,%r15,<offset>(%r15) */
+ EMIT6_DISP(0xeb8ff000, 0x0004,
+ (jit->seen & SEEN_MEM) ? 200 : 168);
+ else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL))
+ /* lmg %r12,%r13,120(%r15) */
+ EMIT6(0xebcdf078, 0x0004);
+ else if (jit->seen & SEEN_XREG)
+ /* lg %r12,120(%r15) */
+ EMIT6(0xe3c0f078, 0x0004);
+ else if (jit->seen & SEEN_LITERAL)
+ /* lg %r13,128(%r15) */
+ EMIT6(0xe3d0f080, 0x0004);
+ /* br %r14 */
+ EMIT2(0x07fe);
+}
+
+/* Helper to find the offset of pkt_type in sk_buff
+ * Make sure its still a 3bit field starting at the MSBs within a byte.
+ */
+#define PKT_TYPE_MAX 0xe0
+static int pkt_type_offset;
+
+static int __init bpf_pkt_type_offset_init(void)
+{
+ struct sk_buff skb_probe = {
+ .pkt_type = ~0,
+ };
+ char *ct = (char *)&skb_probe;
+ int off;
+
+ pkt_type_offset = -1;
+ for (off = 0; off < sizeof(struct sk_buff); off++) {
+ if (!ct[off])
+ continue;
+ if (ct[off] == PKT_TYPE_MAX)
+ pkt_type_offset = off;
+ else {
+ /* Found non matching bit pattern, fix needed. */
+ WARN_ON_ONCE(1);
+ pkt_type_offset = -1;
+ return -1;
+ }
+ }
+ return 0;
+}
+device_initcall(bpf_pkt_type_offset_init);
+
+/*
+ * Emit code that clears scratch memory, X and A so no stale kernel data leaks to the user.
+ */
+static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
+{
+ /* Clear temporary memory if (seen & SEEN_MEM) */
+ if (jit->seen & SEEN_MEM)
+ /* xc 0(64,%r15),0(%r15) */
+ EMIT6(0xd73ff000, 0xf000);
+ /* Clear X if (seen & SEEN_XREG) */
+ if (jit->seen & SEEN_XREG)
+ /* lhi %r12,0 */
+ EMIT4(0xa7c80000);
+ /* Clear A if the first instruction does not set it. */
+ switch (filter[0].code) {
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
+ case BPF_LD | BPF_W | BPF_LEN:
+ case BPF_LD | BPF_W | BPF_IND:
+ case BPF_LD | BPF_H | BPF_IND:
+ case BPF_LD | BPF_B | BPF_IND:
+ case BPF_LD | BPF_IMM:
+ case BPF_LD | BPF_MEM:
+ case BPF_MISC | BPF_TXA:
+ case BPF_RET | BPF_K:
+ /* first instruction sets A register */
+ break;
+ default: /* A = 0 */
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ }
+}
+
+static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
+ unsigned int *addrs, int i, int last)
+{
+ unsigned int K;
+ int offset;
+ unsigned int mask;
+ u16 code;
+
+ K = filter->k;
+ code = bpf_anc_helper(filter);
+
+ switch (code) {
+ case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
+ jit->seen |= SEEN_XREG;
+ /* ar %r5,%r12 */
+ EMIT2(0x1a5c);
+ break;
+ case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
+ if (!K)
+ break;
+ if (K <= 16383)
+ /* ahi %r5,<K> */
+ EMIT4_IMM(0xa75a0000, K);
+ else if (test_facility(21))
+ /* alfi %r5,<K> */
+ EMIT6_IMM(0xc25b0000, K);
+ else
+ /* a %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
+ break;
+ case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
+ jit->seen |= SEEN_XREG;
+ /* sr %r5,%r12 */
+ EMIT2(0x1b5c);
+ break;
+ case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
+ if (!K)
+ break;
+ if (K <= 16384)
+ /* ahi %r5,-K */
+ EMIT4_IMM(0xa75a0000, -K);
+ else if (test_facility(21))
+ /* alfi %r5,-K */
+ EMIT6_IMM(0xc25b0000, -K);
+ else
+ /* s %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
+ break;
+ case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
+ jit->seen |= SEEN_XREG;
+ /* msr %r5,%r12 */
+ EMIT4(0xb252005c);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
+ if (K <= 16383)
+ /* mhi %r5,K */
+ EMIT4_IMM(0xa75c0000, K);
+ else if (test_facility(34))
+ /* msfi %r5,<K> */
+ EMIT6_IMM(0xc2510000, K);
+ else
+ /* ms %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x7150d000, EMIT_CONST(K));
+ break;
+ case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
+ jit->seen |= SEEN_XREG | SEEN_RET0;
+ /* ltr %r12,%r12 */
+ EMIT2(0x12cc);
+ /* jz <ret0> */
+ EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
+ /* lhi %r4,0 */
+ EMIT4(0xa7480000);
+ /* dlr %r4,%r12 */
+ EMIT4(0xb997004c);
+ break;
+ case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
+ if (K == 1)
+ break;
+ /* lhi %r4,0 */
+ EMIT4(0xa7480000);
+ /* dl %r4,<d(K)>(%r13) */
+ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
+ break;
+ case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
+ jit->seen |= SEEN_XREG | SEEN_RET0;
+ /* ltr %r12,%r12 */
+ EMIT2(0x12cc);
+ /* jz <ret0> */
+ EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg));
+ /* lhi %r4,0 */
+ EMIT4(0xa7480000);
+ /* dlr %r4,%r12 */
+ EMIT4(0xb997004c);
+ /* lr %r5,%r4 */
+ EMIT2(0x1854);
+ break;
+ case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
+ if (K == 1) {
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ break;
+ }
+ /* lhi %r4,0 */
+ EMIT4(0xa7480000);
+ /* dl %r4,<d(K)>(%r13) */
+ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
+ /* lr %r5,%r4 */
+ EMIT2(0x1854);
+ break;
+ case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
+ jit->seen |= SEEN_XREG;
+ /* nr %r5,%r12 */
+ EMIT2(0x145c);
+ break;
+ case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
+ if (test_facility(21))
+ /* nilf %r5,<K> */
+ EMIT6_IMM(0xc05b0000, K);
+ else
+ /* n %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5450d000, EMIT_CONST(K));
+ break;
+ case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
+ jit->seen |= SEEN_XREG;
+ /* or %r5,%r12 */
+ EMIT2(0x165c);
+ break;
+ case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
+ if (test_facility(21))
+ /* oilf %r5,<K> */
+ EMIT6_IMM(0xc05d0000, K);
+ else
+ /* o %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5650d000, EMIT_CONST(K));
+ break;
+ case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+ case BPF_ALU | BPF_XOR | BPF_X:
+ jit->seen |= SEEN_XREG;
+ /* xr %r5,%r12 */
+ EMIT2(0x175c);
+ break;
+ case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
+ if (!K)
+ break;
+ /* x %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5750d000, EMIT_CONST(K));
+ break;
+ case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
+ jit->seen |= SEEN_XREG;
+ /* sll %r5,0(%r12) */
+ EMIT4(0x8950c000);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
+ if (K == 0)
+ break;
+ /* sll %r5,K */
+ EMIT4_DISP(0x89500000, K);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
+ jit->seen |= SEEN_XREG;
+ /* srl %r5,0(%r12) */
+ EMIT4(0x8850c000);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
+ if (K == 0)
+ break;
+ /* srl %r5,K */
+ EMIT4_DISP(0x88500000, K);
+ break;
+ case BPF_ALU | BPF_NEG: /* A = -A */
+ /* lnr %r5,%r5 */
+ EMIT2(0x1155);
+ break;
+ case BPF_JMP | BPF_JA: /* ip += K */
+ offset = addrs[i + K] + jit->start - jit->prg;
+ EMIT4_PCREL(0xa7f40000, offset);
+ break;
+ case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
+ mask = 0x200000; /* jh */
+ goto kbranch;
+ case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
+ mask = 0xa00000; /* jhe */
+ goto kbranch;
+ case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
+ mask = 0x800000; /* je */
+kbranch: /* Emit compare if the branch targets are different */
+ if (filter->jt != filter->jf) {
+ if (K <= 16383)
+ /* chi %r5,<K> */
+ EMIT4_IMM(0xa75e0000, K);
+ else if (test_facility(21))
+ /* clfi %r5,<K> */
+ EMIT6_IMM(0xc25f0000, K);
+ else
+ /* c %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5950d000, EMIT_CONST(K));
+ }
+branch: if (filter->jt == filter->jf) {
+ if (filter->jt == 0)
+ break;
+ /* j <jt> */
+ offset = addrs[i + filter->jt] + jit->start - jit->prg;
+ EMIT4_PCREL(0xa7f40000, offset);
+ break;
+ }
+ if (filter->jt != 0) {
+ /* brc <mask>,<jt> */
+ offset = addrs[i + filter->jt] + jit->start - jit->prg;
+ EMIT4_PCREL(0xa7040000 | mask, offset);
+ }
+ if (filter->jf != 0) {
+ /* brc <mask^15>,<jf> */
+ offset = addrs[i + filter->jf] + jit->start - jit->prg;
+ EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
+ }
+ break;
+ case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
+ mask = 0x700000; /* jnz */
+ /* Emit test if the branch targets are different */
+ if (filter->jt != filter->jf) {
+ if (K > 65535) {
+ /* lr %r4,%r5 */
+ EMIT2(0x1845);
+ /* n %r4,<d(K)>(%r13) */
+ EMIT4_DISP(0x5440d000, EMIT_CONST(K));
+ } else
+ /* tmll %r5,K */
+ EMIT4_IMM(0xa7510000, K);
+ }
+ goto branch;
+ case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
+ mask = 0x200000; /* jh */
+ goto xbranch;
+ case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
+ mask = 0xa00000; /* jhe */
+ goto xbranch;
+ case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
+ mask = 0x800000; /* je */
+xbranch: /* Emit compare if the branch targets are different */
+ if (filter->jt != filter->jf) {
+ jit->seen |= SEEN_XREG;
+ /* cr %r5,%r12 */
+ EMIT2(0x195c);
+ }
+ goto branch;
+ case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
+ mask = 0x700000; /* jnz */
+ /* Emit test if the branch targets are different */
+ if (filter->jt != filter->jf) {
+ jit->seen |= SEEN_XREG;
+ /* lr %r4,%r5 */
+ EMIT2(0x1845);
+ /* nr %r4,%r12 */
+ EMIT2(0x144c);
+ }
+ goto branch;
+ case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
+ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
+ offset = jit->off_load_word;
+ goto load_abs;
+ case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
+ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
+ offset = jit->off_load_half;
+ goto load_abs;
+ case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
+ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
+ offset = jit->off_load_byte;
+load_abs: if ((int) K < 0)
+ goto out;
+call_fn: /* lg %r1,<d(function)>(%r13) */
+ EMIT6_DISP(0xe310d000, 0x0004, offset);
+ /* l %r3,<d(K)>(%r13) */
+ EMIT4_DISP(0x5830d000, EMIT_CONST(K));
+ /* basr %r8,%r1 */
+ EMIT2(0x0d81);
+ /* jnz <ret0> */
+ EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
+ break;
+ case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
+ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
+ offset = jit->off_load_iword;
+ goto call_fn;
+ case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
+ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
+ offset = jit->off_load_ihalf;
+ goto call_fn;
+ case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
+ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
+ offset = jit->off_load_ibyte;
+ goto call_fn;
+ case BPF_LDX | BPF_B | BPF_MSH:
+ /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
+ jit->seen |= SEEN_RET0;
+ if ((int) K < 0) {
+ /* j <ret0> */
+ EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg));
+ break;
+ }
+ jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
+ offset = jit->off_load_bmsh;
+ goto call_fn;
+ case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+ /* l %r5,<d(len)>(%r2) */
+ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
+ break;
+ case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
+ jit->seen |= SEEN_XREG;
+ /* l %r12,<d(len)>(%r2) */
+ EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
+ break;
+ case BPF_LD | BPF_IMM: /* A = K */
+ if (K <= 16383)
+ /* lhi %r5,K */
+ EMIT4_IMM(0xa7580000, K);
+ else if (test_facility(21))
+ /* llilf %r5,<K> */
+ EMIT6_IMM(0xc05f0000, K);
+ else
+ /* l %r5,<d(K)>(%r13) */
+ EMIT4_DISP(0x5850d000, EMIT_CONST(K));
+ break;
+ case BPF_LDX | BPF_IMM: /* X = K */
+ jit->seen |= SEEN_XREG;
+ if (K <= 16383)
+ /* lhi %r12,<K> */
+ EMIT4_IMM(0xa7c80000, K);
+ else if (test_facility(21))
+ /* llilf %r12,<K> */
+ EMIT6_IMM(0xc0cf0000, K);
+ else
+ /* l %r12,<d(K)>(%r13) */
+ EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
+ break;
+ case BPF_LD | BPF_MEM: /* A = mem[K] */
+ jit->seen |= SEEN_MEM;
+ /* l %r5,<K>(%r15) */
+ EMIT4_DISP(0x5850f000,
+ (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+ break;
+ case BPF_LDX | BPF_MEM: /* X = mem[K] */
+ jit->seen |= SEEN_XREG | SEEN_MEM;
+ /* l %r12,<K>(%r15) */
+ EMIT4_DISP(0x58c0f000,
+ (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+ break;
+ case BPF_MISC | BPF_TAX: /* X = A */
+ jit->seen |= SEEN_XREG;
+ /* lr %r12,%r5 */
+ EMIT2(0x18c5);
+ break;
+ case BPF_MISC | BPF_TXA: /* A = X */
+ jit->seen |= SEEN_XREG;
+ /* lr %r5,%r12 */
+ EMIT2(0x185c);
+ break;
+ case BPF_RET | BPF_K:
+ if (K == 0) {
+ jit->seen |= SEEN_RET0;
+ if (last)
+ break;
+ /* j <ret0> */
+ EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg);
+ } else {
+ if (K <= 16383)
+ /* lghi %r2,K */
+ EMIT4_IMM(0xa7290000, K);
+ else
+ /* llgf %r2,<K>(%r13) */
+ EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K));
+ /* j <exit> */
+ if (last && !(jit->seen & SEEN_RET0))
+ break;
+ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+ }
+ break;
+ case BPF_RET | BPF_A:
+ /* llgfr %r2,%r5 */
+ EMIT4(0xb9160025);
+ /* j <exit> */
+ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+ break;
+ case BPF_ST: /* mem[K] = A */
+ jit->seen |= SEEN_MEM;
+ /* st %r5,<K>(%r15) */
+ EMIT4_DISP(0x5050f000,
+ (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+ break;
+ case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+ jit->seen |= SEEN_XREG | SEEN_MEM;
+ /* st %r12,<K>(%r15) */
+ EMIT4_DISP(0x50c0f000,
+ (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
+ break;
+ case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* icm %r5,3,<d(protocol)>(%r2) */
+ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
+ break;
+ case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0;
+ * A = skb->dev->ifindex */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+ jit->seen |= SEEN_RET0;
+ /* lg %r1,<d(dev)>(%r2) */
+ EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
+ /* ltgr %r1,%r1 */
+ EMIT4(0xb9020011);
+ /* jz <ret0> */
+ EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+ /* l %r5,<d(ifindex)>(%r1) */
+ EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
+ break;
+ case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ /* l %r5,<d(mark)>(%r2) */
+ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
+ break;
+ case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* icm %r5,3,<d(queue_mapping)>(%r2) */
+ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
+ break;
+ case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0;
+ * A = skb->dev->type */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+ jit->seen |= SEEN_RET0;
+ /* lg %r1,<d(dev)>(%r2) */
+ EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev));
+ /* ltgr %r1,%r1 */
+ EMIT4(0xb9020011);
+ /* jz <ret0> */
+ EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* icm %r5,3,<d(type)>(%r1) */
+ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
+ break;
+ case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ /* l %r5,<d(hash)>(%r2) */
+ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
+ break;
+ case BPF_ANC | SKF_AD_VLAN_TAG:
+ case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* icm %r5,3,<d(vlan_tci)>(%r2) */
+ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
+ if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
+ /* nill %r5,0xefff */
+ EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
+ } else {
+ /* nill %r5,0x1000 */
+ EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT);
+ /* srl %r5,12 */
+ EMIT4_DISP(0x88500000, 12);
+ }
+ break;
+ case BPF_ANC | SKF_AD_PKTTYPE:
+ if (pkt_type_offset < 0)
+ goto out;
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+ /* ic %r5,<d(pkt_type_offset)>(%r2) */
+ EMIT4_DISP(0x43502000, pkt_type_offset);
+ /* srl %r5,5 */
+ EMIT4_DISP(0x88500000, 5);
+ break;
+ case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
+#ifdef CONFIG_SMP
+ /* l %r5,<d(cpu_nr)> */
+ EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
+#else
+ /* lhi %r5,0 */
+ EMIT4(0xa7580000);
+#endif
+ break;
+ default: /* too complex, give up */
+ goto out;
+ }
+ addrs[i] = jit->prg - jit->start;
+ return 0;
+out:
+ return -1;
+}
+
+/*
+ * Header placed at the start of every JIT allocation.
+ *
+ * Note: for security reasons, bpf code will follow a randomly
+ * sized amount of illegal instructions.
+ */
+struct bpf_binary_header {
+ unsigned int pages; /* size of the whole allocation, in pages */
+ u8 image[]; /* zero-filled gap followed by the generated code */
+};
+
+/*
+ * bpf_alloc_binary() - allocate the memory that will hold a JITed filter
+ * @bpfsize: number of bytes needed for the generated code and literals
+ * @image_ptr: receives the address at which the code has to start
+ *
+ * The allocation is rounded up to whole pages and cleared. The code start
+ * is moved forward by a random, even number of bytes inside the header's
+ * image area. Returns the header, or NULL if module_alloc() failed.
+ */
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
+						  u8 **image_ptr)
+{
+	struct bpf_binary_header *hdr;
+	unsigned int total, slack;
+
+	/*
+	 * Reserve at least 128 spare bytes so that even a filter which
+	 * nearly fills a page leaves room for the random gap.
+	 */
+	total = round_up(bpfsize + sizeof(*hdr) + 128, PAGE_SIZE);
+	hdr = module_alloc(total);
+	if (hdr == NULL)
+		return NULL;
+
+	memset(hdr, 0, total);
+	hdr->pages = total / PAGE_SIZE;
+
+	/* the gap is limited by the spare room and by one page */
+	slack = min(total - (bpfsize + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr));
+
+	/* "& -2" clears bit 0: the first instruction sits on an even address */
+	*image_ptr = &hdr->image[(prandom_u32() % slack) & -2];
+	return hdr;
+}
+
+/*
+ * bpf_jit_compile() - translate a socket filter into native code
+ * @fp: the filter to compile
+ *
+ * Does nothing when bpf_jit_enable is zero. Otherwise up to ten passes
+ * are run over the filter. While no image exists (jit.start == NULL) the
+ * passes repeat until the program and literal positions equal those of
+ * the previous pass; then the image is allocated and the pointers are
+ * rebased onto it. Later passes repeat until the complete jit state is
+ * identical to the previous pass.
+ *
+ * On success fp->bpf_func points to the read-only image and fp->jited is
+ * set. On any failure @fp is left untouched and all memory allocated
+ * here, including an already allocated image, is released.
+ */
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	struct bpf_binary_header *header = NULL;
+	unsigned long size, prg_len, lit_len;
+	struct bpf_jit jit, cjit;
+	unsigned int *addrs;
+	int pass, i;
+
+	if (!bpf_jit_enable)
+		return;
+	addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+	memset(&jit, 0, sizeof(jit));
+	memset(&cjit, 0, sizeof(cjit));
+
+	for (pass = 0; pass < 10; pass++) {
+		jit.prg = jit.start;
+		jit.lit = jit.mid;
+
+		bpf_jit_prologue(&jit);
+		bpf_jit_noleaks(&jit, fp->insns);
+		for (i = 0; i < fp->len; i++) {
+			if (bpf_jit_insn(&jit, fp->insns + i, addrs, i,
+					 i == fp->len - 1))
+				goto out;
+		}
+		bpf_jit_epilogue(&jit);
+		if (jit.start) {
+			/* image exists: stop once a pass changes nothing */
+			WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit);
+			if (memcmp(&jit, &cjit, sizeof(jit)) == 0)
+				break;
+		} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
+			/* sizes are stable: allocate and rebase onto the image */
+			prg_len = jit.prg - jit.start;
+			lit_len = jit.lit - jit.mid;
+			size = prg_len + lit_len;
+			if (size >= BPF_SIZE_MAX)
+				goto out;
+			header = bpf_alloc_binary(size, &jit.start);
+			if (!header)
+				goto out;
+			jit.prg = jit.mid = jit.start + prg_len;
+			jit.lit = jit.end = jit.start + prg_len + lit_len;
+			jit.base_ip += (unsigned long) jit.start;
+			jit.exit_ip += (unsigned long) jit.start;
+			jit.ret0_ip += (unsigned long) jit.start;
+		}
+		cjit = jit;
+	}
+	if (bpf_jit_enable > 1) {
+		bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
+		if (jit.start)
+			print_fn_code(jit.start, jit.mid - jit.start);
+	}
+	if (jit.start) {
+		set_memory_ro((unsigned long)header, header->pages);
+		fp->bpf_func = (void *) jit.start;
+		fp->jited = 1;
+		/* the image now belongs to fp; bpf_jit_free() releases it */
+		header = NULL;
+	}
+out:
+	/* a pass that failed after the allocation must not leak the image */
+	if (header)
+		module_free(NULL, header);
+	kfree(addrs);
+}
+
+/*
+ * bpf_jit_free() - release a filter and, if present, its JITed image
+ * @fp: the filter to free
+ *
+ * For a JITed filter the allocation header is found by rounding
+ * fp->bpf_func down to a page boundary. The image is made writable
+ * again before it is handed back to module_free().
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->jited) {
+		unsigned long base = (unsigned long)fp->bpf_func & PAGE_MASK;
+		struct bpf_binary_header *hdr = (void *)base;
+
+		set_memory_rw(base, hdr->pages);
+		module_free(NULL, hdr);
+	}
+	kfree(fp);
+}
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 537b2d840e6..524c4b61582 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,4 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \
timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_64BIT) += hwsampler.o
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
index bc4b84a35ca..8a6811b2cdb 100644
--- a/arch/s390/oprofile/backtrace.c
+++ b/arch/s390/oprofile/backtrace.c
@@ -1,8 +1,6 @@
-/**
- * arch/s390/oprofile/backtrace.c
- *
+/*
* S390 Version
- * Copyright (C) 2005 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2005
* Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
*/
@@ -60,7 +58,7 @@ void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
unsigned long head;
struct stack_frame* head_sf;
- if (user_mode (regs))
+ if (user_mode(regs))
return;
head = regs->gprs[15];
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
new file mode 100644
index 00000000000..e53c6f26880
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.c
@@ -0,0 +1,1178 @@
+/*
+ * Copyright IBM Corp. 2010
+ * Author: Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/semaphore.h>
+#include <linux/oom.h>
+#include <linux/oprofile.h>
+
+#include <asm/facility.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+#include "hwsampler.h"
+#include "op_counter.h"
+
+/* accepted range for the number of SDBs per SDBT, see hwsampler_allocate() */
+#define MAX_NUM_SDB 511
+#define MIN_NUM_SDB 1
+
+DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+
+/* argument block for the execute_*() helpers run on a target CPU */
+struct hws_execute_parms {
+ void *buffer; /* control block handed to qsi()/lsctl() */
+ signed int rc; /* return code of the executed function */
+};
+
+DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer);
+
+/* hws_sem serializes the API functions and guards hws_state */
+static DEFINE_MUTEX(hws_sem);
+/* hws_sem_oom serializes sampler memory allocation against the OOM notifier */
+static DEFINE_MUTEX(hws_sem_oom);
+
+/* non-zero: the worker also processes SDBs that are not marked full */
+static unsigned char hws_flush_all;
+/* non-zero while the OOM notifier registered by hwsampler_start_all() is active */
+static unsigned int hws_oom;
+/* non-zero: measurement alert interrupts are processed by hws_ext_handler() */
+static unsigned int hws_alert;
+static struct workqueue_struct *hws_wq;
+
+/* current state of the sampler; 0 means hwsampler_setup() has not run yet */
+static unsigned int hws_state;
+enum {
+ HWS_INIT = 1,
+ HWS_DEALLOCATED,
+ HWS_STOPPED,
+ HWS_STARTED,
+ HWS_STOPPING };
+
+/* set to 1 if called by kernel during memory allocation */
+static unsigned char oom_killer_was_active;
+/* size of SDBT and SDB as of allocate API */
+static unsigned long num_sdbt = 100;
+static unsigned long num_sdb = 511;
+/* sampling interval (machine cycles) */
+static unsigned long interval;
+
+/* limits for the sampling interval, collected from QSI in hwsampler_setup() */
+static unsigned long min_sampler_rate;
+static unsigned long max_sampler_rate;
+
+/*
+ * execute_qsi() - cross-CPU callback that runs qsi()
+ * @parms: struct hws_execute_parms; ->buffer is passed to qsi() and
+ *	   ->rc receives its return value
+ */
+static void execute_qsi(void *parms)
+{
+	struct hws_execute_parms *req = parms;
+	void *info_block = req->buffer;
+
+	req->rc = qsi(info_block);
+}
+
+/*
+ * execute_ssctl() - cross-CPU callback that runs lsctl()
+ * @parms: struct hws_execute_parms; ->buffer is passed to lsctl() and
+ *	   ->rc receives its return value
+ */
+static void execute_ssctl(void *parms)
+{
+	struct hws_execute_parms *req = parms;
+	void *request_block = req->buffer;
+
+	req->rc = lsctl(request_block);
+}
+
+/*
+ * smp_ctl_ssctl_stop() - disable and deactivate sampling on one CPU
+ * @cpu: the CPU whose sampling controls are changed
+ *
+ * Clears the es and cs controls, loads them on @cpu and then queries the
+ * sampling information to check the result. Returns the return code of
+ * the control load; the query result is only reported, not returned.
+ */
+static int smp_ctl_ssctl_stop(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+	int ssctl_rc;
+
+	buf->ssctl.es = 0;
+	buf->ssctl.cs = 0;
+
+	req.buffer = &buf->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &req, 1);
+	ssctl_rc = req.rc;
+	if (ssctl_rc != 0) {
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+		dump_stack();
+	}
+
+	/* read the state back to see whether the stop took effect */
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+
+	if (buf->qsi.es != 0 || buf->qsi.cs != 0) {
+		printk(KERN_EMERG "CPUMF sampling did not stop properly.\n");
+		dump_stack();
+	}
+
+	return ssctl_rc;
+}
+
+/*
+ * smp_ctl_ssctl_deactivate() - set sampling inactive but keep it enabled
+ * @cpu: the CPU whose sampling controls are changed
+ *
+ * Loads es = 1, cs = 0 on @cpu and queries the state afterwards.
+ * Returns the return code of the control load.
+ */
+static int smp_ctl_ssctl_deactivate(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+	int ssctl_rc;
+
+	buf->ssctl.es = 1;
+	buf->ssctl.cs = 0;
+
+	req.buffer = &buf->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &req, 1);
+	ssctl_rc = req.rc;
+	if (ssctl_rc != 0)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	/* read the state back to see whether sampling is inactive now */
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+
+	if (buf->qsi.cs != 0)
+		printk(KERN_EMERG "CPUMF sampling was not set inactive.\n");
+
+	return ssctl_rc;
+}
+
+/*
+ * smp_ctl_ssctl_enable_activate() - enable and activate sampling on a CPU
+ * @cpu: the CPU whose sampling controls are changed
+ * @interval: sampling interval stored in the request block
+ *
+ * TEAR is set to the first SDBT of @cpu and DEAR to that table's first
+ * entry. Returns the return code of the control load; a failing query
+ * afterwards is only logged.
+ */
+static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+	int ssctl_rc;
+
+	buf->ssctl.h = 1;
+	buf->ssctl.tear = buf->first_sdbt;
+	buf->ssctl.dear = *(unsigned long *) buf->first_sdbt;
+	buf->ssctl.interval = interval;
+	buf->ssctl.es = 1;
+	buf->ssctl.cs = 1;
+
+	req.buffer = &buf->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &req, 1);
+	ssctl_rc = req.rc;
+	if (ssctl_rc != 0)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	/* refresh the cached sampling information for the callers */
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+	if (req.rc != 0)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu);
+
+	return ssctl_rc;
+}
+
+/*
+ * smp_ctl_qsi() - refresh the cached sampling information of one CPU
+ * @cpu: the CPU to query
+ *
+ * Runs execute_qsi() on @cpu with that CPU's qsi block as buffer and
+ * returns the return code it delivered.
+ */
+static int smp_ctl_qsi(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+
+	return req.rc;
+}
+
+/*
+ * hws_ext_handler() - external interrupt handler for measurement alerts
+ * @ext_code: not used
+ * @param32: alert bits delivered with the interrupt
+ * @param64: not used
+ *
+ * Interrupts without a sampling-facility bit, or arriving while hws_alert
+ * is zero, are ignored. Otherwise the alert bits are merged into the
+ * per-CPU ext_params and the CPU's worker is queued.
+ */
+static void hws_ext_handler(struct ext_code ext_code,
+			    unsigned int param32, unsigned long param64)
+{
+	struct hws_cpu_buffer *buf = &__get_cpu_var(sampler_cpu_buffer);
+	unsigned int pending;
+
+	if (!(param32 & CPU_MF_INT_SF_MASK) || !hws_alert)
+		return;
+
+	inc_irq_stat(IRQEXT_CMS);
+
+	/* accumulate; the worker fetches and clears the bits in one step */
+	pending = atomic_read(&buf->ext_params) | param32;
+	atomic_xchg(&buf->ext_params, pending);
+
+	if (hws_wq != NULL)
+		queue_work(hws_wq, &buf->worker);
+}
+
+static void worker(struct work_struct *work);
+
+static void add_samples_to_oprofile(unsigned cpu, unsigned long *,
+ unsigned long *dear);
+
+/* Clear the complete sampler buffer structure of every online CPU. */
+static void init_all_cpu_buffers(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		memset(&per_cpu(sampler_cpu_buffer, cpu), 0,
+		       sizeof(struct hws_cpu_buffer));
+}
+
+/*
+ * Reset the per-run bookkeeping of every online CPU: pending alert bits,
+ * worker position, statistics counters and control flags. The SDBT chain
+ * (first_sdbt) and the qsi/ssctl blocks are left untouched.
+ */
+static void prepare_cpu_buffers(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+
+		atomic_set(&buf->ext_params, 0);
+		buf->worker_entry = 0;
+
+		/* statistics */
+		buf->sample_overflow = 0;
+		buf->req_alert = 0;
+		buf->incorrect_sdbt_entry = 0;
+		buf->invalid_entry_address = 0;
+		buf->loss_of_sample_data = 0;
+		buf->sample_auth_change_alert = 0;
+
+		/* control flags */
+		buf->finish = 0;
+		buf->oom = 0;
+		buf->stop_mode = 0;
+	}
+}
+
+/*
+ * allocate_sdbt() - allocate sampler memory
+ * @cpu: the cpu for which sampler memory is allocated
+ *
+ * A 4K page is allocated for each requested SDBT.
+ * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs.
+ * Set ALERT_REQ mask in each SDBs trailer.
+ * The number of SDBTs and SDBs is taken from num_sdbt and num_sdb.
+ * Every page is allocated without hws_sem_oom held; the mutex is taken
+ * afterwards to check oom_killer_was_active and to link the page in.
+ * Returns zero if successful, <0 otherwise: -EINVAL if the CPU already
+ * has sampler memory, -ENOMEM if a page could not be obtained or the OOM
+ * notifier was active.
+ */
+static int allocate_sdbt(int cpu)
+{
+ int j, k, rc;
+ unsigned long *sdbt;
+ unsigned long sdb;
+ unsigned long *tail;
+ unsigned long *trailer;
+ struct hws_cpu_buffer *cb;
+
+ cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+ /* refuse to allocate a second chain for the same CPU */
+ if (cb->first_sdbt)
+ return -EINVAL;
+
+ sdbt = NULL;
+ tail = sdbt;
+
+ for (j = 0; j < num_sdbt; j++) {
+ sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+
+ mutex_lock(&hws_sem_oom);
+ /* OOM killer might have been activated */
+ barrier();
+ if (oom_killer_was_active || !sdbt) {
+ if (sdbt)
+ free_page((unsigned long)sdbt);
+
+ /* the error path is entered with hws_sem_oom held */
+ goto allocate_sdbt_error;
+ }
+ if (cb->first_sdbt == 0)
+ cb->first_sdbt = (unsigned long)sdbt;
+
+ /* link current page to tail of chain */
+ /* NOTE(review): the +1 presumably is the marker tested by is_link_entry() - confirm */
+ if (tail)
+ *tail = (unsigned long)(void *)sdbt + 1;
+
+ mutex_unlock(&hws_sem_oom);
+
+ for (k = 0; k < num_sdb; k++) {
+ /* get and set SDB page */
+ sdb = get_zeroed_page(GFP_KERNEL);
+
+ mutex_lock(&hws_sem_oom);
+ /* OOM killer might have been activated */
+ barrier();
+ if (oom_killer_was_active || !sdb) {
+ if (sdb)
+ free_page(sdb);
+
+ goto allocate_sdbt_error;
+ }
+ *sdbt = sdb;
+ trailer = trailer_entry_ptr(*sdbt);
+ *trailer = SDB_TE_ALERT_REQ_MASK;
+ sdbt++;
+ mutex_unlock(&hws_sem_oom);
+ }
+ /* the slot behind the last SDB pointer will take the next link */
+ tail = sdbt;
+ }
+ mutex_lock(&hws_sem_oom);
+ if (oom_killer_was_active)
+ goto allocate_sdbt_error;
+
+ rc = 0;
+ /* close the chain: the last table links back to the first one */
+ if (tail)
+ *tail = (unsigned long)
+ ((void *)cb->first_sdbt) + 1;
+
+allocate_sdbt_exit:
+ /* every path reaching this label holds hws_sem_oom */
+ mutex_unlock(&hws_sem_oom);
+ return rc;
+
+allocate_sdbt_error:
+ rc = -ENOMEM;
+ goto allocate_sdbt_exit;
+}
+
+/*
+ * deallocate_sdbt() - deallocate all sampler memory
+ *
+ * For each online CPU all SDBT trees are deallocated and first_sdbt is
+ * reset to zero. A complete chain ends when its last link leads back to
+ * the first SDBT. A chain left incomplete by a failed allocation ends at
+ * a zero entry; the SDBT page holding that entry is freed as well.
+ * Returns the number of freed pages (as before, the SDBT page freed when
+ * a complete chain wraps around is not part of the count).
+ */
+static int deallocate_sdbt(void)
+{
+	int cpu;
+	int counter;
+
+	counter = 0;
+
+	for_each_online_cpu(cpu) {
+		unsigned long start;
+		unsigned long sdbt;
+		unsigned long *curr;
+		struct hws_cpu_buffer *cb;
+
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		if (!cb->first_sdbt)
+			continue;
+
+		sdbt = cb->first_sdbt;
+		curr = (unsigned long *) sdbt;
+		start = sdbt;
+
+		/* we'll free the SDBT after all SDBs are processed... */
+		while (sdbt) {
+			if (!*curr) {
+				/*
+				 * End of a partially built chain: no link
+				 * entry will follow, so the table holding
+				 * this empty slot has to be freed here.
+				 */
+				free_page(sdbt);
+				counter++;
+				break;
+			}
+
+			/* watch for link entry reset if found */
+			if (is_link_entry(curr)) {
+				curr = get_next_sdbt(curr);
+				free_page(sdbt);
+
+				/* we are done if we reach the start */
+				if ((unsigned long) curr == start)
+					break;
+				else
+					sdbt = (unsigned long) curr;
+			} else {
+				/* process SDB pointer */
+				free_page(*curr);
+				curr++;
+			}
+			counter++;
+		}
+		cb->first_sdbt = 0;
+	}
+	return counter;
+}
+
+/*
+ * start_sampling() - enable and activate sampling on one CPU
+ * @cpu: the CPU to start
+ *
+ * Uses the file-level sampling interval. Returns the error of the
+ * control load if that failed, -EINVAL if the state read back afterwards
+ * is not both enabled and active, 0 otherwise.
+ */
+static int start_sampling(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	int rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+
+	if (rc) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu);
+		return rc;
+	}
+
+	if (!buf->qsi.es) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu);
+		return -EINVAL;
+	}
+
+	if (!buf->qsi.cs) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu);
+		return -EINVAL;
+	}
+
+	printk(KERN_INFO
+	       "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n",
+	       cpu, interval);
+
+	return 0;
+}
+
+/*
+ * stop_sampling() - stop sampling on one CPU and report its statistics
+ * @cpu: the CPU to stop
+ *
+ * Every non-zero alert counter of the CPU is logged, whether or not the
+ * stop succeeded. Returns the return code of smp_ctl_ssctl_stop().
+ */
+static int stop_sampling(int cpu)
+{
+	struct hws_cpu_buffer *buf;
+	unsigned long count;
+	int rc;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+
+	buf = &per_cpu(sampler_cpu_buffer, cpu);
+	if (!rc && !buf->qsi.es)
+		printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu);
+
+	rc = smp_ctl_ssctl_stop(cpu);
+	if (rc)
+		printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n",
+		       cpu, rc);
+	else
+		printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n",
+		       cpu);
+
+	count = buf->req_alert;
+	if (count != 0)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert,"
+		       " count=%lu.\n", cpu, count);
+
+	count = buf->loss_of_sample_data;
+	if (count != 0)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data,"
+		       " count=%lu.\n", cpu, count);
+
+	count = buf->invalid_entry_address;
+	if (count != 0)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address,"
+		       " count=%lu.\n", cpu, count);
+
+	count = buf->incorrect_sdbt_entry;
+	if (count != 0)
+		printk(KERN_ERR
+		       "hwsampler: CPU %d CPUMF Incorrect SDBT address,"
+		       " count=%lu.\n", cpu, count);
+
+	count = buf->sample_auth_change_alert;
+	if (count != 0)
+		printk(KERN_ERR
+		       "hwsampler: CPU %d CPUMF Sample authorization change,"
+		       " count=%lu.\n", cpu, count);
+
+	return rc;
+}
+
+/* Returns 0 if facility 68 is installed, -EOPNOTSUPP otherwise. */
+static int check_hardware_prerequisites(void)
+{
+	return test_facility(68) ? 0 : -EOPNOTSUPP;
+}
+/*
+ * hws_oom_callback() - the OOM callback function
+ * @nfb: not used
+ * @dummy: not used
+ * @parm: pointer to an unsigned long counting the pages freed so far
+ *
+ * In case the callback is invoked during memory allocation for the
+ * hw sampler, all obtained memory is deallocated and a flag is set
+ * so main sampler memory allocation can exit with a failure code.
+ * In case the callback is invoked during sampling the hw sampler
+ * is deactivated for all CPUs.
+ * Always returns NOTIFY_OK.
+ */
+static int hws_oom_callback(struct notifier_block *nfb,
+	unsigned long dummy, void *parm)
+{
+	unsigned long *freed;
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	freed = parm;
+
+	mutex_lock(&hws_sem_oom);
+
+	if (hws_state == HWS_DEALLOCATED) {
+		/* during memory allocation */
+		if (oom_killer_was_active == 0) {
+			oom_killer_was_active = 1;
+			*freed += deallocate_sdbt();
+		}
+	} else {
+		int i;
+		cpu = get_cpu();
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		/* the oom flag of the notified CPU makes this a one-shot */
+		if (!cb->oom) {
+			for_each_online_cpu(i) {
+				smp_ctl_ssctl_deactivate(i);
+				cb->oom = 1;
+			}
+			cb->finish = 1;
+
+			printk(KERN_INFO
+				"hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n",
+				cpu);
+		}
+		/* balance get_cpu(), otherwise preemption stays disabled */
+		put_cpu();
+	}
+
+	mutex_unlock(&hws_sem_oom);
+
+	return NOTIFY_OK;
+}
+
+/* OOM notifier; registered while sampler memory is allocated and while sampling runs */
+static struct notifier_block hws_oom_notifier = {
+ .notifier_call = hws_oom_callback
+};
+
+/*
+ * hws_cpu_callback() - CPU notifier callback
+ *
+ * Sampler space is not available for all possible CPUs, so all CPUs
+ * should be online when hw sampling is activated. Every notification is
+ * answered with NOTIFY_BAD once the sampler state is beyond
+ * HWS_DEALLOCATED, and with NOTIFY_OK otherwise.
+ */
+static int hws_cpu_callback(struct notifier_block *nfb,
+	unsigned long action, void *hcpu)
+{
+	if (hws_state > HWS_DEALLOCATED)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+/* CPU notifier; registered in hwsampler_setup(), removed in hwsampler_shutdown() */
+static struct notifier_block hws_cpu_notifier = {
+ .notifier_call = hws_cpu_callback
+};
+
+/**
+ * hwsampler_deactivate() - set hardware sampling temporarily inactive
+ * @cpu: specifies the CPU to be set inactive.
+ *
+ * Nothing is changed unless the sampler is in state HWS_STARTED and
+ * sampling is currently active on @cpu. The workqueue is flushed before
+ * returning in every case.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deactivate(unsigned int cpu)
+{
+ /*
+ * Deactivate hw sampling temporarily and flush the buffer
+ * by pushing all the pending samples to oprofile buffer.
+ *
+ * This function can be called under one of the following conditions:
+ * Memory unmap, task is exiting.
+ */
+ int rc;
+ struct hws_cpu_buffer *cb;
+
+ rc = 0;
+ mutex_lock(&hws_sem);
+
+ cb = &per_cpu(sampler_cpu_buffer, cpu);
+ if (hws_state == HWS_STARTED) {
+ /* refresh cb->qsi so that the cs test below sees current state */
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+ if (cb->qsi.cs) {
+ rc = smp_ctl_ssctl_deactivate(cpu);
+ if (rc) {
+ printk(KERN_INFO
+ "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu);
+ /* give up: let the worker stop sampling for good */
+ cb->finish = 1;
+ hws_state = HWS_STOPPING;
+ } else {
+ /* make the worker drain partially filled SDBs, too */
+ hws_flush_all = 1;
+ /* Add work to queue to read pending samples.*/
+ queue_work_on(cpu, hws_wq, &cb->worker);
+ }
+ }
+ }
+ mutex_unlock(&hws_sem);
+
+ /* flushed outside hws_sem: the worker path takes that mutex itself */
+ if (hws_wq)
+ flush_workqueue(hws_wq);
+
+ return rc;
+}
+
+/**
+ * hwsampler_activate() - activate/resume hardware sampling which was deactivated
+ * @cpu: specifies the CPU to be set active.
+ *
+ * Counterpart of hwsampler_deactivate(). Nothing is changed unless the
+ * sampler is in state HWS_STARTED and sampling on @cpu is inactive.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_activate(unsigned int cpu)
+{
+	struct hws_cpu_buffer *buf;
+	int rc = 0;
+
+	mutex_lock(&hws_sem);
+
+	buf = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state != HWS_STARTED)
+		goto unlock;
+
+	/* refresh buf->qsi before looking at the activation state */
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+	if (buf->qsi.cs)
+		goto unlock;
+
+	hws_flush_all = 0;
+	rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+	if (rc)
+		printk(KERN_ERR
+		       "CPU %d, CPUMF activate sampling failed.\n",
+		       cpu);
+
+unlock:
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+/*
+ * check_qsi_on_setup() - verify every online CPU is ready for sampling
+ *
+ * Returns -EOPNOTSUPP if the sampling information cannot be queried and
+ * -EINVAL if sampling is not authorized or a still enabled sampler
+ * cannot be stopped. Returns 0 if all CPUs passed.
+ */
+static int check_qsi_on_setup(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+		int rc = smp_ctl_qsi(cpu);
+
+		WARN_ON(rc);
+		if (rc)
+			return -EOPNOTSUPP;
+
+		if (!buf->qsi.as) {
+			printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n");
+			return -EINVAL;
+		}
+
+		if (!buf->qsi.es)
+			continue;
+
+		/* sampling is still enabled on this CPU: stop it first */
+		printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n");
+		if (smp_ctl_ssctl_stop(cpu))
+			return -EINVAL;
+
+		printk(KERN_INFO
+		       "CPU %d, CPUMF Sampling stopped now.\n", cpu);
+	}
+	return 0;
+}
+
+/*
+ * check_qsi_on_start() - verify every online CPU can be started
+ *
+ * Each CPU must be authorized for sampling and must be neither enabled
+ * nor active. A failing query only triggers a warning. Returns 0 if all
+ * CPUs passed, -EINVAL otherwise.
+ */
+static int check_qsi_on_start(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+		int rc = smp_ctl_qsi(cpu);
+
+		WARN_ON(rc);
+
+		if (!buf->qsi.as || buf->qsi.es || buf->qsi.cs)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/* Point the worker of @cpu at the first SDBT of that CPU. */
+static void worker_on_start(unsigned int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+
+	buf->worker_entry = buf->first_sdbt;
+}
+
+/*
+ * worker_check_error() - account the alert bits of an interrupt
+ * @cpu: the CPU whose counters are updated
+ * @ext_params: alert bits collected by the interrupt handler
+ *
+ * Returns -EINVAL if the worker position is missing or points to an
+ * empty entry (no counter is touched in that case), or if an invalid
+ * entry address, incorrect SDBT entry or authorization change alert is
+ * set. Request alerts and loss of sample data are only counted.
+ */
+static int worker_check_error(unsigned int cpu, int ext_params)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	unsigned long *entry = (unsigned long *) buf->worker_entry;
+	int rc = 0;
+
+	if (entry == NULL || *entry == 0)
+		return -EINVAL;
+
+	/* informational alerts */
+	if (ext_params & CPU_MF_INT_SF_PRA)
+		buf->req_alert++;
+
+	if (ext_params & CPU_MF_INT_SF_LSDA)
+		buf->loss_of_sample_data++;
+
+	/* alerts that make the worker give up for this interrupt */
+	if (ext_params & CPU_MF_INT_SF_IAE) {
+		buf->invalid_entry_address++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & CPU_MF_INT_SF_ISE) {
+		buf->incorrect_sdbt_entry++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & CPU_MF_INT_SF_SACA) {
+		buf->sample_auth_change_alert++;
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+/*
+ * worker_on_finish() - stop sampling on a CPU that was asked to finish
+ * @cpu: the CPU the worker is handling
+ *
+ * Does nothing unless the finish flag of @cpu is set. If sampling is
+ * still enabled on @cpu it is stopped, and the finish request is passed
+ * on: every other online CPU that is not yet marked gets its own finish
+ * flag set and its own worker queued.
+ */
+static void worker_on_finish(unsigned int cpu)
+{
+	int rc, i;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	if (cb->finish) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (cb->qsi.es) {
+			printk(KERN_INFO
+				"hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n",
+				cpu);
+			rc = smp_ctl_ssctl_stop(cpu);
+			if (rc)
+				printk(KERN_INFO
+					"hwsampler: CPU %d, CPUMF Deactivation failed.\n",
+					cpu);
+
+			for_each_online_cpu(i) {
+				struct hws_cpu_buffer *other;
+
+				if (i == cpu)
+					continue;
+				/*
+				 * Use the buffer of CPU i here. Testing the
+				 * current CPU's flag, which is always set at
+				 * this point, would never queue any work.
+				 */
+				other = &per_cpu(sampler_cpu_buffer, i);
+				if (!other->finish) {
+					other->finish = 1;
+					queue_work_on(i, hws_wq,
+						&other->worker);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * worker_on_interrupt() - hand filled sample-data-blocks to oprofile
+ * @cpu: the CPU whose SDBT chain is processed
+ *
+ * Starts at cb->worker_entry and processes SDBs until one is found whose
+ * trailer does not have the buffer-full bit set. With hws_flush_all set
+ * that last, partially filled SDB is processed as well. The position
+ * reached is stored in cb->worker_entry for the next invocation.
+ */
+static void worker_on_interrupt(unsigned int cpu)
+{
+ unsigned long *sdbt;
+ unsigned char done;
+ struct hws_cpu_buffer *cb;
+
+ cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+ sdbt = (unsigned long *) cb->worker_entry;
+
+ done = 0;
+ /* do not proceed if stop was entered,
+ * forget the buffers not yet processed */
+ while (!done && !cb->stop_mode) {
+ unsigned long *trailer;
+ struct hws_trailer_entry *te;
+ /* always NULL here, so the whole SDB up to its trailer is scanned */
+ unsigned long *dear = 0;
+
+ trailer = trailer_entry_ptr(*sdbt);
+ /* leave loop if no more work to do */
+ if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
+ done = 1;
+ /* when flushing, fall through once to drain this SDB */
+ if (!hws_flush_all)
+ continue;
+ }
+
+ te = (struct hws_trailer_entry *)trailer;
+ cb->sample_overflow += te->overflow;
+
+ add_samples_to_oprofile(cpu, sdbt, dear);
+
+ /* reset trailer */
+ /* NOTE(review): 0x40 in the first trailer byte presumably keeps only the alert request bit - confirm */
+ xchg((unsigned char *) te, 0x40);
+
+ /* advance to next sdb slot in current sdbt */
+ sdbt++;
+ /* in case link bit is set use address w/o link bit */
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ cb->worker_entry = (unsigned long)sdbt;
+ }
+}
+
+/*
+ * add_samples_to_oprofile() - pass the samples of one SDB to oprofile
+ * @cpu: not used in this function
+ * @sdbt: SDBT entry holding the address of the SDB to process
+ * @dear: optional end of the valid data; if NULL the SDB is scanned up
+ * to its trailer, if it lies behind the trailer nothing is done
+ *
+ * Scanning stops at the first entry that is not marked as a basic
+ * sample. Each entry that is consumed gets its def field cleared so
+ * that it is not reported twice.
+ */
+static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
+ unsigned long *dear)
+{
+ struct hws_basic_entry *sample_data_ptr;
+ unsigned long *trailer;
+
+ trailer = trailer_entry_ptr(*sdbt);
+ if (dear) {
+ if (dear > trailer)
+ return;
+ /* only scan up to the given data end */
+ trailer = dear;
+ }
+
+ sample_data_ptr = (struct hws_basic_entry *)(*sdbt);
+
+ while ((unsigned long *)sample_data_ptr < trailer) {
+ struct pt_regs *regs = NULL;
+ struct task_struct *tsk = NULL;
+
+ /*
+ * Check sampling mode, 1 indicates basic (=customer) sampling
+ * mode.
+ */
+ if (sample_data_ptr->def != 1) {
+ /* sample slot is not yet written */
+ break;
+ } else {
+ /* make sure we don't use it twice,
+ * the next time the sampler will set it again */
+ sample_data_ptr->def = 0;
+ }
+
+ /* Get pt_regs. */
+ if (sample_data_ptr->P == 1) {
+ /* userspace sample */
+ /* the primary ASN of the sample is used as the pid for the lookup */
+ unsigned int pid = sample_data_ptr->prim_asn;
+ if (!counter_config.user)
+ goto skip_sample;
+ rcu_read_lock();
+ tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
+ if (tsk)
+ regs = task_pt_regs(tsk);
+ rcu_read_unlock();
+ /* NOTE(review): tsk and regs are used below after rcu_read_unlock() - confirm the task cannot go away */
+ } else {
+ /* kernelspace sample */
+ if (!counter_config.kernel)
+ goto skip_sample;
+ regs = task_pt_regs(current);
+ }
+
+ mutex_lock(&hws_sem);
+ oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
+ !sample_data_ptr->P, tsk);
+ mutex_unlock(&hws_sem);
+ skip_sample:
+ sample_data_ptr++;
+ }
+}
+
+/*
+ * worker() - work function of the per-CPU sampler work item
+ * @work: the work_struct embedded in a struct hws_cpu_buffer
+ *
+ * Fetches and clears the alert bits collected by the interrupt handler,
+ * accounts them and then either processes filled SDBs or, if the finish
+ * flag is set, stops sampling.
+ */
+static void worker(struct work_struct *work)
+{
+ unsigned int cpu;
+ int ext_params;
+ struct hws_cpu_buffer *cb;
+
+ cb = container_of(work, struct hws_cpu_buffer, worker);
+ /* NOTE(review): the helpers below look up the buffer via this cpu number; it is presumably the CPU owning cb - confirm */
+ cpu = smp_processor_id();
+ ext_params = atomic_xchg(&cb->ext_params, 0);
+
+ /* first run after a start: begin at the first SDBT */
+ if (!cb->worker_entry)
+ worker_on_start(cpu);
+
+ if (worker_check_error(cpu, ext_params))
+ return;
+
+ if (!cb->finish)
+ worker_on_interrupt(cpu);
+
+ if (cb->finish)
+ worker_on_finish(cpu);
+}
+
+/**
+ * hwsampler_allocate() - allocate memory for the hardware sampler
+ * @sdbt: number of SDBTs per online CPU (must be > 0)
+ * @sdb: number of SDBs per SDBT (minimum 1, maximum 511)
+ *
+ * Only allowed in state HWS_DEALLOCATED; on success the state becomes
+ * HWS_STOPPED. The OOM notifier is registered for the duration of the
+ * allocation. If the allocation fails, all sampler memory obtained so
+ * far is released again, so that a later call can start from scratch.
+ *
+ * Returns 0 on success, !0 on failure: -EINVAL for a wrong state or
+ * invalid arguments, -ENOMEM if the memory could not be allocated.
+ */
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb)
+{
+	int cpu, rc;
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_DEALLOCATED)
+		goto allocate_exit;
+
+	if (sdbt < 1)
+		goto allocate_exit;
+
+	if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB)
+		goto allocate_exit;
+
+	num_sdbt = sdbt;
+	num_sdb = sdb;
+
+	oom_killer_was_active = 0;
+	register_oom_notifier(&hws_oom_notifier);
+
+	for_each_online_cpu(cpu) {
+		if (allocate_sdbt(cpu)) {
+			unregister_oom_notifier(&hws_oom_notifier);
+			goto allocate_error;
+		}
+	}
+	unregister_oom_notifier(&hws_oom_notifier);
+	if (oom_killer_was_active)
+		goto allocate_error;
+
+	hws_state = HWS_STOPPED;
+	rc = 0;
+
+allocate_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+
+allocate_error:
+	/*
+	 * Drop whatever was allocated for the CPUs handled so far. Without
+	 * this the pages leak and allocate_sdbt() rejects those CPUs on
+	 * every later attempt because their first_sdbt is still set. The
+	 * OOM notifier is unregistered by now, so nothing races with it.
+	 */
+	deallocate_sdbt();
+	rc = -ENOMEM;
+	printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n");
+	goto allocate_exit;
+}
+
+/**
+ * hwsampler_deallocate() - deallocate hardware sampler memory
+ *
+ * Only allowed in state HWS_STOPPED. The measurement alert subclass is
+ * unregistered, alert processing is switched off, the sampler memory is
+ * freed and the state becomes HWS_DEALLOCATED.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deallocate(void)
+{
+	int rc = -EINVAL;
+
+	mutex_lock(&hws_sem);
+
+	if (hws_state == HWS_STOPPED) {
+		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+		hws_alert = 0;
+		deallocate_sdbt();
+
+		hws_state = HWS_DEALLOCATED;
+		rc = 0;
+	}
+
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+/* Returns the minimum sampling interval recorded by hwsampler_setup(). */
+unsigned long hwsampler_query_min_interval(void)
+{
+	unsigned long lowest = min_sampler_rate;
+
+	return lowest;
+}
+
+/* Returns the maximum sampling interval recorded by hwsampler_setup(). */
+unsigned long hwsampler_query_max_interval(void)
+{
+	unsigned long highest = max_sampler_rate;
+
+	return highest;
+}
+
+/*
+ * Returns the sample overflow count accumulated for @cpu from the
+ * trailers of the SDBs processed so far.
+ */
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
+{
+	return per_cpu(sampler_cpu_buffer, cpu).sample_overflow;
+}
+
+/*
+ * hwsampler_setup() - one-time initialization of the hardware sampler
+ *
+ * Only allowed while hws_state is still zero. Checks the hardware
+ * prerequisites, creates the workqueue, registers the CPU notifier and
+ * the external interrupt handler and collects the sampling interval
+ * limits of all online CPUs. On success the state is HWS_DEALLOCATED.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_setup(void)
+{
+ int rc;
+ int cpu;
+ struct hws_cpu_buffer *cb;
+
+ mutex_lock(&hws_sem);
+
+ rc = -EINVAL;
+ if (hws_state)
+ goto setup_exit;
+
+ /* NOTE(review): a failure below leaves the state at HWS_INIT, so a repeated call is rejected by the check above */
+ hws_state = HWS_INIT;
+
+ init_all_cpu_buffers();
+
+ rc = check_hardware_prerequisites();
+ if (rc)
+ goto setup_exit;
+
+ rc = check_qsi_on_setup();
+ if (rc)
+ goto setup_exit;
+
+ rc = -EINVAL;
+ hws_wq = create_workqueue("hwsampler");
+ if (!hws_wq)
+ goto setup_exit;
+
+ register_cpu_notifier(&hws_cpu_notifier);
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(sampler_cpu_buffer, cpu);
+ INIT_WORK(&cb->worker, worker);
+ rc = smp_ctl_qsi(cpu);
+ WARN_ON(rc);
+ /* if CPUs report different values keep the largest minimum ... */
+ if (min_sampler_rate != cb->qsi.min_sampl_rate) {
+ if (min_sampler_rate) {
+ printk(KERN_WARNING
+ "hwsampler: different min sampler rate values.\n");
+ if (min_sampler_rate < cb->qsi.min_sampl_rate)
+ min_sampler_rate =
+ cb->qsi.min_sampl_rate;
+ } else
+ min_sampler_rate = cb->qsi.min_sampl_rate;
+ }
+ /* ... and the smallest maximum */
+ if (max_sampler_rate != cb->qsi.max_sampl_rate) {
+ if (max_sampler_rate) {
+ printk(KERN_WARNING
+ "hwsampler: different max sampler rate values.\n");
+ if (max_sampler_rate > cb->qsi.max_sampl_rate)
+ max_sampler_rate =
+ cb->qsi.max_sampl_rate;
+ } else
+ max_sampler_rate = cb->qsi.max_sampl_rate;
+ }
+ }
+ register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
+
+ hws_state = HWS_DEALLOCATED;
+ rc = 0;
+
+setup_exit:
+ mutex_unlock(&hws_sem);
+ return rc;
+}
+
+/*
+ * hwsampler_shutdown() - undo hwsampler_setup()
+ *
+ * Only has an effect in state HWS_DEALLOCATED or HWS_STOPPED; in state
+ * HWS_STOPPED the sampler memory is freed as well. The workqueue is
+ * destroyed, the external interrupt handler is unregistered and the
+ * state is set to HWS_INIT. The CPU notifier is unregistered in every
+ * case.
+ *
+ * Returns 0 on success, -EINVAL if called in any other state.
+ */
+int hwsampler_shutdown(void)
+{
+ int rc;
+
+ mutex_lock(&hws_sem);
+
+ rc = -EINVAL;
+ if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) {
+ /* hws_sem is dropped for the flush: the worker path takes that mutex itself */
+ mutex_unlock(&hws_sem);
+
+ if (hws_wq)
+ flush_workqueue(hws_wq);
+
+ mutex_lock(&hws_sem);
+
+ /* state is evaluated again after the mutex was re-acquired */
+ if (hws_state == HWS_STOPPED) {
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ hws_alert = 0;
+ deallocate_sdbt();
+ }
+ if (hws_wq) {
+ destroy_workqueue(hws_wq);
+ hws_wq = NULL;
+ }
+
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
+ hws_state = HWS_INIT;
+ rc = 0;
+ }
+ mutex_unlock(&hws_sem);
+
+ unregister_cpu_notifier(&hws_cpu_notifier);
+
+ return rc;
+}
+
+/**
+ * hwsampler_start_all() - start hardware sampling on all online CPUs
+ * @rate: specifies the used interval when samples are taken
+ *
+ * Only allowed in state HWS_STOPPED and with @rate inside the limits
+ * reported by hwsampler_query_min_interval()/_max_interval(). If one CPU
+ * fails to start, sampling is stopped again on all online CPUs.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_start_all(unsigned long rate)
+{
+ int rc, cpu;
+
+ mutex_lock(&hws_sem);
+
+ hws_oom = 0;
+
+ rc = -EINVAL;
+ if (hws_state != HWS_STOPPED)
+ goto start_all_exit;
+
+ interval = rate;
+
+ /* fail if rate is not valid */
+ if (interval < min_sampler_rate || interval > max_sampler_rate)
+ goto start_all_exit;
+
+ rc = check_qsi_on_start();
+ if (rc)
+ goto start_all_exit;
+
+ prepare_cpu_buffers();
+
+ for_each_online_cpu(cpu) {
+ rc = start_sampling(cpu);
+ if (rc)
+ break;
+ }
+ if (rc) {
+ /* roll back: stop every online CPU, started or not */
+ for_each_online_cpu(cpu) {
+ stop_sampling(cpu);
+ }
+ goto start_all_exit;
+ }
+ hws_state = HWS_STARTED;
+ rc = 0;
+
+start_all_exit:
+ mutex_unlock(&hws_sem);
+
+ if (rc)
+ return rc;
+
+ /* the steps below run after hws_sem has been released */
+ register_oom_notifier(&hws_oom_notifier);
+ hws_oom = 1;
+ hws_flush_all = 0;
+ /* now let them in, 1407 CPUMF external interrupts */
+ hws_alert = 1;
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/**
+ * hwsampler_stop_all() - stop hardware sampling on all online CPUs
+ *
+ * Does nothing in state HWS_INIT. In every other state sampling is
+ * stopped on all online CPUs, the workqueue is flushed, the OOM notifier
+ * registered by hwsampler_start_all() is removed and the state is set to
+ * HWS_STOPPED.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_stop_all(void)
+{
+ int tmp_rc, rc, cpu;
+ struct hws_cpu_buffer *cb;
+
+ mutex_lock(&hws_sem);
+
+ rc = 0;
+ if (hws_state == HWS_INIT) {
+ mutex_unlock(&hws_sem);
+ return 0;
+ }
+ /* NOTE(review): no other state is filtered out, so a call in state HWS_DEALLOCATED also ends in HWS_STOPPED - confirm this is intended */
+ hws_state = HWS_STOPPING;
+ mutex_unlock(&hws_sem);
+
+ for_each_online_cpu(cpu) {
+ cb = &per_cpu(sampler_cpu_buffer, cpu);
+ /* tell a running worker to leave its processing loop */
+ cb->stop_mode = 1;
+ tmp_rc = stop_sampling(cpu);
+ /* remember the error of the last CPU that failed, but keep going */
+ if (tmp_rc)
+ rc = tmp_rc;
+ }
+
+ if (hws_wq)
+ flush_workqueue(hws_wq);
+
+ mutex_lock(&hws_sem);
+ if (hws_oom) {
+ unregister_oom_notifier(&hws_oom_notifier);
+ hws_oom = 0;
+ }
+ hws_state = HWS_STOPPED;
+ mutex_unlock(&hws_sem);
+
+ return rc;
+}
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
new file mode 100644
index 00000000000..a483d06f2fa
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.h
@@ -0,0 +1,63 @@
+/*
+ * CPUMF HW sampler functions and internal structures
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+#include <asm/cpu_mf.h>
+
+/*
+ * Request block for the SET SAMPLING CONTROLS operation.
+ *
+ * The bit-fields describe the first eight bytes; their widths
+ * (1 + 1 + 52 + 1 + 7 + 1 + 1) add up to 64 bits. The byte offsets
+ * given in the comments assume 8-byte unsigned long members.
+ */
+struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */
+{ /* bytes 0 - 7 Bit(s) */
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long b2_53:52; /* 2-53: zeros */
+ unsigned int es:1; /* 54: sampling enable control */
+ unsigned int b55_61:7; /* 55-61: zeros */
+ unsigned int cs:1; /* 62: sampling activation control */
+ unsigned int b63:1; /* 63: zero */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+};
+
+/*
+ * Per-CPU sampler state. hwsampler_stop_all() reaches it through
+ * per_cpu(sampler_cpu_buffer, cpu) and sets stop_mode before stopping
+ * the CPU.
+ */
+struct hws_cpu_buffer {
+ unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/
+ unsigned long worker_entry;
+ unsigned long sample_overflow; /* taken from SDB ... */
+ struct hws_qsi_info_block qsi;
+ struct hws_ssctl_request_block ssctl;
+ struct work_struct worker;
+ atomic_t ext_params;
+ unsigned long req_alert;
+ unsigned long loss_of_sample_data;
+ unsigned long invalid_entry_address;
+ unsigned long incorrect_sdbt_entry;
+ unsigned long sample_auth_change_alert;
+ unsigned int finish:1;
+ unsigned int oom:1;
+ unsigned int stop_mode:1; /* set by hwsampler_stop_all() */
+};
+
+/*
+ * Sampler interface used by the oprofile glue code. The int-returning
+ * functions report 0 on success; the oprofile init code treats a
+ * queried interval of 0 as "not available".
+ */
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+unsigned long hwsampler_query_min_interval(void);
+unsigned long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+#endif /*HWSAMPLER_H_*/
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 7a995113b91..9ffe645d598 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -1,26 +1,524 @@
-/**
- * arch/s390/oprofile/init.c
- *
+/*
* S390 Version
- * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright IBM Corp. 2002, 2011
* Author(s): Thomas Spatzier (tspat@de.ibm.com)
+ * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
+ * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
*
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2011 OProfile authors
*/
#include <linux/oprofile.h>
+#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include "../../../drivers/oprofile/oprof.h"
extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
-int __init oprofile_arch_init(struct oprofile_operations* ops)
+#ifdef CONFIG_64BIT
+
+#include "hwsampler.h"
+#include "op_counter.h"
+
+/* Initial sampling interval; clamped to the hardware range during init. */
+#define DEFAULT_INTERVAL 4127518
+
+/* Default buffer sizes handed to hwsampler_allocate(). */
+#define DEFAULT_SDBT_BLOCKS 1
+#define DEFAULT_SDB_BLOCKS 511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+static unsigned long oprofile_min_interval;
+static unsigned long oprofile_max_interval;
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+/* User selection: 1 = hardware sampling, 0 = timer mode. */
+static int hwsampler_enabled;
+static int hwsampler_running; /* start_mutex must be held to change */
+/* Set once at init when oprofile_hwsampler_init() succeeded. */
+static int hwsampler_available;
+
+/* Copy of the timer mode operations, used as fallback at start/stop. */
+static struct oprofile_operations timer_ops;
+
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+ reserved = 0, /* do not force */
+ timer,
+};
+static int force_cpu_type;
+
+/*
+ * Setter for the "cpu_type" module parameter.
+ *
+ * @str: value given by the user
+ * @kp:  parameter descriptor (unused)
+ *
+ * The value "timer" forces "timer" to be reported as cpu type; any other
+ * value resets the override. Always returns 0.
+ */
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+	if (!strcmp(str, "timer")) {
+		force_cpu_type = timer;
+		printk(KERN_INFO "oprofile: forcing timer to be returned "
+		       "as cpu type\n");
+	} else {
+		force_cpu_type = reserved;
+	}
+
+	return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+/* The two literals are concatenated: keep the separating blank and ')'. */
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling "
+		 "(report cpu_type \"timer\")");
+
+/*
+ * Allocate the sampling buffers and start sampling on all CPUs.
+ * The buffers are released again when starting fails.
+ */
+static int __oprofile_hwsampler_start(void)
+{
+	int rc;
+
+	rc = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	if (rc)
+		return rc;
+
+	rc = hwsampler_start_all(oprofile_hw_interval);
+	if (!rc)
+		return 0;
+
+	hwsampler_deallocate();
+	return rc;
+}
+
+/*
+ * oprofile start hook: latch the user's mode selection, then start
+ * either the timer fallback or hardware sampling. The sampling facility
+ * is reserved via perf first and released again if the start fails.
+ */
+static int oprofile_hwsampler_start(void)
+{
+	int rc;
+
+	hwsampler_running = hwsampler_enabled;
+	if (!hwsampler_running)
+		return timer_ops.start();
+
+	rc = perf_reserve_sampling();
+	if (rc)
+		return rc;
+
+	rc = __oprofile_hwsampler_start();
+	if (rc)
+		perf_release_sampling();
+	return rc;
+}
+
+/*
+ * oprofile stop hook: undo whatever oprofile_hwsampler_start() started,
+ * based on the mode latched in hwsampler_running.
+ */
+static void oprofile_hwsampler_stop(void)
+{
+	if (hwsampler_running) {
+		hwsampler_stop_all();
+		hwsampler_deallocate();
+		perf_release_sampling();
+	} else {
+		timer_ops.stop();
+	}
+}
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
+ */
+
+/* Report whether hardware sampling is currently selected (0 or 1). */
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+			      size_t count, loff_t *offset)
+{
+	unsigned long enabled = hwsampler_enabled;
+
+	return oprofilefs_ulong_to_user(enabled, buf, count, offset);
+}
+
+/*
+ * Select (1) or deselect (0) hardware sampling. Other values and writes
+ * at a non-zero offset are rejected with -EINVAL; the setting cannot be
+ * changed while oprofile is running (-EBUSY).
+ */
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+			       size_t count, loff_t *offset)
+{
+	unsigned long enable;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&enable, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	if (enable > 1)
+		return -EINVAL;
+
+	/*
+	 * Safe to do without locking as hwsampler_running is set in
+	 * start() while start_mutex is held.
+	 */
+	if (oprofile_started)
+		return -EBUSY;
+
+	hwsampler_enabled = enable;
+	return count;
+}
+
+/* Handlers for the "0/enabled" and "hwsampling/hwsampler" files. */
+static const struct file_operations hwsampler_fops = {
+ .read = hwsampler_read,
+ .write = hwsampler_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+/* Report the currently configured sampling interval. */
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+				size_t count, loff_t *offset)
+{
+	unsigned long cur = oprofile_hw_interval;
+
+	return oprofilefs_ulong_to_user(cur, buf, count, offset);
+}
+
+/*
+ * Set the sampling interval. Values outside the hardware range are not
+ * rejected but clamped to the nearest supported limit.
+ */
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+				 size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&val, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	if (val < oprofile_min_interval)
+		val = oprofile_min_interval;
+	else if (val > oprofile_max_interval)
+		val = oprofile_max_interval;
+	oprofile_hw_interval = val;
+
+	return count;
+}
+
+/* Handlers for the "0/count" and "hwsampling/hw_interval" files. */
+static const struct file_operations hw_interval_fops = {
+ .read = hw_interval_read,
+ .write = hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+/* Dummy read handler: the value is always 0. */
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	const unsigned long zero = 0;
+
+	return oprofilefs_ulong_to_user(zero, buf, count, offset);
+}
+
+/* Dummy write handler: only the value 0 is accepted, nothing is stored. */
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+				    size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&val, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	if (val)
+		return -EINVAL;
+
+	return count;
+}
+
+/* Handlers for the "0/event" and "0/unit_mask" files. */
+static const struct file_operations zero_fops = {
+ .read = hwsampler_zero_read,
+ .write = hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops. */
+
+/* Report the "kernel" flag of the counter configuration. */
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+				     size_t count, loff_t *offset)
+{
+	unsigned long flag = counter_config.kernel;
+
+	return oprofilefs_ulong_to_user(flag, buf, count, offset);
+}
+
+/* Set the "kernel" flag of the counter configuration; only 0 or 1. */
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+				      size_t count, loff_t *offset)
+{
+	unsigned long flag;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&flag, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	if (flag > 1)
+		return -EINVAL;
+
+	counter_config.kernel = flag;
+	return count;
+}
+
+/* Handlers for the "0/kernel" file. */
+static const struct file_operations kernel_fops = {
+ .read = hwsampler_kernel_read,
+ .write = hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+/* Report the "user" flag of the counter configuration. */
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	unsigned long flag = counter_config.user;
+
+	return oprofilefs_ulong_to_user(flag, buf, count, offset);
+}
+
+/* Set the "user" flag of the counter configuration; only 0 or 1. */
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+				    size_t count, loff_t *offset)
+{
+	unsigned long flag;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&flag, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	if (flag > 1)
+		return -EINVAL;
+
+	counter_config.user = flag;
+	return count;
+}
+
+/* Handlers for the "0/user" file. */
+static const struct file_operations user_fops = {
+ .read = hwsampler_user_read,
+ .write = hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value always has to be the inverted value of hwsampler_enabled. So
+ * no separate variable is created. That way we do not need locking.
+ */
+
+/* Timer mode is reported as enabled whenever hardware sampling is not. */
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *offset)
+{
+	unsigned long timer_on = !hwsampler_enabled;
+
+	return oprofilefs_ulong_to_user(timer_on, buf, count, offset);
+}
+
+/*
+ * Enable (1) or disable (0) timer mode by storing the inverted value in
+ * hwsampler_enabled. Disabling is refused when no hardware sampling is
+ * available, and any change is refused while oprofile is running.
+ */
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+				   size_t count, loff_t *offset)
+{
+	unsigned long timer_on;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&timer_on, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	if (timer_on > 1)
+		return -EINVAL;
+
+	/* Timer cannot be disabled without having hardware sampling. */
+	if (!timer_on && !hwsampler_available)
+		return -EINVAL;
+
+	/*
+	 * Safe to do without locking as hwsampler_running is set in
+	 * start() while start_mutex is held.
+	 */
+	if (oprofile_started)
+		return -EBUSY;
+
+	hwsampler_enabled = !timer_on;
+	return count;
+}
+
+/* Handlers for the "timer/enabled" file. */
+static const struct file_operations timer_enabled_fops = {
+ .read = timer_enabled_read,
+ .write = timer_enabled_write,
+};
+
+
+/*
+ * Populate the oprofile file system for this architecture.
+ *
+ * "timer/enabled" is always created. When hardware sampling is
+ * available, either the virtual counter directory "0" or, with a forced
+ * cpu type, the legacy "hwsampling" directory is added.
+ *
+ * Returns 0 on success or -EINVAL when a directory cannot be created.
+ */
+static int oprofile_create_hwsampling_files(struct dentry *root)
+{
+	struct dentry *dir;
+
+	dir = oprofilefs_mkdir(root, "timer");
+	if (!dir)
+		return -EINVAL;
+
+	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
+
+	if (!hwsampler_available)
+		return 0;
+
+	/* reinitialize default values */
+	hwsampler_enabled = 1;
+	counter_config.kernel = 1;
+	counter_config.user = 1;
+
+	if (force_cpu_type) {
+		/*
+		 * Hardware sampling can be used but the cpu_type is
+		 * forced to timer in order to deal with legacy user
+		 * space tools. The /dev/oprofile/hwsampling fs is
+		 * provided in that case.
+		 */
+		dir = oprofilefs_mkdir(root, "hwsampling");
+		if (!dir)
+			return -EINVAL;
+
+		oprofilefs_create_file(dir, "hwsampler",
+				       &hwsampler_fops);
+		oprofilefs_create_file(dir, "hw_interval",
+				       &hw_interval_fops);
+		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
+					   &oprofile_min_interval);
+		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
+					   &oprofile_max_interval);
+		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
+					&oprofile_sdbt_blocks);
+		return 0;
+	}
+
+	/*
+	 * Create the counter file system. A single virtual counter is
+	 * created which can be used to enable/disable hardware sampling
+	 * dynamically from user space. The user space will configure a
+	 * single counter with a single event. The value of 'event' and
+	 * 'unit_mask' are not evaluated by the kernel code and can only
+	 * be set to 0.
+	 */
+	dir = oprofilefs_mkdir(root, "0");
+	if (!dir)
+		return -EINVAL;
+
+	oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
+	oprofilefs_create_file(dir, "event", &zero_fops);
+	oprofilefs_create_file(dir, "count", &hw_interval_fops);
+	oprofilefs_create_file(dir, "unit_mask", &zero_fops);
+	oprofilefs_create_file(dir, "kernel", &kernel_fops);
+	oprofilefs_create_file(dir, "user", &user_fops);
+	oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
+				&oprofile_sdbt_blocks);
+
+	return 0;
+}
+
+/*
+ * Set up hardware sampling on top of the timer mode operations.
+ *
+ * The timer operations are installed first and saved in timer_ops, so
+ * they stay usable as fallback. Returns 0 when the sampling facility is
+ * usable and -ENODEV otherwise (unknown machine type, failed setup or a
+ * zero interval limit). ops->create_files, and possibly ops->cpu_type,
+ * have already been changed when -ENODEV is returned.
+ */
+static int oprofile_hwsampler_init(struct oprofile_operations *ops)
+{
+ /*
+ * Initialize the timer mode infrastructure as well in order
+ * to be able to switch back dynamically. oprofile_timer_init
+ * is not supposed to fail.
+ */
+ if (oprofile_timer_init(ops))
+ BUG();
+
+ memcpy(&timer_ops, ops, sizeof(timer_ops));
+ ops->create_files = oprofile_create_hwsampling_files;
+
+ /*
+ * If the user space tools do not support newer cpu types,
+ * the force_cpu_type module parameter
+ * can be used to always return "timer" as cpu type.
+ */
+ if (force_cpu_type != timer) {
+ struct cpuid id;
+
+ get_cpu_id (&id);
+
+ /* map the machine type to the cpu type string reported to user space */
+ switch (id.machine) {
+ case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+ case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+ case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
+ default: return -ENODEV;
+ }
+ }
+
+ if (hwsampler_setup())
+ return -ENODEV;
+
+ /*
+ * Query the range for the sampling interval from the
+ * hardware.
+ */
+ oprofile_min_interval = hwsampler_query_min_interval();
+ if (oprofile_min_interval == 0)
+ return -ENODEV;
+ oprofile_max_interval = hwsampler_query_max_interval();
+ if (oprofile_max_interval == 0)
+ return -ENODEV;
+
+ /* The initial value should be sane */
+ if (oprofile_hw_interval < oprofile_min_interval)
+ oprofile_hw_interval = oprofile_min_interval;
+ if (oprofile_hw_interval > oprofile_max_interval)
+ oprofile_hw_interval = oprofile_max_interval;
+
+ printk(KERN_INFO "oprofile: System z hardware sampling "
+ "facility found.\n");
+
+ /* start/stop are only redirected once the facility is known to work */
+ ops->start = oprofile_hwsampler_start;
+ ops->stop = oprofile_hwsampler_stop;
+
+ return 0;
+}
+
+/* Shut the hardware sampler down; the result of the shutdown is ignored. */
+static void oprofile_hwsampler_exit(void)
+{
+ hwsampler_shutdown();
+}
+
+#endif /* CONFIG_64BIT */
+
+/*
+ * Architecture hook called when oprofile is initialized.
+ *
+ * Installs the backtrace handler. With CONFIG_64BIT it tries to set up
+ * hardware sampling, records the outcome in hwsampler_available and
+ * returns 0 either way; without CONFIG_64BIT it returns -ENODEV.
+ */
+int __init oprofile_arch_init(struct oprofile_operations *ops)
{
ops->backtrace = s390_backtrace;
+
+#ifdef CONFIG_64BIT
+
+ /*
+ * -ENODEV is not reported to the caller. The module itself
+ * will use the timer mode sampling as fallback and this is
+ * always available.
+ */
+ hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+ return 0;
+#else
return -ENODEV;
+#endif
}
+/* Architecture exit hook: shuts the hardware sampler down on 64 bit. */
void oprofile_arch_exit(void)
{
+#ifdef CONFIG_64BIT
+ oprofile_hwsampler_exit();
+#endif
}
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 00000000000..61b2531eef1
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright IBM Corp. 2011
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+/* Settings of the single virtual counter exposed through oprofilefs. */
+struct op_counter_config {
+ /* `enabled' maps to the hwsampler_enabled variable. */
+ /* `count' maps to the oprofile_hw_interval variable. */
+ /* `event' and `unit_mask' are unused. */
+ unsigned long kernel; /* 0 or 1, written via the `kernel' file */
+ unsigned long user; /* 0 or 1, written via the `user' file */
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
new file mode 100644
index 00000000000..a9e1dc4ae44
--- /dev/null
+++ b/arch/s390/pci/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the s390 PCI subsystem.
+#
+
+obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
+ pci_event.o pci_debug.o pci_insn.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
new file mode 100644
index 00000000000..30de42730b2
--- /dev/null
+++ b/arch/s390/pci/pci.c
@@ -0,0 +1,935 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ * The System z PCI code is a rewrite from a prototype by
+ * the following people (Kudoz!):
+ * Alexander Schmidt
+ * Christoph Raisch
+ * Hannes Hering
+ * Hoang-Nam Nguyen
+ * Jan-Bernd Themann
+ * Stefan Roscher
+ * Thomas Klein
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+#include <asm/facility.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#define DEBUG /* enable pr_debug */
+
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+
+#define ZPCI_NR_DMA_SPACES 1
+#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
+
+/* list of all detected zpci devices */
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
+
+/* irq chip installed for every MSI irq set up in arch_setup_msi_irqs() */
+static struct irq_chip zpci_irq_chip = {
+ .name = "zPCI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
+};
+
+/* bitmap of PCI domain numbers in use, protected by zpci_domain_lock */
+static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DEFINE_SPINLOCK(zpci_domain_lock);
+
+/* adapter interrupt summary bit vector, one bit per device */
+static struct airq_iv *zpci_aisb_iv;
+/* per summary bit shortcut to the interrupt bit vector of that device */
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
+
+/* Adapter interrupt definitions */
+static void zpci_irq_handler(struct airq_struct *airq);
+
+static struct airq_struct zpci_airq = {
+ .handler = zpci_irq_handler,
+ .isc = PCI_ISC,
+};
+
+/* I/O Map */
+static DEFINE_SPINLOCK(zpci_iomap_lock);
+static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+struct zpci_iomap_entry *zpci_iomap_start;
+EXPORT_SYMBOL_GPL(zpci_iomap_start);
+
+/* cache the function measurement blocks are allocated from */
+static struct kmem_cache *zdev_fmb_cache;
+
+/* Return the zpci device stored in the sysdata pointer of @pdev. */
+struct zpci_dev *get_zdev(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = (struct zpci_dev *) pdev->sysdata;
+
+	return zdev;
+}
+
+/*
+ * Look up a zpci device by its function id. The device list is walked
+ * under zpci_list_lock. Returns the first match or NULL.
+ */
+struct zpci_dev *get_zdev_by_fid(u32 fid)
+{
+	struct zpci_dev *cur, *found = NULL;
+
+	spin_lock(&zpci_list_lock);
+	list_for_each_entry(cur, &zpci_list, entry) {
+		if (cur->fid != fid)
+			continue;
+		found = cur;
+		break;
+	}
+	spin_unlock(&zpci_list_lock);
+
+	return found;
+}
+
+/* Return the zpci device attached to @bus, or NULL if there is none. */
+static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
+{
+	if (!bus || !bus->sysdata)
+		return NULL;
+
+	return (struct zpci_dev *) bus->sysdata;
+}
+
+/* Return the PCI domain number of the zpci device behind @bus. */
+int pci_domain_nr(struct pci_bus *bus)
+{
+	struct zpci_dev *zdev = (struct zpci_dev *) bus->sysdata;
+
+	return zdev->domain;
+}
+EXPORT_SYMBOL_GPL(pci_domain_nr);
+
+/* Returns the domain number of @bus, exactly as pci_domain_nr() does. */
+int pci_proc_domain(struct pci_bus *bus)
+{
+ return pci_domain_nr(bus);
+}
+EXPORT_SYMBOL_GPL(pci_proc_domain);
+
+/*
+ * Modify PCI: Register adapter interruptions
+ *
+ * Describes the interrupt bit vector and the summary bit of @zdev in a
+ * function information block and passes it to zpci_mod_fc(), whose
+ * result is returned.
+ */
+static int zpci_set_airq(struct zpci_dev *zdev)
+{
+	struct zpci_fib fib = {0};
+	u64 req;
+
+	req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+
+	fib.isc = PCI_ISC;
+	fib.sum = 1;		/* enable summary notifications */
+	fib.noi = airq_iv_end(zdev->aibv);
+
+	/* each zdev has its own interrupt vector, hence offset 0 */
+	fib.aibv = (unsigned long) zdev->aibv->vector;
+	fib.aibvo = 0;
+
+	/* summary bit: 8-byte word holding the bit plus the bit offset */
+	fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+	fib.aisbo = zdev->aisb & 63;
+
+	return zpci_mod_fc(req, &fib);
+}
+
+/* Arguments that mod_pci() copies into the function information block. */
+struct mod_pci_args {
+ u64 base; /* stored in fib.pba */
+ u64 limit; /* stored in fib.pal */
+ u64 iota; /* stored in fib.iota */
+ u64 fmb_addr; /* stored in fib.fmb_addr */
+};
+
+/*
+ * Issue the modify-PCI function control @fn for @zdev and DMA address
+ * space @dmaas, with the parameters from @args. Returns the result of
+ * zpci_mod_fc().
+ */
+static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
+{
+	struct zpci_fib fib = {0};
+	u64 req;
+
+	fib.fmb_addr = args->fmb_addr;
+	fib.iota = args->iota;
+	fib.pal = args->limit;
+	fib.pba = args->base;
+
+	req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
+	return zpci_mod_fc(req, &fib);
+}
+
+/*
+ * Modify PCI: Register I/O address translation parameters
+ *
+ * Warns once when the low 14 bits of @iota are set; the RTTO flag is
+ * ORed into the value before it is registered.
+ */
+int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
+		       u64 base, u64 limit, u64 iota)
+{
+	struct mod_pci_args args = {
+		.base = base,
+		.limit = limit,
+		.iota = iota | ZPCI_IOTA_RTTO_FLAG,
+		.fmb_addr = 0,
+	};
+
+	WARN_ON_ONCE(iota & 0x3fff);
+	return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args);
+}
+
+/* Modify PCI: Unregister I/O address translation parameters */
+int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
+{
+	/* all parameters are zero for a deregistration */
+	struct mod_pci_args args = {
+		.base = 0,
+		.limit = 0,
+		.iota = 0,
+		.fmb_addr = 0,
+	};
+
+	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args);
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
+{
+	/* all parameters are zero for a deregistration */
+	struct mod_pci_args args = {
+		.base = 0,
+		.limit = 0,
+		.iota = 0,
+		.fmb_addr = 0,
+	};
+
+	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
+}
+
+/*
+ * Modify PCI: Set PCI function measurement parameters
+ *
+ * Allocates a zeroed function measurement block for @zdev and registers
+ * its physical address with the function.
+ *
+ * Returns 0 on success, -EINVAL if a measurement block is already in
+ * place, -ENOMEM if the allocation fails, or the error of the modify
+ * request. In every failure case zdev->fmb is left unset, so the
+ * operation can be retried.
+ */
+int zpci_fmb_enable_device(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+	int rc;
+
+	if (zdev->fmb)
+		return -EINVAL;
+
+	zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
+	if (!zdev->fmb)
+		return -ENOMEM;
+	/* the block is expected to be 16-byte aligned */
+	WARN_ON((u64) zdev->fmb & 0xf);
+
+	args.fmb_addr = virt_to_phys(zdev->fmb);
+	rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+	if (rc) {
+		/*
+		 * The block was not registered: drop it, otherwise the
+		 * device would look enabled and every later enable attempt
+		 * would fail with -EINVAL.
+		 */
+		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+		zdev->fmb = NULL;
+	}
+	return rc;
+}
+
+/*
+ * Modify PCI: Disable PCI function measurement
+ *
+ * Returns -EINVAL if no measurement block is in place. Otherwise the
+ * block is freed regardless of the result of the modify request, and
+ * that result is returned.
+ */
+int zpci_fmb_disable_device(struct zpci_dev *zdev)
+{
+	/* Function measurement is disabled if fmb address is zero */
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+	int rc;
+
+	if (!zdev->fmb)
+		return -EINVAL;
+
+	rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+
+	kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+	zdev->fmb = NULL;
+
+	return rc;
+}
+
+#define ZPCI_PCIAS_CFGSPC 15
+
+/*
+ * Read @len bytes at @offset from the configuration space of @zdev.
+ *
+ * On success the loaded value is shifted left by (8 - @len) bytes,
+ * converted with le64_to_cpu() and stored in *@val. On failure *@val is
+ * set to 0xffffffff. Returns the result of zpci_load().
+ */
+static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+ u64 data;
+ int rc;
+
+ rc = zpci_load(&data, req, offset);
+ if (!rc) {
+ data = data << ((8 - len) * 8);
+ data = le64_to_cpu(data);
+ *val = (u32) data;
+ } else
+ *val = 0xffffffff;
+ return rc;
+}
+
+/*
+ * Write the low @len bytes of @val at @offset into the configuration
+ * space of @zdev. The value is converted with cpu_to_le64() and shifted
+ * right by (8 - @len) bytes before it is stored. Returns the result of
+ * zpci_store().
+ */
+static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+ u64 data = val;
+ int rc;
+
+ data = cpu_to_le64(data);
+ data = data >> ((8 - len) * 8);
+ rc = zpci_store(data, req, offset);
+ return rc;
+}
+
+/* Intentionally empty: no bus fixups are done here. */
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+}
+
+/* Stub: ignores all arguments and always returns 0. */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size,
+ resource_size_t align)
+{
+ return 0;
+}
+
+/*
+ * combine single writes by using store-block insn
+ *
+ * NOTE(review): @count is passed to zpci_memcpy_toio() unchanged;
+ * confirm that both sides agree on its unit (bytes vs. 64-bit words).
+ */
+void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+ zpci_memcpy_toio(to, from, count);
+}
+
+/*
+ * Create a virtual mapping cookie for a PCI BAR
+ *
+ * @pdev: device owning the BAR
+ * @bar:  BAR number, must be in the range 0 .. PCI_BAR_COUNT - 1
+ * @max:  unused
+ *
+ * Records the function handle and BAR number in the iomap entry that
+ * was reserved for this BAR and returns an address cookie that encodes
+ * the index of that entry. Returns NULL for an invalid BAR number.
+ */
+void __iomem *pci_iomap(struct pci_dev *pdev, int bar, unsigned long max)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	u64 addr;
+	int idx;
+
+	/*
+	 * zdev->bars[] is only ever set up for PCI_BAR_COUNT entries; a
+	 * plain "fits into three bits" test would let bar 6 and 7 through.
+	 */
+	if (bar < 0 || bar >= PCI_BAR_COUNT)
+		return NULL;
+
+	idx = zdev->bars[bar].map_idx;
+	spin_lock(&zpci_iomap_lock);
+	zpci_iomap_start[idx].fh = zdev->fh;
+	zpci_iomap_start[idx].bar = bar;
+	spin_unlock(&zpci_iomap_lock);
+
+	/* the entry index lives in the bits from bit 48 upwards */
+	addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
+	return (void __iomem *) addr;
+}
+EXPORT_SYMBOL_GPL(pci_iomap);
+
+/*
+ * Invalidate the iomap entry behind the cookie @addr: the entry index
+ * is recovered from the address and the function handle and BAR number
+ * of that entry are cleared.
+ */
+void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+	u64 cookie = (__force u64) addr;
+	unsigned int idx;
+
+	idx = (cookie & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
+
+	spin_lock(&zpci_iomap_lock);
+	zpci_iomap_start[idx].fh = 0;
+	zpci_iomap_start[idx].bar = 0;
+	spin_unlock(&zpci_iomap_lock);
+}
+EXPORT_SYMBOL_GPL(pci_iounmap);
+
+/*
+ * Configuration space read for the root bus operations. Only the
+ * function ZPCI_DEVFN on a bus with an attached zpci device is served;
+ * everything else yields -ENODEV.
+ */
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+		    int size, u32 *val)
+{
+	struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+	if (!zdev || devfn != ZPCI_DEVFN)
+		return -ENODEV;
+
+	return zpci_cfg_load(zdev, where, val, size);
+}
+
+/*
+ * Configuration space write for the root bus operations. Only the
+ * function ZPCI_DEVFN on a bus with an attached zpci device is served;
+ * everything else yields -ENODEV.
+ */
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+		     int size, u32 val)
+{
+	struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+	if (!zdev || devfn != ZPCI_DEVFN)
+		return -ENODEV;
+
+	return zpci_cfg_store(zdev, where, val, size);
+}
+
+/* Configuration space accessors handed to pci_scan_root_bus(). */
+static struct pci_ops pci_root_ops = {
+ .read = pci_read,
+ .write = pci_write,
+};
+
+/*
+ * Adapter interrupt handler for all zPCI functions.
+ *
+ * The summary bit vector is scanned twice: once with interrupts still
+ * off and, after they have been re-enabled through zpci_set_irq_ctrl(),
+ * a second time from the start. For every summary bit found, the
+ * interrupt bit vector registered for that bit in zpci_aibv[] is
+ * scanned and each set bit is dispatched to the irq number stored as
+ * its data.
+ */
+static void zpci_irq_handler(struct airq_struct *airq)
+{
+ unsigned long si, ai;
+ struct airq_iv *aibv;
+ int irqs_on = 0;
+
+ inc_irq_stat(IRQIO_PCI);
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+ if (si == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ si = 0;
+ continue;
+ }
+
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_aibv[si];
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
+ inc_irq_stat(IRQIO_MSI);
+ /* the bit stays locked while its handler runs */
+ airq_iv_lock(aibv, ai);
+ generic_handle_irq(airq_iv_get_data(aibv, ai));
+ airq_iv_unlock(aibv, ai);
+ }
+ }
+}
+
+/*
+ * Set up the MSI/MSI-X interrupts of @pdev.
+ *
+ * @pdev: device to set up
+ * @nvec: number of vectors requested
+ * @type: PCI_CAP_ID_MSI or PCI_CAP_ID_MSIX
+ *
+ * Allocates a summary bit and an interrupt bit vector for the device,
+ * then an irq descriptor for every entry of the device's MSI list, and
+ * finally registers the adapter interrupt with the function.
+ *
+ * Returns 0 if all @nvec vectors were set up, a positive number if only
+ * that many vectors can be provided (1 for multi-vector MSI requests),
+ * or a negative errno. On error everything set up here is released
+ * again.
+ */
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	unsigned int hwirq, msi_vecs;
+	unsigned long aisb;
+	struct msi_desc *msi;
+	struct msi_msg msg;
+	int rc, irq;
+
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+	msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
+	msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
+
+	/* Allocate adapter summary indicator bit */
+	rc = -EIO;
+	aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+	if (aisb == -1UL)
+		goto out;
+	zdev->aisb = aisb;
+
+	/* Create adapter interrupt vector */
+	rc = -ENOMEM;
+	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+	if (!zdev->aibv)
+		goto out_si;
+
+	/* Wire up shortcut pointer */
+	zpci_aibv[aisb] = zdev->aibv;
+
+	/* Request MSI interrupts */
+	hwirq = 0;
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		rc = -EIO;
+		irq = irq_alloc_desc(0);	/* Alloc irq on node 0 */
+		if (irq < 0)
+			goto out_msi;
+		rc = irq_set_msi_desc(irq, msi);
+		if (rc) {
+			/*
+			 * This descriptor is not yet counted in hwirq, so
+			 * the unwind loop below would not free it.
+			 */
+			irq_free_desc(irq);
+			goto out_msi;
+		}
+		irq_set_chip_and_handler(irq, &zpci_irq_chip,
+					 handle_simple_irq);
+		msg.data = hwirq;
+		msg.address_lo = zdev->msi_addr & 0xffffffff;
+		msg.address_hi = zdev->msi_addr >> 32;
+		write_msi_msg(irq, &msg);
+		airq_iv_set_data(zdev->aibv, hwirq, irq);
+		hwirq++;
+	}
+
+	/* Enable adapter interrupts */
+	rc = zpci_set_airq(zdev);
+	if (rc)
+		goto out_msi;
+
+	return (msi_vecs == nvec) ? 0 : msi_vecs;
+
+out_msi:
+	/* undo the first hwirq list entries, which were fully set up */
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		if (hwirq-- == 0)
+			break;
+		irq_set_msi_desc(msi->irq, NULL);
+		irq_free_desc(msi->irq);
+		msi->msg.address_lo = 0;
+		msi->msg.address_hi = 0;
+		msi->msg.data = 0;
+		msi->irq = 0;
+	}
+	zpci_aibv[aisb] = NULL;
+	airq_iv_release(zdev->aibv);
+out_si:
+	airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+	return rc;
+}
+
+/*
+ * Tear down the MSI/MSI-X interrupts of @pdev.
+ *
+ * If the adapter interrupt cannot be deregistered, the function returns
+ * early and leaves the irq descriptors, the interrupt bit vector and
+ * the summary bit allocated.
+ */
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+ struct msi_desc *msi;
+ int rc;
+
+ /* Disable adapter interrupts */
+ rc = zpci_clear_airq(zdev);
+ if (rc)
+ return;
+
+ /* Release MSI interrupts */
+ list_for_each_entry(msi, &pdev->msi_list, list) {
+ /* mask the vector before its irq descriptor goes away */
+ if (msi->msi_attrib.is_msix)
+ default_msix_mask_irq(msi, 1);
+ else
+ default_msi_mask_irq(msi, 1, 1);
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ }
+
+ zpci_aibv[zdev->aisb] = NULL;
+ airq_iv_release(zdev->aibv);
+ airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
+}
+
+/*
+ * Point every non-empty BAR resource of the device at its iomap cookie.
+ * The length of each resource is kept.
+ */
+static void zpci_map_resources(struct zpci_dev *zdev)
+{
+	struct pci_dev *pdev = zdev->pdev;
+	resource_size_t size;
+	int bar;
+
+	for (bar = 0; bar < PCI_BAR_COUNT; bar++) {
+		/* read the length before the start address is replaced */
+		size = pci_resource_len(pdev, bar);
+		if (size) {
+			pdev->resource[bar].start =
+				(resource_size_t) pci_iomap(pdev, bar, 0);
+			pdev->resource[bar].end =
+				pdev->resource[bar].start + size - 1;
+		}
+	}
+}
+
+/* Invalidate the iomap entry of every non-empty BAR resource. */
+static void zpci_unmap_resources(struct zpci_dev *zdev)
+{
+	struct pci_dev *pdev = zdev->pdev;
+	int bar;
+
+	for (bar = 0; bar < PCI_BAR_COUNT; bar++) {
+		if (!pci_resource_len(pdev, bar))
+			continue;
+		pci_iounmap(pdev, (void *) pdev->resource[bar].start);
+	}
+}
+
+/*
+ * Register the zPCI adapter interrupt and create the summary bit
+ * vector. Returns 0 on success, the registration error, or -ENOMEM if
+ * the vector cannot be created (the registration is undone then).
+ */
+static int __init zpci_irq_init(void)
+{
+	int rc;
+
+	rc = register_adapter_interrupt(&zpci_airq);
+	if (rc)
+		return rc;
+
+	/* Set summary to 1 to be called every time for the ISC. */
+	*zpci_airq.lsi_ptr = 1;
+
+	zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+	if (!zpci_aisb_iv) {
+		unregister_adapter_interrupt(&zpci_airq);
+		return -ENOMEM;
+	}
+
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+	return 0;
+}
+
+/* Counterpart of zpci_irq_init(): release the vector, then unregister. */
+static void zpci_irq_exit(void)
+{
+ airq_iv_release(zpci_aisb_iv);
+ unregister_adapter_interrupt(&zpci_airq);
+}
+
+/*
+ * Reserve a free iomap entry. Returns its index, or -ENOSPC if all
+ * ZPCI_IOMAP_MAX_ENTRIES entries are in use.
+ */
+static int zpci_alloc_iomap(struct zpci_dev *zdev)
+{
+	int entry;
+
+	spin_lock(&zpci_iomap_lock);
+	entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+	if (entry == ZPCI_IOMAP_MAX_ENTRIES)
+		entry = -ENOSPC;
+	else
+		set_bit(entry, zpci_iomap);
+	spin_unlock(&zpci_iomap_lock);
+
+	return entry;
+}
+
+/* Clear the contents of iomap entry @entry and mark it as free again. */
+static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
+{
+ spin_lock(&zpci_iomap_lock);
+ memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
+ clear_bit(entry, zpci_iomap);
+ spin_unlock(&zpci_iomap_lock);
+}
+
+/*
+ * Allocate a resource covering [@start, @start + @size - 1], named
+ * after the bus of @zdev, and request it from iomem_resource. Returns
+ * NULL if the allocation or the request fails.
+ */
+static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
+				    unsigned long size, unsigned long flags)
+{
+	struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
+
+	if (!res)
+		return NULL;
+
+	res->name = zdev->res_name;
+	res->flags = flags;
+	res->start = start;
+	res->end = res->start + size - 1;
+
+	if (!request_resource(&iomem_resource, res))
+		return res;
+
+	kfree(res);
+	return NULL;
+}
+
+/*
+ * Create an iomap entry and a memory resource for every BAR of @zdev
+ * that has a size, and add the resources to @resources.
+ *
+ * Returns 0 on success, the error of zpci_alloc_iomap(), or -ENOMEM if
+ * a resource cannot be set up. On failure, entries and resources that
+ * were set up for earlier BARs are not released here.
+ */
+static int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources)
+{
+ unsigned long addr, size, flags;
+ struct resource *res;
+ int i, entry;
+
+ snprintf(zdev->res_name, sizeof(zdev->res_name),
+ "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ if (!zdev->bars[i].size)
+ continue;
+ entry = zpci_alloc_iomap(zdev);
+ if (entry < 0)
+ return entry;
+ zdev->bars[i].map_idx = entry;
+
+ /* only MMIO is supported */
+ flags = IORESOURCE_MEM;
+ if (zdev->bars[i].val & 8)
+ flags |= IORESOURCE_PREFETCH;
+ if (zdev->bars[i].val & 4)
+ flags |= IORESOURCE_MEM_64;
+
+ /* same cookie layout as used by pci_iomap() */
+ addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
+
+ /* bars[i].size is used as a power-of-two exponent */
+ size = 1UL << zdev->bars[i].size;
+
+ res = __alloc_res(zdev, addr, size, flags);
+ if (!res) {
+ zpci_free_iomap(zdev, entry);
+ return -ENOMEM;
+ }
+ zdev->bars[i].res = res;
+ pci_add_resource(resources, res);
+ }
+
+ return 0;
+}
+
+/* Release the iomap entry and the resource of every BAR with a size. */
+static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
+{
+	int bar;
+
+	for (bar = 0; bar < PCI_BAR_COUNT; bar++) {
+		if (zdev->bars[bar].size) {
+			zpci_free_iomap(zdev, zdev->bars[bar].map_idx);
+			release_resource(zdev->bars[bar].res);
+			kfree(zdev->bars[bar].res);
+		}
+	}
+}
+
+/*
+ * Called when a PCI device is added: link it with its zpci device,
+ * install the attribute groups, map the BARs and claim every resource
+ * that has flags set and no parent yet. Always returns 0.
+ */
+int pcibios_add_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	struct resource *res;
+	int bar;
+
+	zdev->pdev = pdev;
+	pdev->dev.groups = zpci_attr_groups;
+	zpci_map_resources(zdev);
+
+	for (bar = 0; bar < PCI_BAR_COUNT; bar++) {
+		res = &pdev->resource[bar];
+		if (!res->parent && res->flags)
+			pci_claim_resource(pdev, bar);
+	}
+
+	return 0;
+}
+
+/*
+ * Enable @pdev: set up debugging and function measurement for it, map
+ * its BARs and enable the resources selected by @mask. The result of
+ * the function measurement setup is not checked; the return value is
+ * that of pci_enable_resources().
+ */
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+
+ zdev->pdev = pdev;
+ zpci_debug_init_device(zdev);
+ zpci_fmb_enable_device(zdev);
+ zpci_map_resources(zdev);
+
+ return pci_enable_resources(pdev, mask);
+}
+
+/*
+ * Disable @pdev: undo the steps of pcibios_enable_device() in reverse
+ * order and drop the link from the zpci device to @pdev.
+ */
+void pcibios_disable_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = get_zdev(pdev);
+
+ zpci_unmap_resources(zdev);
+ zpci_fmb_disable_device(zdev);
+ zpci_debug_exit_device(zdev);
+ zdev->pdev = NULL;
+}
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+/*
+ * Bring an online function back after hibernation: re-enable its
+ * function handle, map the BARs again and re-register the DMA
+ * translation table. Functions that are not online are left alone.
+ * Returns 0 or the error of clp_enable_fh().
+ */
+static int zpci_restore(struct device *dev)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	int rc;
+
+	if (zdev->state != ZPCI_FN_STATE_ONLINE)
+		return 0;
+
+	rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+	if (rc)
+		return rc;
+
+	zpci_map_resources(zdev);
+	zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+			   zdev->start_dma + zdev->iommu_size - 1,
+			   (u64) zdev->dma_table);
+
+	return 0;
+}
+
+/*
+ * freeze/poweroff callback: for an online function, unregister the DMA
+ * translation table and disable the function handle.  Returns 0 for
+ * functions that are not online, otherwise the result of clp_disable_fh().
+ */
+static int zpci_freeze(struct device *dev)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+	if (zdev->state == ZPCI_FN_STATE_ONLINE) {
+		zpci_unregister_ioat(zdev, 0);
+		return clp_disable_fh(zdev);
+	}
+
+	return 0;
+}
+
+/*
+ * Hibernation callbacks: freeze and poweroff disable the function,
+ * thaw and restore bring it back.
+ */
+struct dev_pm_ops pcibios_pm_ops = {
+	.freeze_noirq	= zpci_freeze,
+	.poweroff_noirq	= zpci_freeze,
+	.thaw_noirq	= zpci_restore,
+	.restore_noirq	= zpci_restore,
+};
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
+
+/*
+ * Pick the first free domain number from the zpci_domain bitmap and store it
+ * in @zdev->domain.  Returns 0 on success or -ENOSPC when all
+ * ZPCI_NR_DEVICES numbers are taken.
+ */
+static int zpci_alloc_domain(struct zpci_dev *zdev)
+{
+	int rc = -ENOSPC;
+
+	spin_lock(&zpci_domain_lock);
+	zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+	if (zdev->domain != ZPCI_NR_DEVICES) {
+		set_bit(zdev->domain, zpci_domain);
+		rc = 0;
+	}
+	spin_unlock(&zpci_domain_lock);
+
+	return rc;
+}
+
+/* Mark the domain number of @zdev as free again in the zpci_domain bitmap. */
+static void zpci_free_domain(struct zpci_dev *zdev)
+{
+	spin_lock(&zpci_domain_lock);
+	clear_bit(zdev->domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+}
+
+/*
+ * Tear down the zpci_dev that belongs to @bus: slot, BAR resources and
+ * domain number are released, the device is unlinked from zpci_list and
+ * the structure itself is freed.
+ */
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+	struct zpci_dev *zdev;
+
+	zdev = get_zdev_by_bus(bus);
+
+	zpci_exit_slot(zdev);
+	zpci_cleanup_bus_resources(zdev);
+	zpci_free_domain(zdev);
+
+	/* unlink from the global list before the structure goes away */
+	spin_lock(&zpci_list_lock);
+	list_del(&zdev->entry);
+	spin_unlock(&zpci_list_lock);
+
+	kfree(zdev);
+}
+
+/*
+ * Set up the BAR resources of @zdev and scan a root bus for it.
+ *
+ * Returns 0 on success, the error from zpci_setup_bus_resources(), or -EIO
+ * when no root bus could be created.  On a failed scan the entries queued
+ * on the local resource list are freed (the list is only taken over by the
+ * bus on success) and the BAR resources are released again.
+ */
+static int zpci_scan_bus(struct zpci_dev *zdev)
+{
+	LIST_HEAD(resources);
+	int ret;
+
+	ret = zpci_setup_bus_resources(zdev, &resources);
+	if (ret)
+		return ret;
+
+	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
+				      zdev, &resources);
+	if (!zdev->bus) {
+		/* the list entries were not consumed, do not leak them */
+		pci_free_resource_list(&resources);
+		zpci_cleanup_bus_resources(zdev);
+		return -EIO;
+	}
+
+	zdev->bus->max_bus_speed = zdev->max_bus_speed;
+	return 0;
+}
+
+/*
+ * Enable the function handle of @zdev and initialize its DMA state.  On
+ * success the function is marked ZPCI_FN_STATE_ONLINE and 0 is returned.
+ * If DMA setup fails the handle is disabled again and the error returned.
+ */
+int zpci_enable_device(struct zpci_dev *zdev)
+{
+	int err;
+
+	err = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+	if (err)
+		return err;
+
+	err = zpci_dma_init_device(zdev);
+	if (err) {
+		clp_disable_fh(zdev);
+		return err;
+	}
+
+	zdev->state = ZPCI_FN_STATE_ONLINE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
+
+/*
+ * Release the DMA state of @zdev and disable its function handle.
+ * Returns the result of clp_disable_fh().
+ */
+int zpci_disable_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	zpci_dma_exit_device(zdev);
+	rc = clp_disable_fh(zdev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
+
+/*
+ * Make a freshly queried zpci_dev known to the system: allocate a domain
+ * number, enable the function if it is in the configured state, scan its
+ * bus, add it to zpci_list and initialize its slot.
+ *
+ * Returns 0 on success.  On failure everything done so far is undone and
+ * the error code of the failing step is returned; @zdev itself is not
+ * freed here.
+ */
+int zpci_create_device(struct zpci_dev *zdev)
+{
+	int err;
+
+	err = zpci_alloc_domain(zdev);
+	if (err)
+		return err;
+
+	if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
+		err = zpci_enable_device(zdev);
+		if (err)
+			goto release_domain;
+	}
+
+	err = zpci_scan_bus(zdev);
+	if (err)
+		goto disable_function;
+
+	spin_lock(&zpci_list_lock);
+	list_add_tail(&zdev->entry, &zpci_list);
+	spin_unlock(&zpci_list_lock);
+
+	zpci_init_slot(zdev);
+
+	return 0;
+
+disable_function:
+	/* only functions that were enabled above need to be disabled */
+	if (zdev->state == ZPCI_FN_STATE_ONLINE)
+		zpci_disable_device(zdev);
+release_domain:
+	zpci_free_domain(zdev);
+	return err;
+}
+
+/*
+ * Release the DMA state of @zdev without touching the function handle.
+ *
+ * Note: SCLP disables fh via set-pci-fn so don't
+ * do that here.
+ */
+void zpci_stop_device(struct zpci_dev *zdev)
+{
+	zpci_dma_exit_device(zdev);
+}
+EXPORT_SYMBOL_GPL(zpci_stop_device);
+
+/*
+ * Convert a size exponent into a size in KiB.
+ *
+ * @size: power-of-two exponent of a length in bytes (presumably a BAR size
+ *        as stored in zdev->bars[].size -- confirm against the callers)
+ *
+ * Returns 0 for exponents below 10 (less than 1 KiB, including 0), the size
+ * in KiB otherwise, saturated at the largest positive int.  The shift is
+ * done on the already reduced exponent so that exponents of 31 and above no
+ * longer shift past the width of int.
+ */
+static inline int barsize(u8 size)
+{
+	unsigned int kib_shift;
+
+	if (size < 10)
+		return 0;
+
+	kib_shift = size - 10;
+	if (kib_shift >= 8 * sizeof(int) - 1)
+		return (int) (~0U >> 1);
+
+	return 1 << kib_shift;
+}
+
+/*
+ * Allocate the global zPCI memory objects: the slab cache for function
+ * measurement blocks and the zeroed iomap table with
+ * ZPCI_IOMAP_MAX_ENTRIES entries.
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing stays allocated.
+ */
+static int zpci_mem_init(void)
+{
+	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
+				16, 0, NULL);
+	if (!zdev_fmb_cache)
+		goto error_zdev;
+
+	/* TODO: use realloc */
+	/* kcalloc() zeroes the array and checks the size multiplication */
+	zpci_iomap_start = kcalloc(ZPCI_IOMAP_MAX_ENTRIES,
+				   sizeof(*zpci_iomap_start), GFP_KERNEL);
+	if (!zpci_iomap_start)
+		goto error_iomap;
+	return 0;
+
+error_iomap:
+	kmem_cache_destroy(zdev_fmb_cache);
+error_zdev:
+	return -ENOMEM;
+}
+
+/* Free what zpci_mem_init() allocated: the iomap table and the FMB cache. */
+static void zpci_mem_exit(void)
+{
+	kfree(zpci_iomap_start);
+	kmem_cache_destroy(zdev_fmb_cache);
+}
+
+static unsigned int s390_pci_probe = 1; /* cleared by pcibios_setup() when "off" is passed */
+static unsigned int s390_pci_initialized; /* set to 1 once pci_base_init() has completed successfully */
+
+/*
+ * Handle a PCI setup option string.  "off" disables zPCI probing and is
+ * consumed (NULL is returned); every other string is handed back unchanged.
+ */
+char * __init pcibios_setup(char *str)
+{
+	if (strcmp(str, "off") != 0)
+		return str;
+
+	s390_pci_probe = 0;
+	return NULL;
+}
+
+/* True once pci_base_init() has finished successfully. */
+bool zpci_is_enabled(void)
+{
+	return s390_pci_initialized != 0;
+}
+
+/*
+ * Bring up the zPCI base support: debug areas, memory objects, interrupt
+ * and DMA infrastructure, then scan for PCI functions.
+ *
+ * Nothing is done (and 0 is returned) when probing was switched off or one
+ * of the facilities 2, 69, 71, 72 is not installed.  On failure the steps
+ * already completed are undone in reverse order and the error is returned.
+ */
+static int __init pci_base_init(void)
+{
+	int rc;
+
+	if (!s390_pci_probe)
+		return 0;
+
+	if (!(test_facility(2) && test_facility(69) &&
+	      test_facility(71) && test_facility(72)))
+		return 0;
+
+	rc = zpci_debug_init();
+	if (rc)
+		return rc;
+
+	rc = zpci_mem_init();
+	if (rc)
+		goto exit_debug;
+
+	rc = zpci_irq_init();
+	if (rc)
+		goto exit_mem;
+
+	rc = zpci_dma_init();
+	if (rc)
+		goto exit_irq;
+
+	rc = clp_scan_pci_devices();
+	if (rc)
+		goto exit_dma;
+
+	s390_pci_initialized = 1;
+	return 0;
+
+exit_dma:
+	zpci_dma_exit();
+exit_irq:
+	zpci_irq_exit();
+exit_mem:
+	zpci_mem_exit();
+exit_debug:
+	zpci_debug_exit();
+	return rc;
+}
+subsys_initcall_sync(pci_base_init);
+
+/* Refresh the function handles of all known functions; no-op before init. */
+void zpci_rescan(void)
+{
+	if (!zpci_is_enabled())
+		return;
+
+	clp_rescan_pci_devices_simple();
+}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
new file mode 100644
index 00000000000..96545d7659f
--- /dev/null
+++ b/arch/s390/pci/pci_clp.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_clp.h>
+
+/* Log a CLP response code together with the return code as one hex record. */
+static inline void zpci_err_clp(unsigned int rsp, int rc)
+{
+	struct {
+		unsigned int rsp;
+		int rc;
+	} __packed record = {
+		.rsp = rsp,
+		.rc = rc,
+	};
+
+	zpci_err_hex(&record, sizeof(record));
+}
+
+/*
+ * Call Logical Processor
+ * Retry logic is handled by the caller.
+ *
+ * @data: request/response block; it is passed to the instruction by address
+ *        and declared as an in/out memory operand of CLP_BLK_SIZE bytes.
+ *
+ * Returns the condition code of the instruction, extracted with ipm/srl.
+ * The value left in the first register operand is ignored.
+ */
+static inline u8 clp_instr(void *data)
+{
+ struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+ u64 ignored;
+ u8 cc;
+
+ asm volatile (
+ " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : [req] "a" (req)
+ : "cc");
+ return cc;
+}
+
+/* Allocate pages large enough for one CLP block; NULL on failure. */
+static void *clp_alloc_block(gfp_t gfp_mask)
+{
+	unsigned long block;
+
+	block = __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
+	return (void *) block;
+}
+
+/* Free a block obtained from clp_alloc_block(). */
+static void clp_free_block(void *ptr)
+{
+	unsigned long block = (unsigned long) ptr;
+
+	free_pages(block, get_order(CLP_BLK_SIZE));
+}
+
+/*
+ * Copy the function group properties from a query response into @zdev.
+ * The bus speed is only known for group version 1; every other version is
+ * reported as PCI_SPEED_UNKNOWN.
+ */
+static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
+				      struct clp_rsp_query_pci_grp *response)
+{
+	zdev->tlb_refresh = response->refresh;
+	zdev->dma_mask = response->dasm;
+	zdev->msi_addr = response->msia;
+	zdev->fmb_update = response->mui;
+
+	if (response->version == 1)
+		zdev->max_bus_speed = PCIE_SPEED_5_0GT;
+	else
+		zdev->max_bus_speed = PCI_SPEED_UNKNOWN;
+}
+
+/*
+ * Query the properties of function group @pfgid and store them in @zdev.
+ * Returns 0 on success, -ENOMEM if no request block could be allocated and
+ * -EIO if the instruction or the response signalled an error.
+ */
+static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+{
+	struct clp_req_rsp_query_pci_grp *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
+	rrb->response.hdr.len = sizeof(rrb->response);
+	rrb->request.pfgid = pfgid;
+
+	rc = clp_instr(rrb);
+	if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+		zpci_err("Q PCI FGRP:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
+	} else {
+		clp_store_query_pci_fngrp(zdev, &rrb->response);
+	}
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+/*
+ * Copy the function properties from a query response into @zdev: DMA range,
+ * ids, function type, BAR values and sizes, the pfip bytes and, if marked
+ * available, the utility string.  Always returns 0.
+ */
+static int clp_store_query_pci_fn(struct zpci_dev *zdev,
+				  struct clp_rsp_query_pci *response)
+{
+	int bar;
+
+	zdev->start_dma = response->sdma;
+	zdev->end_dma = response->edma;
+	zdev->pchid = response->pchid;
+	zdev->pfgid = response->pfgid;
+	zdev->pft = response->pft;
+	zdev->vfn = response->vfn;
+	zdev->uid = response->uid;
+
+	/* BAR values are converted from little endian */
+	for (bar = 0; bar < PCI_BAR_COUNT; bar++) {
+		zdev->bars[bar].val = le32_to_cpu(response->bar[bar]);
+		zdev->bars[bar].size = response->bar_size[bar];
+	}
+
+	memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
+	if (response->util_str_avail)
+		memcpy(zdev->util_str, response->util_str,
+		       sizeof(zdev->util_str));
+
+	return 0;
+}
+
+/*
+ * Query the properties of the function with handle @fh and store them in
+ * @zdev.  If the response names a function group (non-zero pfgid) the group
+ * properties are queried as well.
+ *
+ * Returns 0 on success, -ENOMEM if no request block could be allocated,
+ * -EIO if the instruction or the response signalled an error, or the error
+ * of the group query.
+ */
+static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+{
+	struct clp_req_rsp_query_pci *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+	rrb->response.hdr.len = sizeof(rrb->response);
+	rrb->request.fh = fh;
+
+	rc = clp_instr(rrb);
+	if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+		zpci_err("Q PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
+		goto free_block;
+	}
+
+	rc = clp_store_query_pci_fn(zdev, &rrb->response);
+	if (!rc && rrb->response.pfgid)
+		rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
+
+free_block:
+	clp_free_block(rrb);
+	return rc;
+}
+
+/*
+ * Allocate a zpci_dev for function @fid with handle @fh, query its
+ * properties and create the device.  @configured selects the initial state
+ * (configured or standby).
+ *
+ * Returns 0 on success; on any failure the zpci_dev is freed again and the
+ * error code is returned.
+ */
+int clp_add_pci_device(u32 fid, u32 fh, int configured)
+{
+	struct zpci_dev *zdev;
+	int rc;
+
+	zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
+	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+	if (!zdev)
+		return -ENOMEM;
+
+	zdev->fh = fh;
+	zdev->fid = fid;
+
+	/* Query function properties and update zdev */
+	rc = clp_query_pci_fn(zdev, fh);
+	if (!rc) {
+		zdev->state = configured ? ZPCI_FN_STATE_CONFIGURED
+					 : ZPCI_FN_STATE_STANDBY;
+		rc = zpci_create_device(zdev);
+	}
+
+	if (rc)
+		kfree(zdev);
+	return rc;
+}
+
+/*
+ * Enable/Disable a given PCI function defined by its function handle.
+ *
+ * @fh:        in: current handle, out: handle returned by the firmware
+ *             (only updated on success)
+ * @nr_dma_as: number of DMA address spaces to request
+ * @command:   operation code placed in the request
+ *
+ * While the response reports CLP_RC_SETPCIFN_BUSY the request is repeated
+ * after a 20 ms sleep; after 100 retries the busy response is treated as an
+ * error.  Returns 0 on success, -ENOMEM or -EIO.
+ */
+static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
+{
+	struct clp_req_rsp_set_pci *rrb;
+	int rc, retries = 100;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	for (;;) {
+		memset(rrb, 0, sizeof(*rrb));
+		rrb->request.hdr.len = sizeof(rrb->request);
+		rrb->request.hdr.cmd = CLP_SET_PCI_FN;
+		rrb->response.hdr.len = sizeof(rrb->response);
+		rrb->request.fh = *fh;
+		rrb->request.oc = command;
+		rrb->request.ndas = nr_dma_as;
+
+		rc = clp_instr(rrb);
+		if (rrb->response.hdr.rsp != CLP_RC_SETPCIFN_BUSY)
+			break;
+		if (--retries < 0)
+			break;
+		msleep(20);
+	}
+
+	if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+		zpci_err("Set PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
+	} else {
+		*fh = rrb->response.fh;
+	}
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+/*
+ * Enable the function handle of @zdev with @nr_dma_as DMA address spaces.
+ * On success the enabled handle replaces zdev->fh.  Returns 0 or the error
+ * from clp_set_pci_fn().
+ */
+int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
+{
+	u32 handle = zdev->fh;
+	int rc = clp_set_pci_fn(&handle, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+
+	/* Success -> store enabled handle in zdev */
+	if (rc == 0)
+		zdev->fh = handle;
+
+	zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+	return rc;
+}
+
+/*
+ * Disable the function handle of @zdev.  Nothing is done (and 0 returned)
+ * if the handle is not enabled.  On success the disabled handle replaces
+ * zdev->fh.  Returns 0 or the error from clp_set_pci_fn().
+ */
+int clp_disable_fh(struct zpci_dev *zdev)
+{
+	u32 handle = zdev->fh;
+	int rc;
+
+	if (!zdev_enabled(zdev))
+		return 0;
+
+	rc = clp_set_pci_fn(&handle, 0, CLP_SET_DISABLE_PCI_FN);
+	/* Success -> store disabled handle in zdev */
+	if (rc == 0)
+		zdev->fh = handle;
+
+	zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+	return rc;
+}
+
+/*
+ * Retrieve the list of PCI functions and call @cb for every entry.
+ *
+ * @rrb: caller-provided request/response block, reused for every request
+ * @cb:  callback invoked once per function handle list entry
+ *
+ * The list is fetched in chunks; the firmware hands back a resume token
+ * that is passed into the next request until it becomes zero.
+ *
+ * Returns 0 on success or -EIO if the instruction or a response signalled
+ * an error, or if a response reports an entry size of zero (which cannot be
+ * used to compute the number of entries).
+ */
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
+			void (*cb)(struct clp_fh_list_entry *entry))
+{
+	u64 resume_token = 0;
+	int entries, i, rc;
+
+	do {
+		memset(rrb, 0, sizeof(*rrb));
+		rrb->request.hdr.len = sizeof(rrb->request);
+		rrb->request.hdr.cmd = CLP_LIST_PCI;
+		/* store as many entries as possible */
+		rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+		rrb->request.resume_token = resume_token;
+
+		/* Get PCI function handle list */
+		rc = clp_instr(rrb);
+		if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+			zpci_err("List PCI FN:\n");
+			zpci_err_clp(rrb->response.hdr.rsp, rc);
+			rc = -EIO;
+			goto out;
+		}
+
+		WARN_ON_ONCE(rrb->response.entry_size !=
+			sizeof(struct clp_fh_list_entry));
+
+		/* never divide by a zero entry size reported by the firmware */
+		if (!rrb->response.entry_size) {
+			rc = -EIO;
+			goto out;
+		}
+
+		entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+			rrb->response.entry_size;
+
+		resume_token = rrb->response.resume_token;
+		for (i = 0; i < entries; i++)
+			cb(&rrb->response.fh_list[i]);
+	} while (resume_token);
+out:
+	return rc;
+}
+
+/* List callback: add every entry that carries a vendor id as a new device. */
+static void __clp_add(struct clp_fh_list_entry *entry)
+{
+	if (entry->vendor_id)
+		clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+}
+
+/*
+ * List callback for a rescan: functions not known yet are added, known
+ * functions that are no longer configured are stopped.  Entries without a
+ * vendor id are ignored.
+ */
+static void __clp_rescan(struct clp_fh_list_entry *entry)
+{
+	struct zpci_dev *zdev;
+
+	if (!entry->vendor_id)
+		return;
+
+	zdev = get_zdev_by_fid(entry->fid);
+	if (zdev) {
+		/*
+		 * The handle is already disabled, that means no iota/irq freeing via
+		 * the firmware interfaces anymore. Need to free resources manually
+		 * (DMA memory, debug, sysfs)...
+		 */
+		if (!entry->config_state)
+			zpci_stop_device(zdev);
+		return;
+	}
+
+	clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+}
+
+/*
+ * List callback: refresh the stored function handle of an already known
+ * function.  Unknown functions and entries without vendor id are ignored.
+ */
+static void __clp_update(struct clp_fh_list_entry *entry)
+{
+	struct zpci_dev *zdev;
+
+	if (!entry->vendor_id)
+		return;
+
+	zdev = get_zdev_by_fid(entry->fid);
+	if (zdev)
+		zdev->fh = entry->fh;
+}
+
+/*
+ * List all PCI functions and add each of them as a device.
+ * Returns 0, -ENOMEM or the error from clp_list_pci().
+ */
+int clp_scan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *block;
+	int rc = -ENOMEM;
+
+	block = clp_alloc_block(GFP_KERNEL);
+	if (block) {
+		rc = clp_list_pci(block, __clp_add);
+		clp_free_block(block);
+	}
+
+	return rc;
+}
+
+/*
+ * List all PCI functions again: add the new ones and stop known ones that
+ * are no longer configured.  Returns 0, -ENOMEM or the error from
+ * clp_list_pci().
+ */
+int clp_rescan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *block;
+	int rc = -ENOMEM;
+
+	block = clp_alloc_block(GFP_KERNEL);
+	if (block) {
+		rc = clp_list_pci(block, __clp_rescan);
+		clp_free_block(block);
+	}
+
+	return rc;
+}
+
+/*
+ * List all PCI functions and only refresh the function handles of the
+ * known ones.  The request block is allocated with GFP_NOWAIT.
+ * Returns 0, -ENOMEM or the error from clp_list_pci().
+ */
+int clp_rescan_pci_devices_simple(void)
+{
+	struct clp_req_rsp_list_pci *block;
+	int rc = -ENOMEM;
+
+	block = clp_alloc_block(GFP_NOWAIT);
+	if (block) {
+		rc = clp_list_pci(block, __clp_update);
+		clp_free_block(block);
+	}
+
+	return rc;
+}
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
new file mode 100644
index 00000000000..c5c66840ac0
--- /dev/null
+++ b/arch/s390/pci/pci_debug.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/debug.h>
+
+#include <asm/pci_dma.h>
+
+static struct dentry *debugfs_root; /* "pci" debugfs directory, created in zpci_debug_init() */
+debug_info_t *pci_debug_msg_id; /* "pci_msg" debug area (event trace buffer) */
+EXPORT_SYMBOL_GPL(pci_debug_msg_id);
+debug_info_t *pci_debug_err_id; /* "pci_error" debug area (error log) */
+EXPORT_SYMBOL_GPL(pci_debug_err_id);
+
+static char *pci_perf_names[] = {
+ /* hardware counters: pci_perf_show() always prints the first four and the two DMA ones only when fmb->dma_valid is set */
+ "Load operations",
+ "Store operations",
+ "Store block operations",
+ "Refresh operations",
+ "DMA read bytes",
+ "DMA write bytes",
+ /* software counters: read with atomic64_read() by pci_perf_show() */
+ "Allocated pages",
+ "Mapped pages",
+ "Unmapped pages",
+};
+
+/*
+ * seq_file show routine for the per-device "statistics" file.
+ *
+ * Prints a header followed by the counters of the function measurement
+ * block.  The counters are read as consecutive u64 values starting at
+ * fmb->ld_ops and labelled with pci_perf_names[]; the DMA byte counters are
+ * only shown when fmb->dma_valid is set.
+ */
+static int pci_perf_show(struct seq_file *m, void *v)
+{
+	struct zpci_dev *zdev = m->private;
+	u64 *counter;
+	int idx;
+
+	if (!zdev)
+		return 0;
+	if (!zdev->fmb)
+		return seq_printf(m, "FMB statistics disabled\n");
+
+	/* header */
+	seq_printf(m, "FMB @ %p\n", zdev->fmb);
+	seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update);
+	seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
+	seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
+
+	counter = (u64 *) &zdev->fmb->ld_ops;
+
+	/* hardware counters */
+	for (idx = 0; idx < 4; idx++)
+		seq_printf(m, "%26s:\t%llu\n",
+			   pci_perf_names[idx], counter[idx]);
+	if (zdev->fmb->dma_valid) {
+		for (idx = 4; idx < 6; idx++)
+			seq_printf(m, "%26s:\t%llu\n",
+				   pci_perf_names[idx], counter[idx]);
+	}
+
+	/* software counters */
+	for (idx = 6; idx < ARRAY_SIZE(pci_perf_names); idx++)
+		seq_printf(m, "%26s:\t%llu\n",
+			   pci_perf_names[idx],
+			   atomic64_read((atomic64_t *) &counter[idx]));
+
+	return 0;
+}
+
+/*
+ * Write handler of the "statistics" file: "0" disables and "1" enables the
+ * function measurement block of the device; other numbers are accepted
+ * without effect.
+ *
+ * Returns @count on success, 0 if no device is attached to the file, or the
+ * error from parsing the input or from switching the measurement block.
+ */
+static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
+				  size_t count, loff_t *off)
+{
+	struct seq_file *m = file->private_data;
+	struct zpci_dev *zdev = m->private;
+	unsigned long val;
+	int rc;
+
+	if (!zdev)
+		return 0;
+
+	rc = kstrtoul_from_user(ubuf, count, 10, &val);
+	if (rc)
+		return rc;
+
+	if (val == 0)
+		rc = zpci_fmb_disable_device(zdev);
+	else if (val == 1)
+		rc = zpci_fmb_enable_device(zdev);
+
+	if (rc)
+		return rc;
+	return count;
+}
+
+/* Open the "statistics" file; the inode's private data becomes m->private. */
+static int pci_perf_seq_open(struct inode *inode, struct file *filp)
+{
+	void *priv = file_inode(filp)->i_private;
+
+	return single_open(filp, pci_perf_show, priv);
+}
+
+/* File operations of the per-device "statistics" debugfs file. */
+static const struct file_operations debugfs_pci_perf_fops = {
+	.open		= pci_perf_seq_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.write		= pci_perf_seq_write,
+};
+
+/*
+ * Create the debugfs directory of @zdev (named after its PCI device) and
+ * the "statistics" file inside it.  An error pointer from debugfs is stored
+ * as NULL.
+ */
+void zpci_debug_init_device(struct zpci_dev *zdev)
+{
+	struct dentry *dir, *stats;
+
+	dir = debugfs_create_dir(dev_name(&zdev->pdev->dev), debugfs_root);
+	zdev->debugfs_dev = IS_ERR(dir) ? NULL : dir;
+
+	stats = debugfs_create_file("statistics",
+				    S_IFREG | S_IRUGO | S_IWUSR,
+				    zdev->debugfs_dev, zdev,
+				    &debugfs_pci_perf_fops);
+	zdev->debugfs_perf = IS_ERR(stats) ? NULL : stats;
+}
+
+/* Remove the "statistics" file and then the debugfs directory of @zdev. */
+void zpci_debug_exit_device(struct zpci_dev *zdev)
+{
+	debugfs_remove(zdev->debugfs_perf);
+	debugfs_remove(zdev->debugfs_dev);
+}
+
+/*
+ * Register the two zPCI debug areas ("pci_msg" event trace and "pci_error"
+ * error log) and create the "pci" debugfs root directory.
+ *
+ * Returns 0 on success or -EINVAL if a debug area could not be registered.
+ * On failure nothing stays registered, so the caller does not have to call
+ * zpci_debug_exit().
+ */
+int __init zpci_debug_init(void)
+{
+	/* event trace buffer */
+	pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long));
+	if (!pci_debug_msg_id)
+		return -EINVAL;
+	debug_register_view(pci_debug_msg_id, &debug_sprintf_view);
+	debug_set_level(pci_debug_msg_id, 3);
+
+	/* error log */
+	pci_debug_err_id = debug_register("pci_error", 2, 1, 16);
+	if (!pci_debug_err_id) {
+		/* do not leak the event trace buffer registered above */
+		debug_unregister(pci_debug_msg_id);
+		pci_debug_msg_id = NULL;
+		return -EINVAL;
+	}
+	debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
+	debug_set_level(pci_debug_err_id, 6);
+
+	debugfs_root = debugfs_create_dir("pci", NULL);
+	return 0;
+}
+
+/* Unregister the debug areas that exist and remove the debugfs root. */
+void zpci_debug_exit(void)
+{
+	if (pci_debug_msg_id != NULL)
+		debug_unregister(pci_debug_msg_id);
+
+	if (pci_debug_err_id != NULL)
+		debug_unregister(pci_debug_err_id);
+
+	debugfs_remove(debugfs_root);
+}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
new file mode 100644
index 00000000000..f91c0311980
--- /dev/null
+++ b/arch/s390/pci/pci_dma.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+#include <asm/pci_dma.h>
+
+static struct kmem_cache *dma_region_table_cache; /* backs dma_alloc_cpu_table(); created in dma_alloc_cpu_table_caches() */
+static struct kmem_cache *dma_page_table_cache; /* backs dma_alloc_page_table(); created in dma_alloc_cpu_table_caches() */
+
+/*
+ * Allocate a table from the region table cache (GFP_ATOMIC) and mark all
+ * ZPCI_TABLE_ENTRIES entries invalid and protected.  Returns NULL if the
+ * allocation fails.
+ */
+static unsigned long *dma_alloc_cpu_table(void)
+{
+	unsigned long *table;
+	unsigned long idx;
+
+	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
+	if (table) {
+		for (idx = 0; idx < ZPCI_TABLE_ENTRIES; idx++)
+			table[idx] = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED;
+	}
+
+	return table;
+}
+
+/* Return a table obtained from dma_alloc_cpu_table() to its cache. */
+static void dma_free_cpu_table(void *table)
+{
+	kmem_cache_free(dma_region_table_cache, table);
+}
+
+/*
+ * Allocate a page table from the page table cache (GFP_ATOMIC) and mark all
+ * ZPCI_PT_ENTRIES entries invalid and protected.  Returns NULL if the
+ * allocation fails.
+ */
+static unsigned long *dma_alloc_page_table(void)
+{
+	unsigned long *table;
+	unsigned long idx;
+
+	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
+	if (table) {
+		for (idx = 0; idx < ZPCI_PT_ENTRIES; idx++)
+			table[idx] = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED;
+	}
+
+	return table;
+}
+
+/* Return a table obtained from dma_alloc_page_table() to its cache. */
+static void dma_free_page_table(void *table)
+{
+	kmem_cache_free(dma_page_table_cache, table);
+}
+
+/*
+ * Return the segment table a region table entry points to.  If the entry is
+ * not valid yet, a new table is allocated, linked into the entry and the
+ * entry is validated and unprotected.  Returns NULL if that allocation
+ * fails.
+ */
+static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
+{
+	unsigned long *sto;
+
+	if (reg_entry_isvalid(*entry))
+		return get_rt_sto(*entry);
+
+	sto = dma_alloc_cpu_table();
+	if (!sto)
+		return NULL;
+
+	set_rt_sto(entry, sto);
+	validate_rt_entry(entry);
+	entry_clr_protected(entry);
+
+	return sto;
+}
+
+/*
+ * Return the page table a segment table entry points to.  If the entry is
+ * not valid yet, a new page table is allocated, linked into the entry and
+ * the entry is validated and unprotected.  Returns NULL if that allocation
+ * fails.
+ */
+static unsigned long *dma_get_page_table_origin(unsigned long *entry)
+{
+	unsigned long *pto;
+
+	if (reg_entry_isvalid(*entry))
+		return get_st_pto(*entry);
+
+	pto = dma_alloc_page_table();
+	if (!pto)
+		return NULL;
+
+	set_st_pto(entry, pto);
+	validate_st_entry(entry);
+	entry_clr_protected(entry);
+
+	return pto;
+}
+
+/*
+ * Walk the translation tables rooted at @rto for @dma_addr, allocating
+ * missing segment and page tables on the way.  Returns a pointer to the
+ * page table entry for @dma_addr or NULL if a table could not be allocated.
+ */
+static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+{
+	unsigned long *seg_table, *page_table;
+
+	seg_table = dma_get_seg_table_origin(&rto[calc_rtx(dma_addr)]);
+	if (!seg_table)
+		return NULL;
+
+	page_table = dma_get_page_table_origin(&seg_table[calc_sx(dma_addr)]);
+	if (!page_table)
+		return NULL;
+
+	return &page_table[calc_px(dma_addr)];
+}
+
+/*
+ * Update the page table entry for @dma_addr in the tables of @zdev.
+ *
+ * With ZPCI_PTE_INVALID in @flags the entry is invalidated.  Otherwise it
+ * is pointed at @page_addr, validated, and its protection bit is set or
+ * cleared according to ZPCI_TABLE_PROTECTED in @flags.  If the entry cannot
+ * be reached a one-time warning is issued and nothing is changed.
+ */
+static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr,
+				 dma_addr_t dma_addr, int flags)
+{
+	unsigned long *pte;
+
+	pte = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+	if (WARN_ON_ONCE(!pte))
+		return;
+
+	if (flags & ZPCI_PTE_INVALID) {
+		invalidate_pt_entry(pte);
+		return;
+	}
+
+	set_pt_pfaa(pte, page_addr);
+	validate_pt_entry(pte);
+
+	if (flags & ZPCI_TABLE_PROTECTED)
+		entry_set_protected(pte);
+	else
+		entry_clr_protected(pte);
+}
+
+/*
+ * Update the translations for the range [@dma_addr, @dma_addr + @size) to
+ * point at the pages starting at @pa, page by page, and refresh the
+ * translations where required.  Runs under zdev->dma_table_lock.
+ *
+ * Returns -EINVAL for an empty range, 0 if the device has no DMA table,
+ * otherwise 0 or the result of zpci_refresh_trans().
+ */
+static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			    dma_addr_t dma_addr, size_t size, int flags)
+{
+	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+	dma_addr_t cur_dma_addr = dma_addr;
+	unsigned long irq_flags;
+	unsigned int done;
+	int rc = 0;
+
+	if (!nr_pages)
+		return -EINVAL;
+
+	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
+	if (!zdev->dma_table)
+		goto unlock;
+
+	for (done = 0; done < nr_pages; done++) {
+		dma_update_cpu_trans(zdev, page_addr, cur_dma_addr, flags);
+		page_addr += PAGE_SIZE;
+		cur_dma_addr += PAGE_SIZE;
+	}
+
+	/*
+	 * rpcit is not required to establish new translations when previously
+	 * invalid translation-table entries are validated, however it is
+	 * required when altering previously valid entries.
+	 *
+	 * TODO: also need to check that the old entry is indeed INVALID
+	 * and not only for one page but for the whole range...
+	 * -> now we WARN_ON in that case but with lazy unmap that
+	 * needs to be redone!
+	 */
+	if (zdev->tlb_refresh ||
+	    ((flags & ZPCI_PTE_VALID_MASK) != ZPCI_PTE_VALID))
+		rc = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+					nr_pages * PAGE_SIZE);
+
+unlock:
+	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
+	return rc;
+}
+
+/*
+ * Free the segment table referenced by region table entry @entry together
+ * with every page table hanging off one of its valid entries.
+ */
+static void dma_free_seg_table(unsigned long entry)
+{
+	unsigned long *seg_table = get_rt_sto(entry);
+	int idx;
+
+	for (idx = 0; idx < ZPCI_TABLE_ENTRIES; idx++) {
+		if (!reg_entry_isvalid(seg_table[idx]))
+			continue;
+		dma_free_page_table(get_st_pto(seg_table[idx]));
+	}
+
+	dma_free_cpu_table(seg_table);
+}
+
+/*
+ * Free the complete translation table tree of @zdev and reset
+ * zdev->dma_table to NULL.  Does nothing if there is no device or table.
+ */
+static void dma_cleanup_tables(struct zpci_dev *zdev)
+{
+	unsigned long *region_table;
+	int idx;
+
+	if (!zdev || !zdev->dma_table)
+		return;
+
+	region_table = zdev->dma_table;
+	for (idx = 0; idx < ZPCI_TABLE_ENTRIES; idx++) {
+		if (!reg_entry_isvalid(region_table[idx]))
+			continue;
+		dma_free_seg_table(region_table[idx]);
+	}
+
+	dma_free_cpu_table(region_table);
+	zdev->dma_table = NULL;
+}
+
+/*
+ * Search the IOMMU bitmap of @zdev for @size free pages, starting at bit
+ * @start and honouring the DMA segment boundary of the PCI device.
+ * Returns whatever iommu_area_alloc() returns.
+ */
+static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+				       unsigned long start, int size)
+{
+	unsigned long boundary_bytes, boundary_pages;
+
+	boundary_bytes = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+			       PAGE_SIZE);
+	boundary_pages = boundary_bytes >> PAGE_SHIFT;
+
+	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
+				start, size, 0, boundary_pages, 0);
+}
+
+/*
+ * Allocate @size pages of DMA address space for @zdev.  The search starts
+ * at zdev->next_bit and is retried from the beginning of the bitmap if that
+ * fails.  On success next_bit is moved behind the allocation (wrapping to 0
+ * at the end of the bitmap).  Returns the page index or -1.
+ */
+static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
+{
+	unsigned long page_index, irq_flags;
+
+	spin_lock_irqsave(&zdev->iommu_bitmap_lock, irq_flags);
+
+	page_index = __dma_alloc_iommu(zdev, zdev->next_bit, size);
+	if (page_index == -1)
+		page_index = __dma_alloc_iommu(zdev, 0, size);
+
+	if (page_index != -1) {
+		zdev->next_bit = page_index + size;
+		if (zdev->next_bit >= zdev->iommu_pages)
+			zdev->next_bit = 0;
+	}
+
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irq_flags);
+	return page_index;
+}
+
+/*
+ * Give @size pages starting at page index @offset back to the IOMMU bitmap
+ * of @zdev.  Does nothing if the bitmap is already gone.  If the freed area
+ * starts at or behind next_bit, next_bit is moved behind it.
+ */
+static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&zdev->iommu_bitmap_lock, irq_flags);
+	if (zdev->iommu_bitmap) {
+		bitmap_clear(zdev->iommu_bitmap, offset, size);
+		if (offset >= zdev->next_bit)
+			zdev->next_bit = offset + size;
+	}
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irq_flags);
+}
+
+/*
+ * Set the DMA mask of @dev to @mask.  Returns -EIO if the device has no DMA
+ * mask or the mask is not supported, 0 otherwise.
+ */
+int dma_set_mask(struct device *dev, u64 mask)
+{
+	if (dev->dma_mask && dma_supported(dev, mask)) {
+		*dev->dma_mask = mask;
+		return 0;
+	}
+
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(dma_set_mask);
+
+/*
+ * Map @size bytes at @offset into @page for DMA.
+ *
+ * The number of pages is rounded up from size and offset, DMA address space
+ * is allocated for them and the translations are set up; for DMA_NONE and
+ * DMA_TO_DEVICE the mapping is marked protected.
+ *
+ * Returns the DMA address (including the offset within the first page) or
+ * DMA_ERROR_CODE if no address space is left, the range would exceed
+ * zdev->end_dma, or the translation update fails.
+ */
+static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	unsigned long pa = page_to_phys(page) + offset;
+	unsigned long nr_pages, iommu_page_index;
+	dma_addr_t dma_addr;
+	int flags;
+
+	/* This rounds up number of pages based on size and offset */
+	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
+	iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
+	if (iommu_page_index == -1)
+		goto report;
+
+	/* Use rounded up size */
+	size = nr_pages * PAGE_SIZE;
+
+	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
+	if (dma_addr + size > zdev->end_dma)
+		goto release;
+
+	flags = ZPCI_PTE_VALID;
+	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	if (dma_update_trans(zdev, pa, dma_addr, size, flags))
+		goto release;
+
+	atomic64_add(nr_pages, &zdev->fmb->mapped_pages);
+	return dma_addr + (offset & ~PAGE_MASK);
+
+release:
+	dma_free_iommu(zdev, iommu_page_index, nr_pages);
+report:
+	zpci_err("map error:\n");
+	zpci_err_hex(&pa, sizeof(pa));
+	return DMA_ERROR_CODE;
+}
+
+/*
+ * Undo a mapping created by s390_dma_map_pages(): invalidate the
+ * translations of the page-aligned range, account the pages as unmapped
+ * and return the DMA address space.  A failing translation update is only
+ * logged.
+ */
+static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction direction,
+				 struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	unsigned long first_page;
+	int npages;
+	int rc;
+
+	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
+	dma_addr = dma_addr & PAGE_MASK;
+
+	rc = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+			      ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID);
+	if (rc) {
+		zpci_err("unmap error:\n");
+		zpci_err_hex(&dma_addr, sizeof(dma_addr));
+	}
+
+	atomic64_add(npages, &zdev->fmb->unmapped_pages);
+	first_page = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
+	dma_free_iommu(zdev, first_page, npages);
+}
+
+/*
+ * Allocate @size bytes (rounded up to whole pages) of zeroed memory and map
+ * it bidirectionally for DMA.  The DMA address is stored in @dma_handle if
+ * that is not NULL.  Returns the address of the memory or NULL if the
+ * allocation or the mapping fails.
+ */
+static void *s390_dma_alloc(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t flag,
+			    struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct page *pages;
+	unsigned long pa;
+	dma_addr_t dma;
+
+	size = PAGE_ALIGN(size);
+	pages = alloc_pages(flag, get_order(size));
+	if (!pages)
+		return NULL;
+
+	pa = page_to_phys(pages);
+	memset((void *) pa, 0, size);
+
+	dma = s390_dma_map_pages(dev, pages, pa % PAGE_SIZE,
+				 size, DMA_BIDIRECTIONAL, NULL);
+	if (dma_mapping_error(dev, dma)) {
+		free_pages(pa, get_order(size));
+		return NULL;
+	}
+
+	atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	if (dma_handle)
+		*dma_handle = dma;
+
+	return (void *) pa;
+}
+
+/*
+ * Counterpart of s390_dma_alloc(): account the pages as no longer
+ * allocated, remove the DMA mapping and free the memory.
+ */
+static void s390_dma_free(struct device *dev, size_t size,
+			  void *pa, dma_addr_t dma_handle,
+			  struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	size_t aligned = PAGE_ALIGN(size);
+
+	atomic64_sub(aligned / PAGE_SIZE, &zdev->fmb->allocated_pages);
+	s390_dma_unmap_pages(dev, dma_handle, aligned, DMA_BIDIRECTIONAL, NULL);
+	free_pages((unsigned long) pa, get_order(aligned));
+}
+
+/*
+ * Map every element of a scatterlist for DMA, one element at a time.
+ * Returns the number of mapped elements.  If one element cannot be mapped
+ * the elements mapped so far are unmapped again and 0 is returned.
+ */
+static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nr_elements, enum dma_data_direction dir,
+			   struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int mapped = 0;
+	int i;
+
+	for_each_sg(sg, s, nr_elements, i) {
+		s->dma_address = s390_dma_map_pages(dev, sg_page(s), s->offset,
+						    s->length, dir, NULL);
+		if (dma_mapping_error(dev, s->dma_address))
+			goto unwind;
+
+		s->dma_length = s->length;
+		mapped++;
+	}
+	return mapped;
+
+unwind:
+	for_each_sg(sg, s, mapped, i) {
+		if (s->dma_address)
+			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+					     dir, NULL);
+		s->dma_address = 0;
+		s->dma_length = 0;
+	}
+	return 0;
+}
+
+/* Unmap every element of a scatterlist and clear its DMA address/length. */
+static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			      int nr_elements, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
+{
+	struct scatterlist *elem;
+	int idx;
+
+	for_each_sg(sg, elem, nr_elements, idx) {
+		s390_dma_unmap_pages(dev, elem->dma_address, elem->dma_length,
+				     dir, NULL);
+		elem->dma_address = 0;
+		elem->dma_length = 0;
+	}
+}
+
+/*
+ * Set up the DMA state of @zdev: locks, the root translation table, the
+ * bitmap that manages the DMA address space and the registration of the
+ * translation table with the function.
+ *
+ * Returns 0 on success, -ENOMEM if the table or the bitmap cannot be
+ * allocated, or the error from zpci_register_ioat().  On failure everything
+ * allocated here is freed and the pointers in @zdev are reset to NULL, so a
+ * later zpci_dma_exit_device() does not free them a second time.
+ */
+int zpci_dma_init_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	spin_lock_init(&zdev->iommu_bitmap_lock);
+	spin_lock_init(&zdev->dma_table_lock);
+
+	zdev->dma_table = dma_alloc_cpu_table();
+	if (!zdev->dma_table) {
+		rc = -ENOMEM;
+		goto out_clean;
+	}
+
+	zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
+	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
+	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
+	if (!zdev->iommu_bitmap) {
+		rc = -ENOMEM;
+		goto out_reg;
+	}
+
+	rc = zpci_register_ioat(zdev,
+				0,
+				zdev->start_dma + PAGE_OFFSET,
+				zdev->start_dma + zdev->iommu_size - 1,
+				(u64) zdev->dma_table);
+	if (rc)
+		goto out_bitmap;
+	return 0;
+
+out_bitmap:
+	/* the bitmap used to be leaked on this path */
+	vfree(zdev->iommu_bitmap);
+	zdev->iommu_bitmap = NULL;
+out_reg:
+	dma_free_cpu_table(zdev->dma_table);
+	zdev->dma_table = NULL;
+out_clean:
+	return rc;
+}
+
+/*
+ * Release the DMA state of @zdev: unregister the translation table, free
+ * the table tree and the address space bitmap and reset the allocation
+ * hint.
+ */
+void zpci_dma_exit_device(struct zpci_dev *zdev)
+{
+	zpci_unregister_ioat(zdev, 0);
+	dma_cleanup_tables(zdev);
+
+	vfree(zdev->iommu_bitmap);
+	zdev->iommu_bitmap = NULL;
+
+	zdev->next_bit = 0;
+}
+
+/*
+ * Create the slab caches for region/segment tables and for page tables.
+ * Returns 0 on success or -ENOMEM; on failure no cache is left behind.
+ */
+static int __init dma_alloc_cpu_table_caches(void)
+{
+	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
+					0, NULL);
+	if (!dma_region_table_cache)
+		return -ENOMEM;
+
+	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
+					0, NULL);
+	if (dma_page_table_cache)
+		return 0;
+
+	kmem_cache_destroy(dma_region_table_cache);
+	return -ENOMEM;
+}
+
+/* Global DMA setup: create the translation table caches. */
+int __init zpci_dma_init(void)
+{
+	int rc = dma_alloc_cpu_table_caches();
+
+	return rc;
+}
+
+/* Global DMA teardown: destroy the translation table caches. */
+void zpci_dma_exit(void)
+{
+	kmem_cache_destroy(dma_page_table_cache);
+	kmem_cache_destroy(dma_region_table_cache);
+}
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+/* Initialize DMA debugging with PREALLOC_DMA_DEBUG_ENTRIES entries. */
+static int __init dma_debug_do_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+fs_initcall(dma_debug_do_init);
+
+/* DMA mapping operations implemented by this file. */
+struct dma_map_ops s390_dma_ops = {
+	.alloc		= s390_dma_alloc,
+	.free		= s390_dma_free,
+	.map_page	= s390_dma_map_pages,
+	.unmap_page	= s390_dma_unmap_pages,
+	.map_sg		= s390_dma_map_sg,
+	.unmap_sg	= s390_dma_unmap_sg,
+	/* if we support direct DMA this must be conditional */
+	.is_phys	= 0,
+	/* dma_supported is unconditionally true without a callback */
+};
+EXPORT_SYMBOL_GPL(s390_dma_ops);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
new file mode 100644
index 00000000000..6d7f5a3016c
--- /dev/null
+++ b/arch/s390/pci/pci_event.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI"
+#define pr_fmt(fmt) COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/sclp.h>
+
+/*
+ * Content Code Description for PCI Function Error
+ *
+ * Layout of the event data that __zpci_event_error() logs and reads the
+ * function id and PCI event code from.
+ */
+struct zpci_ccdf_err {
+ u32 reserved1;
+ u32 fh; /* function handle */
+ u32 fid; /* function id */
+ u32 ett : 4; /* expected table type */
+ u32 mvn : 12; /* MSI vector number */
+ u32 dmaas : 8; /* DMA address space */
+ u32 : 6;
+ u32 q : 1; /* event qualifier */
+ u32 rw : 1; /* read/write */
+ u64 faddr; /* failing address */
+ u32 reserved3;
+ u16 reserved4;
+ u16 pec; /* PCI event code */
+} __packed;
+
+/*
+ * Content Code Description for PCI Function Availability
+ *
+ * Layout of the event data handled by __zpci_event_availability(), which
+ * uses the function id, the function handle and the PCI event code.
+ */
+struct zpci_ccdf_avail {
+ u32 reserved1;
+ u32 fh; /* function handle */
+ u32 fid; /* function id */
+ u32 reserved2;
+ u32 reserved3;
+ u32 reserved4;
+ u32 reserved5;
+ u16 reserved6;
+ u16 pec; /* PCI event code */
+} __packed;
+
+/*
+ * Handle a PCI function error event: dump the raw event data to the zPCI
+ * error log and, if the function is known, print an error message.
+ *
+ * A known function does not necessarily have a pci_dev bound to it
+ * (pcibios_disable_device() resets zdev->pdev to NULL), so the device name
+ * falls back to "n/a" in that case, like in the availability handler.
+ */
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
+{
+	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+
+	zpci_err("error CCDF:\n");
+	zpci_err_hex(ccdf, sizeof(*ccdf));
+
+	if (!zdev)
+		return;
+
+	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
+	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+}
+
+void zpci_event_error(void *data)
+{
+ if (zpci_is_enabled())
+ __zpci_event_error(data);
+}
+
+/*
+ * Handle a PCI function availability event.
+ *
+ * The event is logged and then dispatched on its PCI event code.  The
+ * function the event refers to may be unknown (get_zdev_by_fid() returns
+ * NULL); every case that needs the zpci_dev bails out in that situation
+ * instead of dereferencing a NULL pointer.
+ */
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
+{
+	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+	int ret;
+
+	pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
+		pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+	zpci_err("avail CCDF:\n");
+	zpci_err_hex(ccdf, sizeof(*ccdf));
+
+	switch (ccdf->pec) {
+	case 0x0301: /* Standby -> Configured */
+		if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+			break;
+		zdev->state = ZPCI_FN_STATE_CONFIGURED;
+		zdev->fh = ccdf->fh;
+		ret = zpci_enable_device(zdev);
+		if (ret)
+			break;
+		pci_rescan_bus(zdev->bus);
+		break;
+	case 0x0302: /* Reserved -> Standby */
+		if (!zdev)
+			clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+		break;
+	case 0x0303: /* Deconfiguration requested */
+		/* nothing to deconfigure for a function we do not know */
+		if (!zdev)
+			break;
+		if (pdev)
+			pci_stop_and_remove_bus_device(pdev);
+
+		ret = zpci_disable_device(zdev);
+		if (ret)
+			break;
+
+		ret = sclp_pci_deconfigure(zdev->fid);
+		zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
+		if (!ret)
+			zdev->state = ZPCI_FN_STATE_STANDBY;
+
+		break;
+	case 0x0304: /* Configured -> Standby */
+		/* nothing to update for a function we do not know */
+		if (!zdev)
+			break;
+		if (pdev) {
+			/* Give the driver a hint that the function is
+			 * already unusable. */
+			pdev->error_state = pci_channel_io_perm_failure;
+			pci_stop_and_remove_bus_device(pdev);
+		}
+
+		zdev->fh = ccdf->fh;
+		zpci_disable_device(zdev);
+		zdev->state = ZPCI_FN_STATE_STANDBY;
+		break;
+	case 0x0306: /* 0x308 or 0x302 for multiple devices */
+		clp_rescan_pci_devices();
+		break;
+	case 0x0308: /* Standby -> Reserved */
+		if (!zdev)
+			break;
+		pci_stop_root_bus(zdev->bus);
+		pci_remove_root_bus(zdev->bus);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Entry point for PCI availability events; ignored before zPCI is up. */
+void zpci_event_availability(void *data)
+{
+	if (!zpci_is_enabled())
+		return;
+
+	__zpci_event_availability(data);
+}
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
new file mode 100644
index 00000000000..85267c058af
--- /dev/null
+++ b/arch/s390/pci/pci_insn.c
@@ -0,0 +1,202 @@
+/*
+ * s390 specific pci instructions
+ *
+ * Copyright IBM Corp. 2013
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <asm/pci_insn.h>
+#include <asm/processor.h>
+
+#define ZPCI_INSN_BUSY_DELAY 1 /* busy-retry delay: 1 microsecond where passed to udelay(), 1 millisecond where passed to msleep() */
+
+/*
+ * Modify PCI Function Controls
+ *
+ * Issues the instruction once, with @req in a register and *@fib as a
+ * read-write memory operand. The value left in cc by the ipm/srl pair is
+ * returned; the byte (req >> 24) & 0xff of the request register as it is
+ * after the instruction is stored in *@status.
+ */
+static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+ u8 cc;
+
+ asm volatile (
+ " .insn rxy,0xe300000000d0,%[req],%[fib]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
+ : : "cc");
+ *status = req >> 24 & 0xff;
+ return cc;
+}
+
+/*
+ * Run __mpcifc() and repeat it for as long as it reports condition code 2.
+ * msleep() is used between attempts, so ZPCI_INSN_BUSY_DELAY is taken as
+ * milliseconds here.
+ *
+ * Returns 0 for condition code 0 and -EIO for any other one; the error
+ * message is emitted through printk_once().
+ */
+int zpci_mod_fc(u64 req, struct zpci_fib *fib)
+{
+	u8 status;
+	u8 cc;
+
+	while ((cc = __mpcifc(req, fib, &status)) == 2)
+		msleep(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cc)
+		return 0;
+
+	printk_once(KERN_ERR "%s: error cc: %d status: %d\n",
+		    __func__, cc, status);
+	return -EIO;
+}
+
+/*
+ * Refresh PCI Translations
+ *
+ * @addr and @range are pinned to registers 2 and 3 through local register
+ * variables; only __addr is named in the instruction, __range is passed as
+ * an additional register input. @fn is a read-write operand: its byte
+ * (fn >> 24) & 0xff after the instruction is stored in *@status. The value
+ * left in cc by the ipm/srl pair is returned.
+ */
+static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
+{
+ register u64 __addr asm("2") = addr;
+ register u64 __range asm("3") = range;
+ u8 cc;
+
+ asm volatile (
+ " .insn rre,0xb9d30000,%[fn],%[addr]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [fn] "+d" (fn)
+ : [addr] "d" (__addr), "d" (__range)
+ : "cc");
+ *status = fn >> 24 & 0xff;
+ return cc;
+}
+
+/*
+ * Run __rpcit() for the given function, address and range, busy-waiting
+ * with udelay() and retrying while it reports condition code 2.
+ *
+ * Returns 0 for condition code 0 and -EIO for any other one; the error
+ * message is emitted through printk_once().
+ */
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
+{
+	u8 status;
+	u8 cc;
+
+	while ((cc = __rpcit(fn, addr, range, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cc)
+		return 0;
+
+	printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n",
+		    __func__, cc, status, addr, range);
+	return -EIO;
+}
+
+/*
+ * Set Interruption Controls
+ *
+ * @ctl and @isc shifted left by 27 are passed in registers, *@unused as a
+ * memory operand. The asm statement has no outputs and nothing is
+ * returned to the caller.
+ */
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+{
+ asm volatile (
+ " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
+ : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+}
+
+/*
+ * PCI Load
+ *
+ * @req and @offset are pinned to registers 2 and 3. cc starts out as
+ * -ENXIO and EX_TABLE(0b, 1b) registers a fixup from label 0 to label 1;
+ * presumably a fault on the instruction resumes at label 1, past the
+ * ipm/srl pair, so that cc keeps -ENXIO instead of a condition code.
+ *
+ * *@data is written only when cc is 0; *@status always receives the byte
+ * (__req >> 24) & 0xff. Returns cc.
+ */
+static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+ register u64 __req asm("2") = req;
+ register u64 __offset asm("3") = offset;
+ int cc = -ENXIO;
+ u64 __data;
+
+ asm volatile (
+ " .insn rre,0xb9d20000,%[data],%[req]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req)
+ : "d" (__offset)
+ : "cc");
+ *status = __req >> 24 & 0xff;
+ if (!cc)
+ *data = __data;
+
+ return cc;
+}
+
+/*
+ * Run __pcilg() for @req/@offset, busy-waiting with udelay() and retrying
+ * while it reports condition code 2. *@data is filled in by __pcilg().
+ *
+ * Returns 0 on success, -EIO for a positive condition code, and passes a
+ * negative value from __pcilg() through unchanged. The error message is
+ * emitted through printk_once().
+ */
+int zpci_load(u64 *data, u64 req, u64 offset)
+{
+	int cc;
+	u8 status;
+
+	while ((cc = __pcilg(data, req, offset, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cc)
+		return 0;
+
+	printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
+		    __func__, cc, status, req, offset);
+	if (cc > 0)
+		return -EIO;
+	return cc;
+}
+EXPORT_SYMBOL_GPL(zpci_load);
+
+/*
+ * PCI Store
+ *
+ * @data is passed in a register, @req and @offset are pinned to registers
+ * 2 and 3. cc starts out as -ENXIO and EX_TABLE(0b, 1b) registers a fixup
+ * from label 0 to label 1; presumably a fault on the instruction resumes
+ * at label 1, past the ipm/srl pair, so that cc keeps -ENXIO.
+ *
+ * *@status receives the byte (__req >> 24) & 0xff. Returns cc.
+ */
+static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
+{
+ register u64 __req asm("2") = req;
+ register u64 __offset asm("3") = offset;
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rre,0xb9d00000,%[data],%[req]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [req] "+d" (__req)
+ : "d" (__offset), [data] "d" (data)
+ : "cc");
+ *status = __req >> 24 & 0xff;
+ return cc;
+}
+
+/*
+ * Run __pcistg() for @data/@req/@offset, busy-waiting with udelay() and
+ * retrying while it reports condition code 2.
+ *
+ * Returns 0 on success, -EIO for a positive condition code, and passes a
+ * negative value from __pcistg() through unchanged. The error message is
+ * emitted through printk_once().
+ */
+int zpci_store(u64 data, u64 req, u64 offset)
+{
+	int cc;
+	u8 status;
+
+	while ((cc = __pcistg(data, req, offset, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cc)
+		return 0;
+
+	printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
+		    __func__, cc, status, req, offset);
+	if (cc > 0)
+		return -EIO;
+	return cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store);
+
+/*
+ * PCI Store Block
+ *
+ * @req and @offset are passed in registers, *@data as a memory operand.
+ * cc starts out as -ENXIO and EX_TABLE(0b, 1b) registers a fixup from
+ * label 0 to label 1; presumably a fault on the instruction resumes at
+ * label 1, past the ipm/srl pair, so that cc keeps -ENXIO.
+ *
+ * *@status receives the byte (req >> 24) & 0xff of the request register
+ * as it is after the instruction. Returns cc.
+ */
+static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [req] "+d" (req)
+ : [offset] "d" (offset), [data] "Q" (*data)
+ : "cc");
+ *status = req >> 24 & 0xff;
+ return cc;
+}
+
+/*
+ * Run __pcistb() for the block at @data, busy-waiting with udelay() and
+ * retrying while it reports condition code 2.
+ *
+ * Returns 0 on success, -EIO for a positive condition code, and passes a
+ * negative value from __pcistb() through unchanged. The error message is
+ * emitted through printk_once().
+ */
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
+{
+	int cc;
+	u8 status;
+
+	while ((cc = __pcistb(data, req, offset, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cc)
+		return 0;
+
+	printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n",
+		    __func__, cc, status, req, offset);
+	if (cc > 0)
+		return -EIO;
+	return cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
new file mode 100644
index 00000000000..9190214b870
--- /dev/null
+++ b/arch/s390/pci/pci_sysfs.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define COMPONENT "zPCI" /* component tag used to build pr_fmt() below */
+#define pr_fmt(fmt) COMPONENT ": " fmt /* expands to "zPCI: " followed by fmt */
+
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/pci.h>
+
+/*
+ * zpci_attr() - define a read-only device attribute backed by a zpci_dev field
+ * @name:   attribute name; the generated show routine is name##_show()
+ * @fmt:    sprintf() format used to print the value
+ * @member: member of struct zpci_dev that is printed
+ *
+ * The show routine resolves the zpci_dev belonging to the PCI device and
+ * formats the selected member into the buffer.
+ */
+#define zpci_attr(name, fmt, member)					\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
+{									\
+	struct pci_dev *pdev = to_pci_dev(dev);				\
+									\
+	return sprintf(buf, fmt, get_zdev(pdev)->member);		\
+}									\
+static DEVICE_ATTR_RO(name)
+
+/* One attribute per exported zpci_dev member. */
+zpci_attr(function_id, "0x%08x\n", fid);
+zpci_attr(function_handle, "0x%08x\n", fh);
+zpci_attr(pchid, "0x%04x\n", pchid);
+zpci_attr(pfgid, "0x%02x\n", pfgid);
+zpci_attr(vfn, "0x%04x\n", vfn);
+zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(uid, "0x%x\n", uid);
+/* The four pfip[] bytes are exposed as segment0..segment3. */
+zpci_attr(segment0, "0x%02x\n", pfip[0]);
+zpci_attr(segment1, "0x%02x\n", pfip[1]);
+zpci_attr(segment2, "0x%02x\n", pfip[2]);
+zpci_attr(segment3, "0x%02x\n", pfip[3]);
+
+/*
+ * Store routine of the write-only "recover" attribute.
+ *
+ * The attribute first removes itself; if device_remove_file_self() returns
+ * false the write is acknowledged without doing anything. Otherwise the
+ * PCI device is stopped and removed, the zpci function is disabled and
+ * enabled again, and the bus is rescanned. The written data in @buf is
+ * not examined.
+ *
+ * Returns @count, or the error from zpci_disable_device() or
+ * zpci_enable_device().
+ */
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
+	int rc;
+
+	if (!device_remove_file_self(dev, attr))
+		return count;
+
+	pci_stop_and_remove_bus_device(pdev);
+
+	/* Re-enable only if the disable step succeeded. */
+	rc = zpci_disable_device(zdev);
+	if (!rc)
+		rc = zpci_enable_device(zdev);
+	if (rc)
+		return rc;
+
+	pci_rescan_bus(zdev->bus);
+	return count;
+}
+static DEVICE_ATTR_WO(recover);
+
+/*
+ * Read routine of the binary "util_string" attribute: copies up to @count
+ * bytes, starting at offset @off, out of the util_str buffer of the zpci
+ * device that belongs to @kobj. @filp and @attr are not used.
+ */
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(kobj_to_dev(kobj)));
+
+	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
+				       sizeof(zdev->util_str));
+}
+static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static struct bin_attribute *zpci_bin_attrs[] = { /* binary attributes referenced by zpci_attr_group */
+ &bin_attr_util_string,
+ NULL, /* list terminator */
+};
+
+static struct attribute *zpci_dev_attrs[] = { /* attributes referenced by zpci_attr_group */
+ &dev_attr_function_id.attr,
+ &dev_attr_function_handle.attr,
+ &dev_attr_pchid.attr,
+ &dev_attr_pfgid.attr,
+ &dev_attr_pft.attr,
+ &dev_attr_vfn.attr,
+ &dev_attr_uid.attr,
+ &dev_attr_recover.attr,
+ NULL, /* list terminator */
+};
+static struct attribute_group zpci_attr_group = { /* group without a .name, unlike pfip_attr_group */
+ .attrs = zpci_dev_attrs,
+ .bin_attrs = zpci_bin_attrs,
+};
+
+static struct attribute *pfip_attrs[] = { /* segment0..segment3, one per pfip[] byte */
+ &dev_attr_segment0.attr,
+ &dev_attr_segment1.attr,
+ &dev_attr_segment2.attr,
+ &dev_attr_segment3.attr,
+ NULL, /* list terminator */
+};
+static struct attribute_group pfip_attr_group = {
+ .name = "pfip", /* presumably places these attributes in a "pfip" subdirectory - confirm against the sysfs group API */
+ .attrs = pfip_attrs,
+};
+
+const struct attribute_group *zpci_attr_groups[] = { /* non-static list of both attribute groups */
+ &zpci_attr_group,
+ &pfip_attr_group,
+ NULL, /* list terminator */
+};