diff options
Diffstat (limited to 'arch/s390')
364 files changed, 39871 insertions, 17960 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index ae4b01060ed..647c3eccc3d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -1,6 +1,9 @@ -obj-y += kernel/ -obj-y += mm/ -obj-y += crypto/ -obj-y += appldata/ -obj-y += hypfs/ -obj-y += kvm/ +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_APPLDATA_BASE) += appldata/ +obj-$(CONFIG_MATHEMU) += math-emu/ +obj-y += net/ +obj-$(CONFIG_PCI) += pci/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 636bcb81d06..bb63499fc5d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -2,7 +2,7 @@ config MMU def_bool y config ZONE_DMA - def_bool y if 64BIT + def_bool y config LOCKDEP_SUPPORT def_bool y @@ -28,24 +28,12 @@ config ARCH_HAS_ILOG2_U64 config GENERIC_HWEIGHT def_bool y -config GENERIC_TIME_VSYSCALL - def_bool y - -config GENERIC_CLOCKEVENTS - def_bool y - config GENERIC_BUG def_bool y if BUG config GENERIC_BUG_RELATIVE_POINTERS def_bool y -config NO_IOMEM - def_bool y - -config NO_DMA - def_bool y - config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT @@ -55,66 +43,109 @@ config GENERIC_LOCKBREAK config PGSTE def_bool y if KVM -config VIRT_CPU_ACCOUNTING +config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config ARCH_SUPPORTS_DEBUG_PAGEALLOC +config KEXEC def_bool y +config AUDIT_ARCH + def_bool y + +config NO_IOPORT_MAP + def_bool y + +config PCI_QUIRKS + def_bool n + config S390 def_bool y - select USE_GENERIC_SMP_HELPERS if SMP - select HAVE_SYSCALL_WRAPPERS - select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_TRACE_MCOUNT_TEST - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_C_RECORDMCOUNT - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_DYNAMIC_FTRACE - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_OPROFILE - select HAVE_KPROBES - select HAVE_KRETPROBES - select HAVE_KVM if 64BIT - select HAVE_ARCH_TRACEHOOK - select INIT_ALL_POSSIBLE - select HAVE_IRQ_WORK - select HAVE_PERF_EVENTS - select HAVE_KERNEL_GZIP - select HAVE_KERNEL_BZIP2 - select HAVE_KERNEL_LZMA - select HAVE_KERNEL_LZO - select HAVE_GET_USER_PAGES_FAST - select HAVE_ARCH_MUTEX_CPU_RELAX - select ARCH_INLINE_SPIN_TRYLOCK - select ARCH_INLINE_SPIN_TRYLOCK_BH - select ARCH_INLINE_SPIN_LOCK - select ARCH_INLINE_SPIN_LOCK_BH - select ARCH_INLINE_SPIN_LOCK_IRQ - select ARCH_INLINE_SPIN_LOCK_IRQSAVE - select ARCH_INLINE_SPIN_UNLOCK - select ARCH_INLINE_SPIN_UNLOCK_BH - select ARCH_INLINE_SPIN_UNLOCK_IRQ - select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE - select ARCH_INLINE_READ_TRYLOCK + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH select ARCH_INLINE_READ_LOCK_IRQ select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_TRYLOCK select ARCH_INLINE_READ_UNLOCK select ARCH_INLINE_READ_UNLOCK_BH select ARCH_INLINE_READ_UNLOCK_IRQ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE - select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE select ARCH_INLINE_WRITE_LOCK select ARCH_INLINE_WRITE_LOCK_BH select ARCH_INLINE_WRITE_LOCK_IRQ select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_TRYLOCK select ARCH_INLINE_WRITE_UNLOCK select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_WANT_IPC_PARSE_VERSION + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS2 + select GENERIC_CLOCKEVENTS + select GENERIC_CPU_DEVICES if !SMP + select GENERIC_FIND_FIRST_BIT + select GENERIC_SMP_IDLE_THREAD + select GENERIC_TIME_VSYSCALL + select HAVE_ALIGNED_STRUCT_PAGE if SLUB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT + select HAVE_BPF_JIT if 64BIT && PACK_STACK + select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK + select HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_KVM if 64BIT + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MEMBLOCK_PHYS_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_OPROFILE + select HAVE_PERF_EVENTS + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UID16 if 32BIT + select HAVE_VIRT_CPU_ACCOUNTING + select KTIME_SCALAR if 32BIT + select MODULES_USE_ELF_RELA + select NO_BOOTMEM + select OLD_SIGACTION + select OLD_SIGSUSPEND3 + select SYSCTL_EXCEPTION_TRACE + select TTY + select VIRT_CPU_ACCOUNTING + select VIRT_TO_BUS config SCHED_OMIT_FRAME_POINTER def_bool y @@ -123,11 +154,152 @@ source "init/Kconfig" source "kernel/Kconfig.freezer" -menu "Base setup" +menu "Processor type and features" -comment "Processor type and features" +config HAVE_MARCH_Z900_FEATURES + def_bool n -source "kernel/time/Kconfig" +config HAVE_MARCH_Z990_FEATURES + def_bool n + select HAVE_MARCH_Z900_FEATURES + +config HAVE_MARCH_Z9_109_FEATURES + def_bool n + select HAVE_MARCH_Z990_FEATURES + +config HAVE_MARCH_Z10_FEATURES + def_bool n + select HAVE_MARCH_Z9_109_FEATURES + +config HAVE_MARCH_Z196_FEATURES + def_bool n + select HAVE_MARCH_Z10_FEATURES + +config HAVE_MARCH_ZEC12_FEATURES + def_bool n + select HAVE_MARCH_Z196_FEATURES + +choice + prompt "Processor type" + default MARCH_G5 + +config MARCH_G5 + bool "System/390 model G5 and G6" + depends on !64BIT + help + Select this to build a 31 bit kernel that works + on all ESA/390 and z/Architecture machines. + +config MARCH_Z900 + bool "IBM zSeries model z800 and z900" + select HAVE_MARCH_Z900_FEATURES if 64BIT + help + Select this to enable optimizations for model z800/z900 (2064 and + 2066 series). This will enable some optimizations that are not + available on older ESA/390 (31 Bit) only CPUs. + +config MARCH_Z990 + bool "IBM zSeries model z890 and z990" + select HAVE_MARCH_Z990_FEATURES if 64BIT + help + Select this to enable optimizations for model z890/z990 (2084 and + 2086 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z9_109 + bool "IBM System z9" + select HAVE_MARCH_Z9_109_FEATURES if 64BIT + help + Select this to enable optimizations for IBM System z9 (2094 and + 2096 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z10 + bool "IBM System z10" + select HAVE_MARCH_Z10_FEATURES if 64BIT + help + Select this to enable optimizations for IBM System z10 (2097 and + 2098 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z196 + bool "IBM zEnterprise 114 and 196" + select HAVE_MARCH_Z196_FEATURES if 64BIT + help + Select this to enable optimizations for IBM zEnterprise 114 and 196 + (2818 and 2817 series). The kernel will be slightly faster but will + not work on older machines. + +config MARCH_ZEC12 + bool "IBM zBC12 and zEC12" + select HAVE_MARCH_ZEC12_FEATURES if 64BIT + help + Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and + 2827 series). The kernel will be slightly faster but will not work on + older machines. + +endchoice + +config MARCH_G5_TUNE + def_bool TUNE_G5 || MARCH_G5 && TUNE_DEFAULT + +config MARCH_Z900_TUNE + def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT + +config MARCH_Z990_TUNE + def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT + +config MARCH_Z9_109_TUNE + def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT + +config MARCH_Z10_TUNE + def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT + +config MARCH_Z196_TUNE + def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT + +config MARCH_ZEC12_TUNE + def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT + +choice + prompt "Tune code generation" + default TUNE_DEFAULT + help + Cause the compiler to tune (-mtune) the generated code for a machine. + This will make the code run faster on the selected machine but + somewhat slower on other machines. + This option only changes how the compiler emits instructions, not the + selection of instructions itself, so the resulting kernel will run on + all other machines. + +config TUNE_DEFAULT + bool "Default" + help + Tune the generated code for the target processor for which the kernel + will be compiled. + +config TUNE_G5 + bool "System/390 model G5 and G6" + +config TUNE_Z900 + bool "IBM zSeries model z800 and z900" + +config TUNE_Z990 + bool "IBM zSeries model z890 and z990" + +config TUNE_Z9_109 + bool "IBM System z9" + +config TUNE_Z10 + bool "IBM System z10" + +config TUNE_Z196 + bool "IBM zEnterprise 114 and 196" + +config TUNE_ZEC12 + bool "IBM zBC12 and zEC12" + +endchoice config 64BIT def_bool y @@ -139,8 +311,24 @@ config 64BIT config 32BIT def_bool y if !64BIT -config KTIME_SCALAR - def_bool 32BIT +config COMPAT + def_bool y + prompt "Kernel support for 31 bit emulation" + depends on 64BIT + select COMPAT_BINFMT_ELF if BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC + select COMPAT_OLD_SIGACTION + help + Select this option if you want to enable your system kernel to + handle system-calls from ELF binaries for 31 bit ESA. This option + (and some other stuff like libraries and such) is needed for + executing 31 bit applications. It is safe to say "Y". + +config SYSVIPC_COMPAT + def_bool y if COMPAT && SYSVIPC + +config KEYS_COMPAT + def_bool y if COMPAT && KEYS config SMP def_bool y @@ -150,10 +338,10 @@ config SMP a system with only one CPU, like most personal computers, say N. If you have a system with more than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. See also the SMP-HOWTO available at @@ -162,14 +350,14 @@ config SMP Even if you don't know what to do here, say Y. config NR_CPUS - int "Maximum number of CPUs (2-64)" - range 2 64 + int "Maximum number of CPUs (2-256)" + range 2 256 depends on SMP default "32" if !64BIT default "64" if 64BIT help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 64 and the + kernel will support. The maximum supported value is 256 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds @@ -179,29 +367,25 @@ config HOTPLUG_CPU def_bool y prompt "Support for hot-pluggable CPUs" depends on SMP - select HOTPLUG help Say Y here to be able to turn CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" - depends on SMP - help - Multi-core scheduler support improves the CPU scheduler's decision - making when dealing with multi-core CPU chips at a cost of slightly - increased overhead in some places. + def_bool n config SCHED_BOOK def_bool y prompt "Book scheduler support" - depends on SMP && SCHED_MC + depends on SMP + select SCHED_MC help Book scheduler support improves the CPU scheduler's decision making when dealing with machines that have several books. +source kernel/Kconfig.preempt + config MATHEMU def_bool y prompt "IEEE FPU emulation" @@ -211,83 +395,39 @@ config MATHEMU on older ESA/390 machines. Say Y unless you know your machine doesn't need this. -config COMPAT - def_bool y - prompt "Kernel support for 31 bit emulation" - depends on 64BIT - select COMPAT_BINFMT_ELF - help - Select this option if you want to enable your system kernel to - handle system-calls from ELF binaries for 31 bit ESA. This option - (and some other stuff like libraries and such) is needed for - executing 31 bit applications. It is safe to say "Y". +source kernel/Kconfig.hz -config SYSVIPC_COMPAT - def_bool y if COMPAT && SYSVIPC +endmenu -config AUDIT_ARCH - def_bool y +menu "Memory setup" -config S390_EXEC_PROTECT +config ARCH_SPARSEMEM_ENABLE def_bool y - prompt "Data execute protection" - help - This option allows to enable a buffer overflow protection for user - space programs and it also selects the addressing mode option above. - The kernel parameter noexec=on will enable this feature and also - switch the addressing modes, default is disabled. Enabling this (via - kernel parameter) on machines earlier than IBM System z9 this will - reduce system performance. - -comment "Code generation options" - -choice - prompt "Processor type" - default MARCH_G5 + select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_VMEMMAP + select SPARSEMEM_STATIC if !64BIT -config MARCH_G5 - bool "System/390 model G5 and G6" - depends on !64BIT - help - Select this to build a 31 bit kernel that works - on all ESA/390 and z/Architecture machines. +config ARCH_SPARSEMEM_DEFAULT + def_bool y -config MARCH_Z900 - bool "IBM zSeries model z800 and z900" - help - Select this to enable optimizations for model z800/z900 (2064 and - 2066 series). This will enable some optimizations that are not - available on older ESA/390 (31 Bit) only CPUs. +config ARCH_SELECT_MEMORY_MODEL + def_bool y -config MARCH_Z990 - bool "IBM zSeries model z890 and z990" - help - Select this to enable optimizations for model z890/z990 (2084 and - 2086 series). The kernel will be slightly faster but will not work - on older machines. +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y if SPARSEMEM -config MARCH_Z9_109 - bool "IBM System z9" - help - Select this to enable optimizations for IBM System z9 (2094 and - 2096 series). The kernel will be slightly faster but will not work - on older machines. +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y -config MARCH_Z10 - bool "IBM System z10" - help - Select this to enable optimizations for IBM System z10 (2097 and - 2098 series). The kernel will be slightly faster but will not work - on older machines. +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y + depends on 64BIT -config MARCH_Z196 - bool "IBM zEnterprise 196" - help - Select this to enable optimizations for IBM zEnterprise 196 - (2817 series). The kernel will be slightly faster but will not work - on older machines. +config FORCE_MAX_ZONEORDER + int + default "9" -endchoice +source "mm/Kconfig" config PACK_STACK def_bool y @@ -303,19 +443,6 @@ config PACK_STACK Say Y if you are unsure. -config SMALL_STACK - def_bool n - prompt "Use 8kb for kernel stack instead of 16kb" - depends on PACK_STACK && 64BIT && !LOCKDEP - help - If you say Y here and the compiler supports the -mkernel-backchain - option the kernel will use a smaller kernel stack size. The reduced - size is 8kb instead of 16kb. This allows to run more threads on a - system and reduces the pressure on the memory management for higher - order page allocations. - - Say N if you are unsure. - config CHECK_STACK def_bool y prompt "Detect kernel stack overflow" @@ -341,69 +468,87 @@ config STACK_GUARD The minimum size for the stack guard should be 256 for 31 bit and 512 for 64 bit. -config WARN_STACK +config WARN_DYNAMIC_STACK def_bool n - prompt "Emit compiler warnings for function with broken stack usage" + prompt "Emit compiler warnings for function with dynamic stack usage" help - This option enables the compiler options -mwarn-framesize and - -mwarn-dynamicstack. If the compiler supports these options it - will generate warnings for function which either use alloca or - create a stack frame bigger than CONFIG_WARN_STACK_SIZE. + This option enables the compiler option -mwarn-dynamicstack. If the + compiler supports this options generates warnings for functions + that dynamically allocate stack space using alloca. Say N if you are unsure. -config WARN_STACK_SIZE - int "Maximum frame size considered safe (128-2048)" - range 128 2048 - depends on WARN_STACK - default "2048" - help - This allows you to specify the maximum frame size a function may - have without the compiler complaining about it. +endmenu -config ARCH_POPULATES_NODE_MAP - def_bool y +menu "I/O subsystem" + +config QDIO + def_tristate y + prompt "QDIO support" + ---help--- + This driver provides the Queued Direct I/O base support for + IBM System z. -comment "Kernel preemption" + To compile this driver as a module, choose M here: the + module will be called qdio. -source "kernel/Kconfig.preempt" + If unsure, say Y. -config ARCH_SPARSEMEM_ENABLE - def_bool y - select SPARSEMEM_VMEMMAP_ENABLE - select SPARSEMEM_VMEMMAP - select SPARSEMEM_STATIC if !64BIT +menuconfig PCI + bool "PCI support" + default n + depends on 64BIT + select PCI_MSI + help + Enable PCI support. -config ARCH_SPARSEMEM_DEFAULT - def_bool y +if PCI -config ARCH_SELECT_MEMORY_MODEL - def_bool y +config PCI_NR_FUNCTIONS + int "Maximum number of PCI functions (1-4096)" + range 1 4096 + default "64" + help + This allows you to specify the maximum number of PCI functions which + this kernel will support. -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y if SPARSEMEM +config PCI_NR_MSI + int "Maximum number of MSI interrupts (64-32768)" + range 64 32768 + default "256" + help + This defines the number of virtual interrupts the kernel will + provide for MSI interrupts. If you configure your system to have + too few drivers will fail to allocate MSI interrupts for all + PCI devices. -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y +source "drivers/pci/Kconfig" +source "drivers/pci/pcie/Kconfig" +source "drivers/pci/hotplug/Kconfig" -config ARCH_HIBERNATION_POSSIBLE - def_bool y if 64BIT +endif # PCI -source "mm/Kconfig" +config PCI_DOMAINS + def_bool PCI -comment "I/O subsystem configuration" +config HAS_IOMEM + def_bool PCI -config QDIO - def_tristate y - prompt "QDIO support" - ---help--- - This driver provides the Queued Direct I/O base support for - IBM System z. +config IOMMU_HELPER + def_bool PCI - To compile this driver as a module, choose M here: the - module will be called qdio. +config HAS_DMA + def_bool PCI + select HAVE_DMA_API_DEBUG - If unsure, say Y. +config NEED_SG_DMA_LENGTH + def_bool PCI + +config HAVE_DMA_ATTRS + def_bool PCI + +config NEED_DMA_MAP_STATE + def_bool PCI config CHSC_SCH def_tristate m @@ -422,40 +567,96 @@ config CHSC_SCH If unsure, say N. -comment "Misc" - -config IPL +config SCM_BUS def_bool y - prompt "Builtin IPL record support" + depends on 64BIT + prompt "SCM bus driver" help - If you want to use the produced kernel to IPL directly from a - device, you have to merge a bootsector specific to the device - into the first bytes of the kernel. You will have to select the - IPL device. + Bus driver for Storage Class Memory. -choice - prompt "IPL method generated into head.S" - depends on IPL - default IPL_VM +config EADM_SCH + def_tristate m + prompt "Support for EADM subchannels" + depends on SCM_BUS help - Select "tape" if you want to IPL the image from a Tape. + This driver allows usage of EADM subchannels. EADM subchannels act + as a communication vehicle for SCM increments. - Select "vm_reader" if you are running under VM/ESA and want - to IPL the image from the emulated card reader. + To compile this driver as a module, choose M here: the + module will be called eadm_sch. -config IPL_TAPE - bool "tape" +endmenu -config IPL_VM - bool "vm_reader" +menu "Dump support" -endchoice +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT && SMP + select KEXEC + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. + This option also enables s390 zfcpdump. + See also <file:Documentation/s390/zfcpdump.txt> + +endmenu + +menu "Executable file formats / Emulations" source "fs/Kconfig.binfmt" -config FORCE_MAX_ZONEORDER - int - default "9" +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. + +endmenu + +menu "Power Management" + +config ARCH_HIBERNATION_POSSIBLE + def_bool y if 64BIT + +source "kernel/power/Kconfig" + +endmenu + +source "net/Kconfig" + +config PCMCIA + def_bool n + +config CCW + def_bool y + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/s390/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +menu "Virtualization" config PFAULT def_bool y @@ -471,8 +672,8 @@ config PFAULT this option. config SHARED_KERNEL - def_bool y - prompt "VM shared kernel support" + bool "VM shared kernel support" + depends on !JUMP_LABEL help Select this option, if you want to share the text segment of the Linux kernel between different VM guests. This reduces memory @@ -567,8 +768,6 @@ config APPLDATA_NET_SUM This can also be compiled as a module, which will be called appldata_net_sum.o. -source kernel/Kconfig.hz - config S390_HYPFS_FS def_bool y prompt "s390 hypervisor file system support" @@ -577,78 +776,21 @@ config S390_HYPFS_FS This is a virtual file system intended to provide accounting information in an s390 hypervisor environment. -config KEXEC - def_bool n - prompt "kexec system call" - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but is independent of hardware/microcode support. - -config ZFCPDUMP - def_bool n - prompt "zfcpdump support" - select SMP - help - Select this option if you want to build an zfcpdump enabled kernel. - Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. +source "arch/s390/kvm/Kconfig" config S390_GUEST def_bool y - prompt "s390 guest support for KVM (EXPERIMENTAL)" - depends on 64BIT && EXPERIMENTAL + prompt "s390 support for virtio devices" + depends on 64BIT + select TTY + select VIRTUALIZATION select VIRTIO - select VIRTIO_RING select VIRTIO_CONSOLE help - Select this option if you want to run the kernel as a guest under - the KVM hypervisor. This will add detection for KVM as well as a - virtio transport. If KVM is detected, the virtio console will be - the default console. - -config SECCOMP - def_bool y - prompt "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc/<pid>/seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. - -endmenu + Enabling this option adds support for virtio based paravirtual device + drivers on s390. -menu "Power Management" - -source "kernel/power/Kconfig" + Select this option if you want to run the kernel as a guest under + the KVM hypervisor. endmenu - -source "net/Kconfig" - -config PCMCIA - def_bool n - -config CCW - def_bool y - -source "drivers/Kconfig" - -source "fs/Kconfig" - -source "arch/s390/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -source "lib/Kconfig" - -source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 2b380df9560..c56878e1245 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -17,18 +17,19 @@ config STRICT_DEVMEM If you are unsure, say Y. -config DEBUG_STRICT_USER_COPY_CHECKS - def_bool n - prompt "Strict user copy size checks" +config S390_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS ---help--- - Enabling this option turns a certain set of sanity checks for user - copy operations into compile time warnings. - - The copy_from_user() etc checks are there to help test if there - are sufficient security checks on the length argument of - the copy operation, by having gcc prove that the argument is - within bounds. - - If unsure, or if you run an older (pre 4.4) gcc, say N. - + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + +config DEBUG_SET_MODULE_RONX + def_bool y + depends on MODULES endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index d5b8a6ade52..874e6d6e9c5 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -24,8 +24,8 @@ CHECKFLAGS += -D__s390__ -msize-long else LD_BFD := elf64-s390 LDFLAGS := -m elf64_s390 -KBUILD_AFLAGS_MODULE += -fpic -D__PIC__ -KBUILD_CFLAGS_MODULE += -fpic -D__PIC__ +KBUILD_AFLAGS_MODULE += -fPIC +KBUILD_CFLAGS_MODULE += -fPIC KBUILD_CFLAGS += -m64 KBUILD_AFLAGS += -m64 UTS_MACHINE := s390x @@ -35,12 +35,21 @@ endif export LD_BFD -cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5) -cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) -cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) -cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109) -cflags-$(CONFIG_MARCH_Z10) += $(call cc-option,-march=z10) -cflags-$(CONFIG_MARCH_Z196) += $(call cc-option,-march=z196) +cflags-$(CONFIG_MARCH_G5) += -march=g5 +cflags-$(CONFIG_MARCH_Z900) += -march=z900 +cflags-$(CONFIG_MARCH_Z990) += -march=z990 +cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109 +cflags-$(CONFIG_MARCH_Z10) += -march=z10 +cflags-$(CONFIG_MARCH_Z196) += -march=z196 +cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12 + +cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5 +cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 +cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990 +cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 +cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 +cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 +cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 #KBUILD_IMAGE is necessary for make rpm KBUILD_IMAGE :=arch/s390/boot/image @@ -54,22 +63,12 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls ifeq ($(call cc-option-yn,-mkernel-backchain),y) cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif # new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) @@ -80,8 +79,7 @@ endif endif ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_STACK) += -mwarn-dynamicstack -cflags-$(CONFIG_WARN_STACK) += -mwarn-framesize=$(CONFIG_WARN_STACK_SIZE) +cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack endif KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) @@ -89,18 +87,15 @@ KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare KBUILD_AFLAGS += $(aflags-y) OBJCOPYFLAGS := -O binary -LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) -head-y += arch/s390/kernel/init_task.o # See arch/s390/Kbuild for content of core part of the kernel core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ -drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ # must be linked after kernel drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h index f0b23fc759b..4a67f2b5f6a 100644 --- a/arch/s390/appldata/appldata.h +++ b/arch/s390/appldata/appldata.h @@ -1,6 +1,4 @@ /* - * arch/s390/appldata/appldata.h - * * Definitions and interface for Linux - z/VM Monitor Stream. * * Copyright IBM Corp. 2003, 2008 diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 5c91995b74e..47c8630c93c 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -1,6 +1,4 @@ /* - * arch/s390/appldata/appldata_base.c - * * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. * Exports appldata_register_ops() and appldata_unregister_ops() for the * data gathering modules. @@ -29,7 +27,7 @@ #include <linux/suspend.h> #include <linux/platform_device.h> #include <asm/appldata.h> -#include <asm/timer.h> +#include <asm/vtimer.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/smp.h> @@ -50,9 +48,9 @@ static struct platform_device *appldata_pdev; * /proc entries (sysctl) */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; -static int appldata_timer_handler(ctl_table *ctl, int write, +static int appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static int appldata_interval_handler(ctl_table *ctl, int write, +static int appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -84,8 +82,7 @@ static struct ctl_table appldata_dir_table[] = { /* * Timer */ -static DEFINE_PER_CPU(struct vtimer_list, appldata_timer); -static atomic_t appldata_expire_count = ATOMIC_INIT(0); +static struct vtimer_list appldata_timer; static DEFINE_SPINLOCK(appldata_timer_lock); static int appldata_interval = APPLDATA_CPU_INTERVAL; @@ -115,10 +112,7 @@ static LIST_HEAD(appldata_ops_list); */ static void appldata_timer_function(unsigned long data) { - if (atomic_dec_and_test(&appldata_expire_count)) { - atomic_set(&appldata_expire_count, num_online_cpus()); - queue_work(appldata_wq, (struct work_struct *) data); - } + queue_work(appldata_wq, (struct work_struct *) data); } /* @@ -130,10 +124,7 @@ static void appldata_work_fn(struct work_struct *work) { struct list_head *lh; struct appldata_ops *ops; - int i; - i = 0; - get_online_cpus(); mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { ops = list_entry(lh, struct appldata_ops, list); @@ -142,7 +133,6 @@ static void appldata_work_fn(struct work_struct *work) } } mutex_unlock(&appldata_ops_mutex); - put_online_cpus(); } /* @@ -170,20 +160,6 @@ int appldata_diag(char record_nr, u16 function, unsigned long buffer, /****************************** /proc stuff **********************************/ -/* - * appldata_mod_vtimer_wrap() - * - * wrapper function for mod_virt_timer(), because smp_call_function_single() - * accepts only one parameter. - */ -static void __appldata_mod_vtimer_wrap(void *p) { - struct { - struct vtimer_list *timer; - u64 expires; - } *args = p; - mod_virt_timer_periodic(args->timer, args->expires); -} - #define APPLDATA_ADD_TIMER 0 #define APPLDATA_DEL_TIMER 1 #define APPLDATA_MOD_TIMER 2 @@ -194,49 +170,28 @@ static void __appldata_mod_vtimer_wrap(void *p) { * Add, delete or modify virtual timers on all online cpus. * The caller needs to get the appldata_timer_lock spinlock. */ -static void -__appldata_vtimer_setup(int cmd) +static void __appldata_vtimer_setup(int cmd) { - u64 per_cpu_interval; - int i; + u64 timer_interval = (u64) appldata_interval * 1000 * TOD_MICRO; switch (cmd) { case APPLDATA_ADD_TIMER: if (appldata_timer_active) break; - per_cpu_interval = (u64) (appldata_interval*1000 / - num_online_cpus()) * TOD_MICRO; - for_each_online_cpu(i) { - per_cpu(appldata_timer, i).expires = per_cpu_interval; - smp_call_function_single(i, add_virt_timer_periodic, - &per_cpu(appldata_timer, i), - 1); - } + appldata_timer.expires = timer_interval; + add_virt_timer_periodic(&appldata_timer); appldata_timer_active = 1; break; case APPLDATA_DEL_TIMER: - for_each_online_cpu(i) - del_virt_timer(&per_cpu(appldata_timer, i)); + del_virt_timer(&appldata_timer); if (!appldata_timer_active) break; appldata_timer_active = 0; - atomic_set(&appldata_expire_count, num_online_cpus()); break; case APPLDATA_MOD_TIMER: - per_cpu_interval = (u64) (appldata_interval*1000 / - num_online_cpus()) * TOD_MICRO; if (!appldata_timer_active) break; - for_each_online_cpu(i) { - struct { - struct vtimer_list *timer; - u64 expires; - } args; - args.timer = &per_cpu(appldata_timer, i); - args.expires = per_cpu_interval; - smp_call_function_single(i, __appldata_mod_vtimer_wrap, - &args, 1); - } + mod_virt_timer_periodic(&appldata_timer, timer_interval); } } @@ -246,10 +201,10 @@ __appldata_vtimer_setup(int cmd) * Start/Stop timer, show status of timer (0 = not active, 1 = active) */ static int -appldata_timer_handler(ctl_table *ctl, int write, +appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int len; + unsigned int len; char buf[2]; if (!*lenp || *ppos) { @@ -257,7 +212,9 @@ appldata_timer_handler(ctl_table *ctl, int write, return 0; } if (!write) { - len = sprintf(buf, appldata_timer_active ? "1\n" : "0\n"); + strncpy(buf, appldata_timer_active ? "1\n" : "0\n", + ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -267,14 +224,12 @@ appldata_timer_handler(ctl_table *ctl, int write, len = *lenp; if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) return -EFAULT; - get_online_cpus(); spin_lock(&appldata_timer_lock); if (buf[0] == '1') __appldata_vtimer_setup(APPLDATA_ADD_TIMER); else if (buf[0] == '0') __appldata_vtimer_setup(APPLDATA_DEL_TIMER); spin_unlock(&appldata_timer_lock); - put_online_cpus(); out: *lenp = len; *ppos += len; @@ -288,10 +243,11 @@ out: * current timer interval. */ static int -appldata_interval_handler(ctl_table *ctl, int write, +appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int len, interval; + unsigned int len; + int interval; char buf[16]; if (!*lenp || *ppos) { @@ -307,20 +263,17 @@ appldata_interval_handler(ctl_table *ctl, int write, goto out; } len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) { + if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) return -EFAULT; - } interval = 0; sscanf(buf, "%i", &interval); if (interval <= 0) return -EINVAL; - get_online_cpus(); spin_lock(&appldata_timer_lock); appldata_interval = interval; __appldata_vtimer_setup(APPLDATA_MOD_TIMER); spin_unlock(&appldata_timer_lock); - put_online_cpus(); out: *lenp = len; *ppos += len; @@ -334,11 +287,12 @@ out: * monitoring (0 = not in process, 1 = in process) */ static int -appldata_generic_handler(ctl_table *ctl, int write, +appldata_generic_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; - int rc, len, found; + unsigned int len; + int rc, found; char buf[2]; struct list_head *lh; @@ -367,7 +321,8 @@ appldata_generic_handler(ctl_table *ctl, int write, return 0; } if (!write) { - len = sprintf(buf, ops->active ? "1\n" : "0\n"); + strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) { @@ -487,14 +442,12 @@ static int appldata_freeze(struct device *dev) int rc; struct list_head *lh; - get_online_cpus(); spin_lock(&appldata_timer_lock); if (appldata_timer_active) { __appldata_vtimer_setup(APPLDATA_DEL_TIMER); appldata_timer_suspended = 1; } spin_unlock(&appldata_timer_lock); - put_online_cpus(); mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { @@ -518,14 +471,12 @@ static int appldata_restore(struct device *dev) int rc; struct list_head *lh; - get_online_cpus(); spin_lock(&appldata_timer_lock); if (appldata_timer_suspended) { __appldata_vtimer_setup(APPLDATA_ADD_TIMER); appldata_timer_suspended = 0; } spin_unlock(&appldata_timer_lock); - put_online_cpus(); mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { @@ -569,53 +520,6 @@ static struct platform_driver appldata_pdrv = { /******************************* init / exit *********************************/ -static void __cpuinit appldata_online_cpu(int cpu) -{ - init_virt_timer(&per_cpu(appldata_timer, cpu)); - per_cpu(appldata_timer, cpu).function = appldata_timer_function; - per_cpu(appldata_timer, cpu).data = (unsigned long) - &appldata_work; - atomic_inc(&appldata_expire_count); - spin_lock(&appldata_timer_lock); - __appldata_vtimer_setup(APPLDATA_MOD_TIMER); - spin_unlock(&appldata_timer_lock); -} - -static void __cpuinit appldata_offline_cpu(int cpu) -{ - del_virt_timer(&per_cpu(appldata_timer, cpu)); - if (atomic_dec_and_test(&appldata_expire_count)) { - atomic_set(&appldata_expire_count, num_online_cpus()); - queue_work(appldata_wq, &appldata_work); - } - spin_lock(&appldata_timer_lock); - __appldata_vtimer_setup(APPLDATA_MOD_TIMER); - spin_unlock(&appldata_timer_lock); -} - -static int __cpuinit appldata_cpu_notify(struct notifier_block *self, - unsigned long action, - void *hcpu) -{ - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - appldata_online_cpu((long) hcpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - appldata_offline_cpu((long) hcpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata appldata_nb = { - .notifier_call = appldata_cpu_notify, -}; - /* * appldata_init() * @@ -623,7 +527,11 @@ static struct notifier_block __cpuinitdata appldata_nb = { */ static int __init appldata_init(void) { - int i, rc; + int rc; + + init_virt_timer(&appldata_timer); + appldata_timer.function = appldata_timer_function; + appldata_timer.data = (unsigned long) &appldata_work; rc = platform_driver_register(&appldata_pdrv); if (rc) @@ -641,14 +549,6 @@ static int __init appldata_init(void) goto out_device; } - get_online_cpus(); - for_each_online_cpu(i) - appldata_online_cpu(i); - put_online_cpus(); - - /* Register cpu hotplug notifier */ - register_hotcpu_notifier(&appldata_nb); - appldata_sysctl_header = register_sysctl_table(appldata_dir_table); return 0; diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index e43fe753703..edcf2a70694 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -1,10 +1,8 @@ /* - * arch/s390/appldata/appldata_mem.c - * * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects data related to memory management. * - * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2006 * * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ @@ -15,6 +13,7 @@ #include <linux/kernel_stat.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/slab.h> #include <asm/io.h> #include "appldata.h" @@ -34,7 +33,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_mem_data { +struct appldata_mem_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -65,7 +64,7 @@ static struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} __attribute__((packed)) appldata_mem_data; +} __packed; /* @@ -92,9 +91,7 @@ static void appldata_get_mem_data(void *data) mem_data->pswpin = ev[PSWPIN]; mem_data->pswpout = ev[PSWPOUT]; mem_data->pgalloc = ev[PGALLOC_NORMAL]; -#ifdef CONFIG_ZONE_DMA mem_data->pgalloc += ev[PGALLOC_DMA]; -#endif mem_data->pgfault = ev[PGFAULT]; mem_data->pgmajfault = ev[PGMAJFAULT]; @@ -112,7 +109,7 @@ static void appldata_get_mem_data(void *data) mem_data->totalswap = P2K(val.totalswap); mem_data->freeswap = P2K(val.freeswap); - mem_data->timestamp = get_clock(); + mem_data->timestamp = get_tod_clock(); mem_data->sync_count_2++; } @@ -122,7 +119,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), .callback = &appldata_get_mem_data, - .data = &appldata_mem_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -135,7 +131,17 @@ static struct appldata_ops ops = { */ static int __init appldata_mem_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -146,6 +152,7 @@ static int __init appldata_mem_init(void) static void __exit appldata_mem_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 5da7c562a90..66037d2622b 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -1,11 +1,9 @@ /* - * arch/s390/appldata/appldata_net_sum.c - * * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects accumulated network statistics (Packets received/transmitted, * dropped, errors, ...). * - * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2006 * * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ @@ -31,7 +29,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_net_sum_data { +struct appldata_net_sum_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -53,7 +51,7 @@ static struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} __attribute__((packed)) appldata_net_sum_data; +} __packed; /* @@ -113,7 +111,7 @@ static void appldata_get_net_sum_data(void *data) net_data->tx_dropped = tx_dropped; net_data->collisions = collisions; - net_data->timestamp = get_clock(); + net_data->timestamp = get_tod_clock(); net_data->sync_count_2++; } @@ -123,7 +121,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), .callback = &appldata_get_net_sum_data, - .data = &appldata_net_sum_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -136,7 +133,17 @@ static struct appldata_ops ops = { */ static int __init appldata_net_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -147,6 +154,7 @@ static int __init appldata_net_init(void) static void __exit appldata_net_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 92f1cb745d6..69b23b25ac3 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -1,10 +1,8 @@ /* - * arch/s390/appldata/appldata_os.c - * * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects misc. OS related data (CPU utilization, running processes). * - * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2006 * * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ @@ -115,21 +113,21 @@ static void appldata_get_os_data(void *data) j = 0; for_each_online_cpu(i) { os_data->os_cpu[j].per_cpu_user = - cputime_to_jiffies(kstat_cpu(i).cpustat.user); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); os_data->os_cpu[j].per_cpu_nice = - cputime_to_jiffies(kstat_cpu(i).cpustat.nice); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); os_data->os_cpu[j].per_cpu_system = - cputime_to_jiffies(kstat_cpu(i).cpustat.system); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); os_data->os_cpu[j].per_cpu_idle = - cputime_to_jiffies(kstat_cpu(i).cpustat.idle); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); os_data->os_cpu[j].per_cpu_irq = - cputime_to_jiffies(kstat_cpu(i).cpustat.irq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); os_data->os_cpu[j].per_cpu_softirq = - cputime_to_jiffies(kstat_cpu(i).cpustat.softirq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); os_data->os_cpu[j].per_cpu_iowait = - cputime_to_jiffies(kstat_cpu(i).cpustat.iowait); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); os_data->os_cpu[j].per_cpu_steal = - cputime_to_jiffies(kstat_cpu(i).cpustat.steal); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); os_data->os_cpu[j].cpu_id = i; j++; } @@ -158,7 +156,7 @@ static void appldata_get_os_data(void *data) } ops.size = new_size; } - os_data->timestamp = get_clock(); + os_data->timestamp = get_tod_clock(); os_data->sync_count_2++; } @@ -173,7 +171,7 @@ static int __init appldata_os_init(void) int rc, max_size; max_size = sizeof(struct appldata_os_data) + - (NR_CPUS * sizeof(struct appldata_os_per_cpu)); + (num_possible_cpus() * sizeof(struct appldata_os_per_cpu)); if (max_size > APPLDATA_MAX_REC_SIZE) { pr_err("Maximum OS record size %i exceeds the maximum " "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore new file mode 100644 index 00000000000..017d5912ad2 --- /dev/null +++ b/arch/s390/boot/.gitignore @@ -0,0 +1,2 @@ +image +bzImage diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 8800cf09069..9a42ecec564 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -6,7 +6,7 @@ COMPILE_VERSION := __linux_compile_version_id__`hostname | \ tr -c '[0-9A-Za-z]' '_'`__`date | \ tr -c '[0-9A-Za-z]' '_'`_t -EXTRA_CFLAGS := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. +ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. targets := image targets += bzImage @@ -21,6 +21,6 @@ $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ -install: $(CONFIGURE) $(obj)/image - sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ - System.map Kerntypes "$(INSTALL_PATH)" +install: $(CONFIGURE) $(obj)/bzImage + sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ + System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore new file mode 100644 index 00000000000..ae06b9b4c02 --- /dev/null +++ b/arch/s390/boot/compressed/.gitignore @@ -0,0 +1,3 @@ +sizes.h +vmlinux +vmlinux.lds diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 1c999f726a5..f90d1fc6d60 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,11 +6,13 @@ BITS := $(if $(CONFIG_64BIT),64,31) -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ - vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o sizes.h head$(BITS).o +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 +targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += misc.o piggy.o sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 -KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) @@ -46,17 +48,23 @@ vmlinux.bin.all-y := $(obj)/vmlinux.bin suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZ4) := lz4 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_XZ) := xz $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) + $(call if_changed,lz4) $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) $(call if_changed,lzma) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) $(call if_changed,lzo) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) + $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) diff --git a/arch/s390/boot/compressed/head31.S b/arch/s390/boot/compressed/head31.S index 2a5523a32bc..e8c9e18b803 100644 --- a/arch/s390/boot/compressed/head31.S +++ b/arch/s390/boot/compressed/head31.S @@ -7,14 +7,14 @@ */ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> #include "sizes.h" __HEAD - .globl startup_continue -startup_continue: +ENTRY(startup_continue) basr %r13,0 # get base .LPG1: # setup stack diff --git a/arch/s390/boot/compressed/head64.S b/arch/s390/boot/compressed/head64.S index 2982cb14055..f86a4eef28a 100644 --- a/arch/s390/boot/compressed/head64.S +++ b/arch/s390/boot/compressed/head64.S @@ -7,14 +7,14 @@ */ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> #include "sizes.h" __HEAD - .globl startup_continue -startup_continue: +ENTRY(startup_continue) basr %r13,0 # get base .LPG1: # setup stack diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 2751b3a8a66..57cbaff1f39 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -19,6 +19,7 @@ #undef memset #undef memcpy #undef memmove +#define memmove memmove #define memzero(s, n) memset((s), 0, (n)) /* Symbols defined by linker scripts */ @@ -46,6 +47,10 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/decompress_bunzip2.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + #ifdef CONFIG_KERNEL_LZMA #include "../../../../lib/decompress_unlzma.c" #endif @@ -54,9 +59,13 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/decompress_unlzo.c" #endif +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + extern _sclp_print_early(const char *); -int puts(const char *s) +static int puts(const char *s) { _sclp_print_early(s); return 0; @@ -66,34 +75,37 @@ void *memset(void *s, int c, size_t n) { char *xs; - if (c == 0) - return __builtin_memset(s, 0, n); - - xs = (char *) s; - if (n > 0) - do { - *xs++ = c; - } while (--n > 0); + xs = s; + while (n--) + *xs++ = c; return s; } -void *memcpy(void *__dest, __const void *__src, size_t __n) +void *memcpy(void *dest, const void *src, size_t n) { - return __builtin_memcpy(__dest, __src, __n); + const char *s = src; + char *d = dest; + + while (n--) + *d++ = *s++; + return dest; } -void *memmove(void *__dest, __const void *__src, size_t __n) +void *memmove(void *dest, const void *src, size_t n) { - char *d; - const char *s; - - if (__dest <= __src) - return __builtin_memcpy(__dest, __src, __n); - d = __dest + __n; - s = __src + __n; - while (__n--) - *--d = *--s; - return __dest; + const char *s = src; + char *d = dest; + + if (d <= s) { + while (n--) + *d++ = *s++; + } else { + d += n; + s += n; + while (n--) + *--d = *--s; + } + return dest; } static void error(char *x) diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index d80f79d8dd9..8e1fb823928 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -5,7 +5,7 @@ OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) #else OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390) +OUTPUT_ARCH(s390:31-bit) #endif ENTRY(startup) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig new file mode 100644 index 00000000000..fd09a10a2b5 --- /dev/null +++ b/arch/s390/configs/default_defconfig @@ -0,0 +1,687 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_PERF=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=m +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=256 +CONFIG_PREEMPT=y +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PCI=y +CONFIG_PCI_DEBUG=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NET_SCTPPROBE=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +CONFIG_RDS_DEBUG=y +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_BPF_JIT=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_TCPPROBE=m +CONFIG_DEVTMPFS=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_XIP=y +CONFIG_CDROM_PKTCDVD=m +CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=y +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_VHOST_NET=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_RAW_DRIVER=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_VIRTIO_BALLOON=m +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD_DEBUG=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_XFS_DEBUG=y +CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FANOTIFY=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_READABLE_ASM=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_SELFTEST=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_SLUB_DEBUG_ON=y +CONFIG_SLUB_STATS=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_VM_RB=y +CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_RT_MUTEX_TESTER=y +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_PROVE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=300 +CONFIG_NOTIFIER_ERROR_INJECTION=m +CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_PM_NOTIFIER_ERROR_INJECT=m +CONFIG_FAULT_INJECTION=y +CONFIG_FAILSLAB=y +CONFIG_FAIL_PAGE_ALLOC=y +CONFIG_FAIL_MAKE_REQUEST=y +CONFIG_FAIL_IO_TIMEOUT=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y +CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_KPROBE_EVENT is not set +CONFIG_LKDTM=m +CONFIG_TEST_LIST_SORT=y +CONFIG_KPROBES_SANITY_TEST=y +CONFIG_RBTREE_TEST=y +CONFIG_INTERVAL_TREE_TEST=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DMA_API_DEBUG=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_IMA=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=y +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m +CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig new file mode 100644 index 00000000000..b061180d354 --- /dev/null +++ b/arch/s390/configs/gcov_defconfig @@ -0,0 +1,640 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_PERF=y +CONFIG_BLK_CGROUP=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=m +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_GCOV_KERNEL=y +CONFIG_GCOV_PROFILE_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=256 +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NET_SCTPPROBE=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_BPF_JIT=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_TCPPROBE=m +CONFIG_DEVTMPFS=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_XIP=y +CONFIG_CDROM_PKTCDVD=m +CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=y +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_VHOST_NET=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_RAW_DRIVER=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_VIRTIO_BALLOON=m +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD_DEBUG=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FANOTIFY=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_TIMER_STATS=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_NOTIFIER_ERROR_INJECTION=m +CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_PM_NOTIFIER_ERROR_INJECT=m +CONFIG_LATENCYTOP=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_KPROBE_EVENT is not set +CONFIG_LKDTM=m +CONFIG_RBTREE_TEST=m +CONFIG_INTERVAL_TREE_TEST=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_IMA=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=y +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m +CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig new file mode 100644 index 00000000000..d279baa0801 --- /dev/null +++ b/arch/s390/configs/performance_defconfig @@ -0,0 +1,632 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_PERF=y +CONFIG_BLK_CGROUP=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=m +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=256 +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NET_SCTPPROBE=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_BPF_JIT=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_TCPPROBE=m +CONFIG_DEVTMPFS=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_XIP=y +CONFIG_CDROM_PKTCDVD=m +CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=y +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_VHOST_NET=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_RAW_DRIVER=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_VIRTIO_BALLOON=m +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD_DEBUG=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FANOTIFY=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_TIMER_STATS=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_LATENCYTOP=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_KPROBE_EVENT is not set +CONFIG_LKDTM=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_IMA=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=y +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m +CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig new file mode 100644 index 00000000000..948e0e057a2 --- /dev/null +++ b/arch/s390/configs/zfcpdump_defconfig @@ -0,0 +1,86 @@ +# CONFIG_SWAP is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +# CONFIG_COMPAT is not set +CONFIG_NR_CPUS=2 +# CONFIG_HOTPLUG_CPU is not set +CONFIG_HZ_100=y +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set +# CONFIG_CHECK_STACK is not set +# CONFIG_CHSC_SCH is not set +# CONFIG_SCM_BUS is not set +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_SECCOMP is not set +# CONFIG_IUCV is not set +CONFIG_ATM=y +CONFIG_ATM_LANE=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV_XPRAM is not set +# CONFIG_DCSSBLK is not set +# CONFIG_DASD is not set +CONFIG_ENCLOSURE_SERVICES=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_ENCLOSURE=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SRP_ATTRS=y +CONFIG_ZFCP=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_HVC_IUCV is not set +CONFIG_RAW_DRIVER=y +# CONFIG_SCLP_ASYNC is not set +# CONFIG_HMC_DRV is not set +# CONFIG_S390_TAPE is not set +# CONFIG_VMCP is not set +# CONFIG_MONWRITER is not set +# CONFIG_S390_VMUR is not set +# CONFIG_HID is not set +CONFIG_MEMSTICK=y +CONFIG_MEMSTICK_DEBUG=y +CONFIG_MEMSTICK_UNSAFE_RESUME=y +CONFIG_MSPRO_BLOCK=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_INOTIFY_USER is not set +CONFIG_CONFIGFS_FS=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_FTRACE is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +# CONFIG_PFAULT is not set +# CONFIG_S390_HYPFS_FS is not set +# CONFIG_VIRTUALIZATION is not set +# CONFIG_S390_GUEST is not set diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 1cf81d77c5a..7f0b7cda625 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o +obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 58f46734465..23223cd63e5 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -4,7 +4,7 @@ * s390 implementation of the AES Cipher Algorithm. * * s390 Version: - * Copyright IBM Corp. 2005,2007 + * Copyright IBM Corp. 2005, 2007 * Author(s): Jan Glauber (jang@de.ibm.com) * Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback * @@ -25,16 +25,18 @@ #include <linux/err.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/spinlock.h> #include "crypt_s390.h" #define AES_KEYLEN_128 1 #define AES_KEYLEN_192 2 #define AES_KEYLEN_256 4 -static char keylen_flag = 0; +static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); +static char keylen_flag; struct s390_aes_ctx { - u8 iv[AES_BLOCK_SIZE]; u8 key[AES_MAX_KEY_SIZE]; long enc; long dec; @@ -45,6 +47,23 @@ struct s390_aes_ctx { } fallback; }; +struct pcc_param { + u8 key[32]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +}; + +struct s390_xts_ctx { + u8 key[32]; + u8 pcc_key[32]; + long enc; + long dec; + int key_len; + struct crypto_blkcipher *fallback; +}; + /* * Check if the key_len is supported by the HW. * Returns 0 if it is, a positive number if it is not and software fallback is @@ -197,7 +216,6 @@ static struct crypto_alg aes_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_aes_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_init = fallback_init_cip, .cra_exit = fallback_exit_cip, .cra_u = { @@ -307,7 +325,8 @@ static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, u8 *in = walk->src.virt.addr; ret = crypt_s390_km(func, param, out, in, n); - BUG_ON((ret < 0) || (ret != n)); + if (ret < 0 || ret != n) + return -EIO; nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -379,7 +398,6 @@ static struct crypto_alg ecb_aes_alg = { .cra_ctxsize = sizeof(struct s390_aes_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ecb_aes_alg.cra_list), .cra_init = fallback_init_blk, .cra_exit = fallback_exit_blk, .cra_u = { @@ -423,29 +441,36 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key(tfm, in_key, key_len); } -static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param, +static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, struct blkcipher_walk *walk) { + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; + struct { + u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE]; + } param; if (!nbytes) goto out; - memcpy(param, walk->iv, AES_BLOCK_SIZE); + memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + memcpy(param.key, sctx->key, sctx->key_len); do { /* only use complete blocks */ unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, param, out, in, n); - BUG_ON((ret < 0) || (ret != n)); + ret = crypt_s390_kmc(func, ¶m, out, in, n); + if (ret < 0 || ret != n) + return -EIO; nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, param, AES_BLOCK_SIZE); + memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); out: return ret; @@ -462,7 +487,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, return fallback_blk_enc(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk); + return cbc_aes_crypt(desc, sctx->enc, &walk); } static int cbc_aes_decrypt(struct blkcipher_desc *desc, @@ -476,7 +501,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, return fallback_blk_dec(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk); + return cbc_aes_crypt(desc, sctx->dec, &walk); } static struct crypto_alg cbc_aes_alg = { @@ -489,7 +514,6 @@ static struct crypto_alg cbc_aes_alg = { .cra_ctxsize = sizeof(struct s390_aes_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(cbc_aes_alg.cra_list), .cra_init = fallback_init_blk, .cra_exit = fallback_exit_blk, .cra_u = { @@ -504,15 +528,375 @@ static struct crypto_alg cbc_aes_alg = { } }; +static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int xts_fallback_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_fallback_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case 32: + xts_ctx->enc = KM_XTS_128_ENCRYPT; + xts_ctx->dec = KM_XTS_128_DECRYPT; + memcpy(xts_ctx->key + 16, in_key, 16); + memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); + break; + case 48: + xts_ctx->enc = 0; + xts_ctx->dec = 0; + xts_fallback_setkey(tfm, in_key, key_len); + break; + case 64: + xts_ctx->enc = KM_XTS_256_ENCRYPT; + xts_ctx->dec = KM_XTS_256_DECRYPT; + memcpy(xts_ctx->key, in_key, 32); + memcpy(xts_ctx->pcc_key, in_key + 32, 32); + break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + xts_ctx->key_len = key_len; + return 0; +} + +static int xts_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_xts_ctx *xts_ctx, + struct blkcipher_walk *walk) +{ + unsigned int offset = (xts_ctx->key_len >> 1) & 0x10; + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + unsigned int n; + u8 *in, *out; + struct pcc_param pcc_param; + struct { + u8 key[32]; + u8 init[16]; + } xts_param; + + if (!nbytes) + goto out; + + memset(pcc_param.block, 0, sizeof(pcc_param.block)); + memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); + memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.key, xts_ctx->pcc_key, 32); + ret = crypt_s390_pcc(func, &pcc_param.key[offset]); + if (ret < 0) + return -EIO; + + memcpy(xts_param.key, xts_ctx->key, 32); + memcpy(xts_param.init, pcc_param.xts, 16); + do { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + + ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n); + if (ret < 0 || ret != n) + return -EIO; + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); +out: + return ret; +} + +static int xts_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_encrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk); +} + +static int xts_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_decrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk); +} + +static int xts_fallback_init(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(xts_ctx->fallback)) { + pr_err("Allocating XTS fallback algorithm %s failed\n", + name); + return PTR_ERR(xts_ctx->fallback); + } + return 0; +} + +static void xts_fallback_exit(struct crypto_tfm *tfm) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(xts_ctx->fallback); + xts_ctx->fallback = NULL; +} + +static struct crypto_alg xts_aes_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_xts_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = xts_fallback_init, + .cra_exit = xts_fallback_exit, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, + } + } +}; + +static int xts_aes_alg_reg; + +static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { + case 16: + sctx->enc = KMCTR_AES_128_ENCRYPT; + sctx->dec = KMCTR_AES_128_DECRYPT; + break; + case 24: + sctx->enc = KMCTR_AES_192_ENCRYPT; + sctx->dec = KMCTR_AES_192_DECRYPT; + break; + case 32: + sctx->enc = KMCTR_AES_256_ENCRYPT; + sctx->dec = KMCTR_AES_256_DECRYPT; + break; + } + + return aes_set_key(tfm, in_key, key_len); +} + +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr + i, AES_BLOCK_SIZE); + } + return n; +} + +static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + unsigned int n, nbytes; + u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; + + if (!walk->nbytes) + return ret; + + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= AES_BLOCK_SIZE) { + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = AES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, sctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); + return -EIO; + } + if (n > AES_BLOCK_SIZE) + memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr, AES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } else { + if (!nbytes) + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + } + /* + * final block may be < AES_BLOCK_SIZE, copy only nbytes + */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, sctx->key, buf, in, + AES_BLOCK_SIZE, ctrbuf); + if (ret < 0 || ret != AES_BLOCK_SIZE) + return -EIO; + memcpy(out, buf, nbytes); + crypto_inc(ctrbuf, AES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE); + } + + return ret; +} + +static int ctr_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->enc, sctx, &walk); +} + +static int ctr_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->dec, sctx, &walk); +} + +static struct crypto_alg ctr_aes_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_encrypt, + .decrypt = ctr_aes_decrypt, + } + } +}; + +static int ctr_aes_alg_reg; + static int __init aes_s390_init(void) { int ret; - if (crypt_s390_func_available(KM_AES_128_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_128; - if (crypt_s390_func_available(KM_AES_192_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_192; - if (crypt_s390_func_available(KM_AES_256_ENCRYPT)) + if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA)) keylen_flag |= AES_KEYLEN_256; if (!keylen_flag) @@ -535,9 +919,42 @@ static int __init aes_s390_init(void) if (ret) goto cbc_aes_err; + if (crypt_s390_func_available(KM_XTS_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KM_XTS_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&xts_aes_alg); + if (ret) + goto xts_aes_err; + xts_aes_alg_reg = 1; + } + + if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_aes_err; + } + ret = crypto_register_alg(&ctr_aes_alg); + if (ret) { + free_page((unsigned long) ctrblk); + goto ctr_aes_err; + } + ctr_aes_alg_reg = 1; + } + out: return ret; +ctr_aes_err: + crypto_unregister_alg(&xts_aes_alg); +xts_aes_err: + crypto_unregister_alg(&cbc_aes_alg); cbc_aes_err: crypto_unregister_alg(&ecb_aes_alg); ecb_aes_err: @@ -548,6 +965,12 @@ aes_err: static void __exit aes_s390_fini(void) { + if (ctr_aes_alg_reg) { + crypto_unregister_alg(&ctr_aes_alg); + free_page((unsigned long) ctrblk); + } + if (xts_aes_alg_reg) + crypto_unregister_alg(&xts_aes_alg); crypto_unregister_alg(&cbc_aes_alg); crypto_unregister_alg(&ecb_aes_alg); crypto_unregister_alg(&aes_alg); diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 7ee9a1b4ad9..6c5cc6da711 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -3,7 +3,7 @@ * * Support for s390 cryptographic instructions. * - * Copyright IBM Corp. 2003,2007 + * Copyright IBM Corp. 2003, 2007 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) * @@ -17,6 +17,7 @@ #define _CRYPTO_ARCH_S390_CRYPT_S390_H #include <asm/errno.h> +#include <asm/facility.h> #define CRYPT_S390_OP_MASK 0xFF00 #define CRYPT_S390_FUNC_MASK 0x00FF @@ -24,13 +25,18 @@ #define CRYPT_S390_PRIORITY 300 #define CRYPT_S390_COMPOSITE_PRIORITY 400 +#define CRYPT_S390_MSA 0x1 +#define CRYPT_S390_MSA3 0x2 +#define CRYPT_S390_MSA4 0x4 + /* s390 cryptographic operations */ enum crypt_s390_operations { CRYPT_S390_KM = 0x0100, CRYPT_S390_KMC = 0x0200, CRYPT_S390_KIMD = 0x0300, CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500 + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600 }; /* @@ -51,6 +57,10 @@ enum crypt_s390_km_func { KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80, KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14, KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80, + KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32, + KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80, + KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34, + KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80, }; /* @@ -75,6 +85,26 @@ enum crypt_s390_kmc_func { }; /* + * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER) + * instruction + */ +enum crypt_s390_kmctr_func { + KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0, + KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1, + KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80, + KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2, + KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80, + KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3, + KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80, + KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12, + KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80, + KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13, + KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80, + KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14, + KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80, +}; + +/* * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) * instruction */ @@ -83,6 +113,7 @@ enum crypt_s390_kimd_func { KIMD_SHA_1 = CRYPT_S390_KIMD | 1, KIMD_SHA_256 = CRYPT_S390_KIMD | 2, KIMD_SHA_512 = CRYPT_S390_KIMD | 3, + KIMD_GHASH = CRYPT_S390_KIMD | 65, }; /* @@ -284,6 +315,45 @@ static inline int crypt_s390_kmac(long func, void *param, } /** + * crypt_s390_kmctr: + * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @counter: address of counter value + * + * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + */ +static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, + const u8 *src, long src_len, u8 *counter) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + register u8 *__ctr asm("6") = counter; + int ret = -1; + + asm volatile( + "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), + "+a" (__ctr) + : "d" (__func), "a" (__param) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** * crypt_s390_func_available: * @func: the function code of the specific function; 0 if op in general * @@ -291,13 +361,20 @@ static inline int crypt_s390_kmac(long func, void *param, * * Returns 1 if func available; 0 if func or op in general not available */ -static inline int crypt_s390_func_available(int func) +static inline int crypt_s390_func_available(int func, + unsigned int facility_mask) { unsigned char status[16]; int ret; - /* check if CPACF facility (bit 17) is available */ - if (!test_facility(17)) + if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) + return 0; + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) + return 0; + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) return 0; switch (func & CRYPT_S390_OP_MASK) { @@ -316,6 +393,10 @@ static inline int crypt_s390_func_available(int func) case CRYPT_S390_KMAC: ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); break; + case CRYPT_S390_KMCTR: + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, + NULL); + break; default: return 0; } @@ -326,4 +407,31 @@ static inline int crypt_s390_func_available(int func) return (status[func >> 3] & (0x80 >> (func & 7))) != 0; } +/** + * crypt_s390_pcc: + * @func: the function code passed to KM; see crypt_s390_km_func + * @param: address of parameter block; see POP for details on each func + * + * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU. + * + * Returns -1 for failure, 0 for success. + */ +static inline int crypt_s390_pcc(long func, void *param) +{ + register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */ + register void *__param asm("1") = param; + int ret = -1; + + asm volatile( + "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret) + : "d" (__func), "a" (__param) : "cc", "memory"); + return ret; +} + + #endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h deleted file mode 100644 index 6210457ceeb..00000000000 --- a/arch/s390/crypto/crypto_des.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#ifndef __CRYPTO_DES_H__ -#define __CRYPTO_DES_H__ - -extern int crypto_des_check_key(const u8*, unsigned int, u32*); - -#endif /*__CRYPTO_DES_H__*/ diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c deleted file mode 100644 index 5706af26644..00000000000 --- a/arch/s390/crypto/des_check_key.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * Originally released as descore by Dana L. How <how@isl.stanford.edu>. - * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel. - * Derived from Cryptoapi and Nettle implementations, adapted for in-place - * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL. - * - * s390 Version: - * Copyright IBM Corp. 2003 - * Author(s): Thomas Spatzier - * Jan Glauber (jan.glauber@de.ibm.com) - * - * Derived from "crypto/des.c" - * Copyright (c) 1992 Dana L. How. - * Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de> - * Copyright (c) Gisle Sflensminde <gisle@ii.uib.no> - * Copyright (C) 2001 Niels Mvller. - * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/crypto.h> -#include "crypto_des.h" - -#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o)) - -static const u8 parity[] = { - 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8, -}; - -/* - * RFC2451: Weak key checks SHOULD be performed. - */ -int -crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags) -{ - u32 n, w; - - n = parity[key[0]]; n <<= 4; - n |= parity[key[1]]; n <<= 4; - n |= parity[key[2]]; n <<= 4; - n |= parity[key[3]]; n <<= 4; - n |= parity[key[4]]; n <<= 4; - n |= parity[key[5]]; n <<= 4; - n |= parity[key[6]]; n <<= 4; - n |= parity[key[7]]; - w = 0x88888888L; - - if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY) - && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */ - if (n < 0x41415151) { - if (n < 0x31312121) { - if (n < 0x14141515) { - /* 01 01 01 01 01 01 01 01 */ - if (n == 0x11111111) goto weak; - /* 01 1F 01 1F 01 0E 01 0E */ - if (n == 0x13131212) goto weak; - } else { - /* 01 E0 01 E0 01 F1 01 F1 */ - if (n == 0x14141515) goto weak; - /* 01 FE 01 FE 01 FE 01 FE */ - if (n == 0x16161616) goto weak; - } - } else { - if (n < 0x34342525) { - /* 1F 01 1F 01 0E 01 0E 01 */ - if (n == 0x31312121) goto weak; - /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */ - if (n == 0x33332222) goto weak; - } else { - /* 1F E0 1F E0 0E F1 0E F1 */ - if (n == 0x34342525) goto weak; - /* 1F FE 1F FE 0E FE 0E FE */ - if (n == 0x36362626) goto weak; - } - } - } else { - if (n < 0x61616161) { - if (n < 0x44445555) { - /* E0 01 E0 01 F1 01 F1 01 */ - if (n == 0x41415151) goto weak; - /* E0 1F E0 1F F1 0E F1 0E */ - if (n == 0x43435252) goto weak; - } else { - /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */ - if (n == 0x44445555) goto weak; - /* E0 FE E0 FE F1 FE F1 FE */ - if (n == 0x46465656) goto weak; - } - } else { - if (n < 0x64646565) { - /* FE 01 FE 01 FE 01 FE 01 */ - if (n == 0x61616161) goto weak; - /* FE 1F FE 1F FE 0E FE 0E */ - if (n == 0x63636262) goto weak; - } else { - /* FE E0 FE E0 FE F1 FE F1 */ - if (n == 0x64646565) goto weak; - /* FE FE FE FE FE FE FE FE */ - if (n == 0x66666666) goto weak; - } - } - } - } - return 0; -weak: - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; -} - -EXPORT_SYMBOL(crypto_des_check_key); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Key Check function for DES & DES3 Cipher Algorithms"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index cc542011839..7acb77f7ef1 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -3,7 +3,7 @@ * * s390 implementation of the DES Cipher Algorithm. * - * Copyright IBM Corp. 2003,2007 + * Copyright IBM Corp. 2003, 2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) * @@ -22,22 +22,20 @@ #include "crypt_s390.h" -#define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE) +#define DES3_KEY_SIZE (3 * DES_KEY_SIZE) -struct crypt_s390_des_ctx { - u8 iv[DES_BLOCK_SIZE]; - u8 key[DES_KEY_SIZE]; -}; +static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); -struct crypt_s390_des3_192_ctx { +struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; - u8 key[DES3_192_KEY_SIZE]; + u8 key[DES3_KEY_SIZE]; }; static int des_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) + unsigned int key_len) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; u32 tmp[DES_EXPKEY_WORDS]; @@ -47,22 +45,22 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, return -EINVAL; } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static struct crypto_alg des_alg = { @@ -71,9 +69,8 @@ static struct crypto_alg des_alg = { .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des_alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = DES_KEY_SIZE, @@ -86,7 +83,7 @@ static struct crypto_alg des_alg = { }; static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, - void *param, struct blkcipher_walk *walk) + u8 *key, struct blkcipher_walk *walk) { int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes; @@ -97,8 +94,9 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_km(func, param, out, in, n); - BUG_ON((ret < 0) || (ret != n)); + ret = crypt_s390_km(func, key, out, in, n); + if (ret < 0 || ret != n) + return -EIO; nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); @@ -108,28 +106,35 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, } static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, - void *param, struct blkcipher_walk *walk) + struct blkcipher_walk *walk) { + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); int ret = blkcipher_walk_virt(desc, walk); unsigned int nbytes = walk->nbytes; + struct { + u8 iv[DES_BLOCK_SIZE]; + u8 key[DES3_KEY_SIZE]; + } param; if (!nbytes) goto out; - memcpy(param, walk->iv, DES_BLOCK_SIZE); + memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.key, ctx->key, DES3_KEY_SIZE); do { /* only use complete blocks */ unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, param, out, in, n); - BUG_ON((ret < 0) || (ret != n)); + ret = crypt_s390_kmc(func, ¶m, out, in, n); + if (ret < 0 || ret != n) + return -EIO; nbytes &= DES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, walk, nbytes); } while ((nbytes = walk->nbytes)); - memcpy(walk->iv, param, DES_BLOCK_SIZE); + memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); out: return ret; @@ -139,22 +144,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk); } static int ecb_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk); } static struct crypto_alg ecb_des_alg = { @@ -163,10 +168,9 @@ static struct crypto_alg ecb_des_alg = { .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ecb_des_alg.cra_list), .cra_u = { .blkcipher = { .min_keysize = DES_KEY_SIZE, @@ -182,22 +186,20 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk); } static struct crypto_alg cbc_des_alg = { @@ -206,10 +208,9 @@ static struct crypto_alg cbc_des_alg = { .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(cbc_des_alg.cra_list), .cra_u = { .blkcipher = { .min_keysize = DES_KEY_SIZE, @@ -235,154 +236,311 @@ static struct crypto_alg cbc_des_alg = { * property. * */ -static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && - memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE)) && + if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && + crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], + DES_KEY_SIZE)) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } -static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src, - DES_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm); + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src, - DES_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static struct crypto_alg des3_192_alg = { +static struct crypto_alg des3_alg = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-s390", .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list), .cra_u = { .cipher = { - .cia_min_keysize = DES3_192_KEY_SIZE, - .cia_max_keysize = DES3_192_KEY_SIZE, - .cia_setkey = des3_192_setkey, - .cia_encrypt = des3_192_encrypt, - .cia_decrypt = des3_192_decrypt, + .cia_min_keysize = DES3_KEY_SIZE, + .cia_max_keysize = DES3_KEY_SIZE, + .cia_setkey = des3_setkey, + .cia_encrypt = des3_encrypt, + .cia_decrypt = des3_decrypt, } } }; -static int ecb_des3_192_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk); } -static int ecb_des3_192_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk); + return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk); } -static struct crypto_alg ecb_des3_192_alg = { +static struct crypto_alg ecb_des3_alg = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "ecb-des3_ede-s390", .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT( - ecb_des3_192_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = DES3_192_KEY_SIZE, - .max_keysize = DES3_192_KEY_SIZE, - .setkey = des3_192_setkey, - .encrypt = ecb_des3_192_encrypt, - .decrypt = ecb_des3_192_decrypt, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, } } }; -static int cbc_des3_192_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk); } -static int cbc_des3_192_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk); } -static struct crypto_alg cbc_des3_192_alg = { +static struct crypto_alg cbc_des3_alg = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-des3_ede-s390", .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey, + .encrypt = cbc_des3_encrypt, + .decrypt = cbc_des3_decrypt, + } + } +}; + +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + crypto_inc(ctrptr + i, DES_BLOCK_SIZE); + } + return n; +} + +static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, + struct s390_des_ctx *ctx, + struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); + unsigned int n, nbytes; + u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; + + if (!walk->nbytes) + return ret; + + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= DES_BLOCK_SIZE) { + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = DES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, ctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); + return -EIO; + } + if (n > DES_BLOCK_SIZE) + memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrptr, DES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } else { + if (!nbytes) + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + } + /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, ctx->key, buf, in, + DES_BLOCK_SIZE, ctrbuf); + if (ret < 0 || ret != DES_BLOCK_SIZE) + return -EIO; + memcpy(out, buf, nbytes); + crypto_inc(ctrbuf, DES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE); + } + return ret; +} + +static int ctr_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk); +} + +static int ctr_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des_alg = { + .cra_name = "ctr(des)", + .cra_driver_name = "ctr-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = ctr_des_encrypt, + .decrypt = ctr_des_decrypt, + } + } +}; + +static int ctr_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk); +} + +static int ctr_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des3_alg = { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT( - cbc_des3_192_alg.cra_list), .cra_u = { .blkcipher = { - .min_keysize = DES3_192_KEY_SIZE, - .max_keysize = DES3_192_KEY_SIZE, + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, - .setkey = des3_192_setkey, - .encrypt = cbc_des3_192_encrypt, - .decrypt = cbc_des3_192_decrypt, + .setkey = des3_setkey, + .encrypt = ctr_des3_encrypt, + .decrypt = ctr_des3_decrypt, } } }; -static int des_s390_init(void) +static int __init des_s390_init(void) { int ret; - if (!crypt_s390_func_available(KM_DEA_ENCRYPT) || - !crypt_s390_func_available(KM_TDEA_192_ENCRYPT)) + if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) || + !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA)) return -EOPNOTSUPP; ret = crypto_register_alg(&des_alg); @@ -394,23 +552,46 @@ static int des_s390_init(void) ret = crypto_register_alg(&cbc_des_alg); if (ret) goto cbc_des_err; - ret = crypto_register_alg(&des3_192_alg); + ret = crypto_register_alg(&des3_alg); if (ret) - goto des3_192_err; - ret = crypto_register_alg(&ecb_des3_192_alg); + goto des3_err; + ret = crypto_register_alg(&ecb_des3_alg); if (ret) - goto ecb_des3_192_err; - ret = crypto_register_alg(&cbc_des3_192_alg); + goto ecb_des3_err; + ret = crypto_register_alg(&cbc_des3_alg); if (ret) - goto cbc_des3_192_err; + goto cbc_des3_err; + + if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&ctr_des_alg); + if (ret) + goto ctr_des_err; + ret = crypto_register_alg(&ctr_des3_alg); + if (ret) + goto ctr_des3_err; + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_mem_err; + } + } out: return ret; -cbc_des3_192_err: - crypto_unregister_alg(&ecb_des3_192_alg); -ecb_des3_192_err: - crypto_unregister_alg(&des3_192_alg); -des3_192_err: +ctr_mem_err: + crypto_unregister_alg(&ctr_des3_alg); +ctr_des3_err: + crypto_unregister_alg(&ctr_des_alg); +ctr_des_err: + crypto_unregister_alg(&cbc_des3_alg); +cbc_des3_err: + crypto_unregister_alg(&ecb_des3_alg); +ecb_des3_err: + crypto_unregister_alg(&des3_alg); +des3_err: crypto_unregister_alg(&cbc_des_alg); cbc_des_err: crypto_unregister_alg(&ecb_des_alg); @@ -422,9 +603,14 @@ des_err: static void __exit des_s390_exit(void) { - crypto_unregister_alg(&cbc_des3_192_alg); - crypto_unregister_alg(&ecb_des3_192_alg); - crypto_unregister_alg(&des3_192_alg); + if (ctrblk) { + crypto_unregister_alg(&ctr_des_alg); + crypto_unregister_alg(&ctr_des3_alg); + free_page((unsigned long) ctrblk); + } + crypto_unregister_alg(&cbc_des3_alg); + crypto_unregister_alg(&ecb_des3_alg); + crypto_unregister_alg(&des3_alg); crypto_unregister_alg(&cbc_des_alg); crypto_unregister_alg(&ecb_des_alg); crypto_unregister_alg(&des_alg); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c new file mode 100644 index 00000000000..d43485d142e --- /dev/null +++ b/arch/s390/crypto/ghash_s390.c @@ -0,0 +1,166 @@ +/* + * Cryptographic API. + * + * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode). + * + * Copyright IBM Corp. 2011 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> + +#include "crypt_s390.h" + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_ctx { + u8 icv[16]; + u8 key[16]; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, GHASH_BLOCK_SIZE); + memset(ctx->icv, 0, GHASH_BLOCK_SIZE); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + unsigned int n; + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + n = min(srclen, dctx->bytes); + dctx->bytes -= n; + srclen -= n; + + memcpy(pos, src, n); + src += n; + + if (!dctx->bytes) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + GHASH_BLOCK_SIZE); + if (ret != GHASH_BLOCK_SIZE) + return -EIO; + } + } + + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + if (n) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + if (ret != n) + return -EIO; + src += n; + srclen -= n; + } + + if (srclen) { + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + memcpy(buf, src, srclen); + } + + return 0; +} + +static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + memset(pos, 0, dctx->bytes); + + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + if (ret != GHASH_BLOCK_SIZE) + return -EIO; + } + + dctx->bytes = 0; + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + int ret; + + ret = ghash_flush(ctx, dctx); + if (!ret) + memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + return ret; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + }, +}; + +static int __init ghash_mod_init(void) +{ + if (!crypt_s390_func_available(KIMD_GHASH, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) + return -EOPNOTSUPP; + + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_mod_init); +module_exit(ghash_mod_exit); + +MODULE_ALIAS("ghash"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 975e3ab13cb..94a35a4c1b4 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2006,2007 + * Copyright IBM Corp. 2006, 2007 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> * Driver for the s390 pseudo random number generator */ @@ -76,7 +76,7 @@ static void prng_seed(int nbytes) /* Add the entropy */ while (nbytes >= 8) { - *((__u64 *)parm_block) ^= *((__u64 *)buf+i*8); + *((__u64 *)parm_block) ^= *((__u64 *)(buf+i)); prng_add_entropy(); i += 8; nbytes -= 8; @@ -166,7 +166,7 @@ static int __init prng_init(void) int ret; /* check if the CPU has a PRNG */ - if (!crypt_s390_func_available(KMC_PRNG)) + if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) return -EOPNOTSUPP; if (prng_chunk_size < 8) diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index f6de7826c97..a1b3a9dc9d8 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -8,7 +8,7 @@ * implementation written by Steve Reid. * * s390 Version: - * Copyright IBM Corp. 2003,2007 + * Copyright IBM Corp. 2003, 2007 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) * @@ -90,7 +90,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { - if (!crypt_s390_func_available(KIMD_SHA_1)) + if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA)) return -EOPNOTSUPP; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 61a7db37212..9b853809a49 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -1,15 +1,12 @@ /* * Cryptographic API. * - * s390 implementation of the SHA256 Secure Hash Algorithm. + * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. * * s390 Version: - * Copyright IBM Corp. 2005,2007 + * Copyright IBM Corp. 2005, 2011 * Author(s): Jan Glauber (jang@de.ibm.com) * - * Derived from "crypto/sha256_generic.c" - * and "arch/s390/crypto/sha1_s390.c" - * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -65,7 +62,7 @@ static int sha256_import(struct shash_desc *desc, const void *in) return 0; } -static struct shash_alg alg = { +static struct shash_alg sha256_alg = { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_init, .update = s390_sha_update, @@ -84,22 +81,69 @@ static struct shash_alg alg = { } }; -static int sha256_s390_init(void) +static int sha224_init(struct shash_desc *desc) { - if (!crypt_s390_func_available(KIMD_SHA_256)) - return -EOPNOTSUPP; + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - return crypto_register_shash(&alg); + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + sctx->func = KIMD_SHA_256; + + return 0; +} + +static struct shash_alg sha224_alg = { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name= "sha224-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha256_s390_init(void) +{ + int ret; + + if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + ret = crypto_register_shash(&sha256_alg); + if (ret < 0) + goto out; + ret = crypto_register_shash(&sha224_alg); + if (ret < 0) + crypto_unregister_shash(&sha256_alg); +out: + return ret; } static void __exit sha256_s390_fini(void) { - crypto_unregister_shash(&alg); + crypto_unregister_shash(&sha224_alg); + crypto_unregister_shash(&sha256_alg); } module_init(sha256_s390_init); module_exit(sha256_s390_fini); MODULE_ALIAS("sha256"); +MODULE_ALIAS("sha224"); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm"); +MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 4bf73d0dc52..32a81383b69 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -132,7 +132,7 @@ static int __init init(void) { int ret; - if (!crypt_s390_func_available(KIMD_SHA_512)) + if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA)) return -EOPNOTSUPP; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 48884f89ab9..8620b0ec9c4 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -14,6 +14,7 @@ */ #include <crypto/internal/hash.h> +#include <linux/module.h> #include "sha.h" #include "crypt_s390.h" @@ -35,7 +36,8 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) if (index) { memcpy(ctx->buf + index, data, bsize - index); ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize); - BUG_ON(ret != bsize); + if (ret != bsize) + return -EIO; data += bsize - index; len -= bsize - index; index = 0; @@ -45,7 +47,8 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) if (len >= bsize) { ret = crypt_s390_kimd(ctx->func, ctx->state, data, len & ~(bsize - 1)); - BUG_ON(ret != (len & ~(bsize - 1))); + if (ret != (len & ~(bsize - 1))) + return -EIO; data += ret; len -= ret; } @@ -87,7 +90,8 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) memcpy(ctx->buf + end - 8, &bits, sizeof(bits)); ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end); - BUG_ON(ret != end); + if (ret != end) + return -EIO; /* copy digest to out */ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 29c82c640a8..2e56498a40d 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,51 +1,62 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y -CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_RD_XZ=y +CONFIG_RD_LZO=y +CONFIG_RD_LZ4=y +CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set -CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y +CONFIG_MARCH_Z196=y +CONFIG_NR_CPUS=256 +CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CMA=y +CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m -CONFIG_CMM=m -CONFIG_HZ_100=y -CONFIG_KEXEC=y -CONFIG_PM=y CONFIG_HIBERNATION=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_NET_KEY=y -CONFIG_AFIUCV=m CONFIG_INET=y CONFIG_IP_MULTICAST=y # CONFIG_INET_LRO is not set CONFIG_IPV6=y -CONFIG_NET_SCTPPROBE=m CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m CONFIG_VLAN_8021Q=y @@ -67,8 +78,9 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y +CONFIG_BPF_JIT=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -84,48 +96,58 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_ZFCP=y -CONFIG_ZFCP_DIF=y +CONFIG_SCSI_VIRTIO=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_EQUALIZER=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_VIRTIO_NET=y +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set CONFIG_RAW_DRIVER=m -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_VIRTIO_BALLOON=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_BTRFS_FS=y +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_FANOTIFY=y +CONFIG_FUSE_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_DLM=m +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_TIMER_STATS=y +CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_KPROBES_SANITY_TEST=y -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_PROVE_RCU=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_DEBUG_PAGEALLOC=y -# CONFIG_FTRACE is not set +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_KPROBES_SANITY_TEST=y # CONFIG_STRICT_DEVMEM is not set -CONFIG_CRYPTO_NULL=m +CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m @@ -137,8 +159,11 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_CRCT10DIF=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m @@ -155,7 +180,6 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -166,6 +190,8 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m @@ -173,4 +199,10 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRC7=m -CONFIG_VIRTIO_BALLOON=y +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_POWERPC is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +CONFIG_CMM=m diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index 2e671d5004c..06f8d95a16c 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o -s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o +s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 80c1526f2af..b34b5ab90a3 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -1,8 +1,7 @@ /* - * arch/s390/hypfs/hypfs.h * Hypervisor filesystem for Linux on s390. * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * Author(s): Michael Holzheu <holzheu@de.ibm.com> */ @@ -14,31 +13,33 @@ #include <linux/debugfs.h> #include <linux/workqueue.h> #include <linux/kref.h> +#include <asm/hypfs.h> #define REG_FILE_MODE 0440 #define UPDATE_FILE_MODE 0220 #define DIR_MODE 0550 -extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, - const char *name); +extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name); -extern struct dentry *hypfs_create_u64(struct super_block *sb, - struct dentry *dir, const char *name, +extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value); -extern struct dentry *hypfs_create_str(struct super_block *sb, - struct dentry *dir, const char *name, +extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name, char *string); /* LPAR Hypervisor */ extern int hypfs_diag_init(void); extern void hypfs_diag_exit(void); -extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root); +extern int hypfs_diag_create_files(struct dentry *root); /* VM Hypervisor */ extern int hypfs_vm_init(void); extern void hypfs_vm_exit(void); -extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root); +extern int hypfs_vm_create_files(struct dentry *root); + +/* Set Partition-Resource Parameter */ +int hypfs_sprp_init(void); +void hypfs_sprp_exit(void); /* debugfs interface */ struct hypfs_dbfs_file; @@ -47,7 +48,7 @@ struct hypfs_dbfs_data { void *buf; void *buf_free_ptr; size_t size; - struct hypfs_dbfs_file *dbfs_file;; + struct hypfs_dbfs_file *dbfs_file; struct kref kref; }; @@ -56,6 +57,8 @@ struct hypfs_dbfs_file { int (*data_create)(void **data, void **data_free_ptr, size_t *size); void (*data_free)(const void *buf_free_ptr); + long (*unlocked_ioctl) (struct file *, unsigned int, + unsigned long); /* Private data for hypfs_dbfs.c */ struct hypfs_dbfs_data *data; diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index b478013b7fe..2badf2bf9cd 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -1,7 +1,7 @@ /* * Hypervisor filesystem for Linux on s390 - debugfs interface * - * Copyright (C) IBM Corp. 2010 + * Copyright IBM Corp. 2010 * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> */ @@ -54,7 +54,7 @@ static ssize_t dbfs_read(struct file *file, char __user *buf, if (*ppos != 0) return 0; - df = file->f_path.dentry->d_inode->i_private; + df = file_inode(file)->i_private; mutex_lock(&df->lock); if (!df->data) { data = hypfs_dbfs_data_alloc(df); @@ -81,9 +81,25 @@ static ssize_t dbfs_read(struct file *file, char __user *buf, return rc; } +static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct hypfs_dbfs_file *df; + long rc; + + df = file->f_path.dentry->d_inode->i_private; + mutex_lock(&df->lock); + if (df->unlocked_ioctl) + rc = df->unlocked_ioctl(file, cmd, arg); + else + rc = -ENOTTY; + mutex_unlock(&df->lock); + return rc; +} + static const struct file_operations dbfs_ops = { .read = dbfs_read, .llseek = no_llseek, + .unlocked_ioctl = dbfs_ioctl, }; int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) @@ -105,9 +121,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) int hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - if (IS_ERR(dbfs_dir)) - return PTR_ERR(dbfs_dir); - return 0; + return PTR_ERR_OR_ZERO(dbfs_dir); } void hypfs_dbfs_exit(void) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 6023c6dc1fb..5eeffeefae0 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -1,5 +1,4 @@ /* - * arch/s390/hypfs/hypfs_diag.c * Hypervisor filesystem for Linux on s390. Diag 204 and 224 * implementation. * @@ -562,10 +561,9 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) void *base; buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); - base = vmalloc(buf_size); + base = vzalloc(buf_size); if (!base) return -ENOMEM; - memset(base, 0, buf_size); d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); rc = diag204_do_store(d204->buf, diag204_buf_pages); if (rc) { @@ -625,8 +623,7 @@ void hypfs_diag_exit(void) * ******************************************* */ -static int hypfs_create_cpu_files(struct super_block *sb, - struct dentry *cpus_dir, void *cpu_info) +static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; @@ -634,32 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb, snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, cpu_info)); - cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); - rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_info_type, cpu_info) - cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); - rc = hypfs_create_u64(sb, cpu_dir, "cputime", + rc = hypfs_create_u64(cpu_dir, "cputime", cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); if (diag204_info_type == INFO_EXT) { - rc = hypfs_create_u64(sb, cpu_dir, "onlinetime", + rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_RET(rc); } -static void *hypfs_create_lpar_files(struct super_block *sb, - struct dentry *systems_dir, void *part_hdr) +static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) { struct dentry *cpus_dir; struct dentry *lpar_dir; @@ -669,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb, part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); lpar_name[LPAR_NAME_LEN] = 0; - lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name); + lpar_dir = hypfs_mkdir(systems_dir, lpar_name); if (IS_ERR(lpar_dir)) return lpar_dir; - cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus"); + cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); if (IS_ERR(cpus_dir)) return cpus_dir; cpu_info = part_hdr + part_hdr__size(diag204_info_type); for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { int rc; - rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info); + rc = hypfs_create_cpu_files(cpus_dir, cpu_info); if (rc) return ERR_PTR(rc); cpu_info += cpu_info__size(diag204_info_type); @@ -686,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb, return cpu_info; } -static int hypfs_create_phys_cpu_files(struct super_block *sb, - struct dentry *cpus_dir, void *cpu_info) +static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; @@ -695,34 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb, snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, cpu_info)); - cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); if (IS_ERR(cpu_dir)) return PTR_ERR(cpu_dir); - rc = hypfs_create_u64(sb, cpu_dir, "mgmtime", + rc = hypfs_create_u64(cpu_dir, "mgmtime", phys_cpu__mgm_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_RET(rc); } -static void *hypfs_create_phys_files(struct super_block *sb, - struct dentry *parent_dir, void *phys_hdr) +static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) { int i; void *cpu_info; struct dentry *cpus_dir; - cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus"); + cpus_dir = hypfs_mkdir(parent_dir, "cpus"); if (IS_ERR(cpus_dir)) return cpus_dir; cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { int rc; - rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info); + rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); if (rc) return ERR_PTR(rc); cpu_info += phys_cpu__size(diag204_info_type); @@ -730,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb, return cpu_info; } -int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) +int hypfs_diag_create_files(struct dentry *root) { struct dentry *systems_dir, *hyp_dir; void *time_hdr, *part_hdr; @@ -741,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) if (IS_ERR(buffer)) return PTR_ERR(buffer); - systems_dir = hypfs_mkdir(sb, root, "systems"); + systems_dir = hypfs_mkdir(root, "systems"); if (IS_ERR(systems_dir)) { rc = PTR_ERR(systems_dir); goto err_out; @@ -749,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root) time_hdr = (struct x_info_blk_hdr *)buffer; part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { - part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr); + part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); if (IS_ERR(part_hdr)) { rc = PTR_ERR(part_hdr); goto err_out; } } if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { - ptr = hypfs_create_phys_files(sb, root, part_hdr); + ptr = hypfs_create_phys_files(root, part_hdr); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); goto err_out; } } - hyp_dir = hypfs_mkdir(sb, root, "hyp"); + hyp_dir = hypfs_mkdir(root, "hyp"); if (IS_ERR(hyp_dir)) { rc = PTR_ERR(hyp_dir); goto err_out; } - ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor"); + ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); goto err_out; diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c new file mode 100644 index 00000000000..f043c3c7e73 --- /dev/null +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -0,0 +1,141 @@ +/* + * Hypervisor filesystem for Linux on s390. + * Set Partition-Resource Parameter interface. + * + * Copyright IBM Corp. 2013 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/compat.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <asm/compat.h> +#include <asm/sclp.h> +#include "hypfs.h" + +#define DIAG304_SET_WEIGHTS 0 +#define DIAG304_QUERY_PRP 1 +#define DIAG304_SET_CAPPING 2 + +#define DIAG304_CMD_MAX 2 + +static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) +{ + register unsigned long _data asm("2") = (unsigned long) data; + register unsigned long _rc asm("3"); + register unsigned long _cmd asm("4") = cmd; + + asm volatile("diag %1,%2,0x304\n" + : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory"); + + return _rc; +} + +static void hypfs_sprp_free(const void *data) +{ + free_page((unsigned long) data); +} + +static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size) +{ + unsigned long rc; + void *data; + + data = (void *) get_zeroed_page(GFP_KERNEL); + if (!data) + return -ENOMEM; + rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP); + if (rc != 1) { + *data_ptr = *free_ptr = NULL; + *size = 0; + free_page((unsigned long) data); + return -EIO; + } + *data_ptr = *free_ptr = data; + *size = PAGE_SIZE; + return 0; +} + +static int __hypfs_sprp_ioctl(void __user *user_area) +{ + struct hypfs_diag304 diag304; + unsigned long cmd; + void __user *udata; + void *data; + int rc; + + if (copy_from_user(&diag304, user_area, sizeof(diag304))) + return -EFAULT; + if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX) + return -EINVAL; + + data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!data) + return -ENOMEM; + + udata = (void __user *)(unsigned long) diag304.data; + if (diag304.args[1] == DIAG304_SET_WEIGHTS || + diag304.args[1] == DIAG304_SET_CAPPING) + if (copy_from_user(data, udata, PAGE_SIZE)) { + rc = -EFAULT; + goto out; + } + + cmd = *(unsigned long *) &diag304.args[0]; + diag304.rc = hypfs_sprp_diag304(data, cmd); + + if (diag304.args[1] == DIAG304_QUERY_PRP) + if (copy_to_user(udata, data, PAGE_SIZE)) { + rc = -EFAULT; + goto out; + } + + rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0; +out: + free_page((unsigned long) data); + return rc; +} + +static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + void __user *argp; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (is_compat_task()) + argp = compat_ptr(arg); + else + argp = (void __user *) arg; + switch (cmd) { + case HYPFS_DIAG304: + return __hypfs_sprp_ioctl(argp); + default: /* unknown ioctl number */ + return -ENOTTY; + } + return 0; +} + +static struct hypfs_dbfs_file hypfs_sprp_file = { + .name = "diag_304", + .data_create = hypfs_sprp_create, + .data_free = hypfs_sprp_free, + .unlocked_ioctl = hypfs_sprp_ioctl, +}; + +int hypfs_sprp_init(void) +{ + if (!sclp_has_sprp()) + return 0; + return hypfs_dbfs_create_file(&hypfs_sprp_file); +} + +void hypfs_sprp_exit(void) +{ + if (!sclp_has_sprp()) + return; + hypfs_dbfs_remove_file(&hypfs_sprp_file); +} diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e54796002f6..32040ace00e 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -1,7 +1,7 @@ /* * Hypervisor filesystem for Linux on s390. z/VM implementation. * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * Author(s): Michael Holzheu <holzheu@de.ibm.com> */ @@ -32,7 +32,7 @@ struct diag2fc_data { __u32 pcpus; __u32 lcpus; __u32 vcpus; - __u32 cpu_min; + __u32 ocpus; __u32 cpu_max; __u32 cpu_shares; __u32 cpu_use_samp; @@ -107,16 +107,15 @@ static void diag2fc_free(const void *data) vfree(data); } -#define ATTRIBUTE(sb, dir, name, member) \ +#define ATTRIBUTE(dir, name, member) \ do { \ void *rc; \ - rc = hypfs_create_u64(sb, dir, name, member); \ + rc = hypfs_create_u64(dir, name, member); \ if (IS_ERR(rc)) \ return PTR_ERR(rc); \ } while(0) -static int hpyfs_vm_create_guest(struct super_block *sb, - struct dentry *systems_dir, +static int hpyfs_vm_create_guest(struct dentry *systems_dir, struct diag2fc_data *data) { char guest_name[NAME_LEN + 1] = {}; @@ -130,46 +129,51 @@ static int hpyfs_vm_create_guest(struct super_block *sb, memcpy(guest_name, data->guest_name, NAME_LEN); EBCASC(guest_name, NAME_LEN); strim(guest_name); - guest_dir = hypfs_mkdir(sb, systems_dir, guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); if (IS_ERR(guest_dir)) return PTR_ERR(guest_dir); - ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); /* logical cpu information */ - cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus"); + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); if (IS_ERR(cpus_dir)) return PTR_ERR(cpus_dir); - ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu); - ATTRIBUTE(sb, cpus_dir, "capped", capped_value); - ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag); - ATTRIBUTE(sb, cpus_dir, "count", data->vcpus); - ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min); - ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max); - ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + /* + * Note: The "weight_min" attribute got the wrong name. + * The value represents the number of non-stopped (operating) + * CPUS. + */ + ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); /* memory information */ - mem_dir = hypfs_mkdir(sb, guest_dir, "mem"); + mem_dir = hypfs_mkdir(guest_dir, "mem"); if (IS_ERR(mem_dir)) return PTR_ERR(mem_dir); - ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb); - ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb); - ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb); - ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); /* samples */ - samples_dir = hypfs_mkdir(sb, guest_dir, "samples"); + samples_dir = hypfs_mkdir(guest_dir, "samples"); if (IS_ERR(samples_dir)) return PTR_ERR(samples_dir); - ATTRIBUTE(sb, samples_dir, "cpu_using", data->cpu_use_samp); - ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp); - ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp); - ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp); - ATTRIBUTE(sb, samples_dir, "other", data->other_samp); - ATTRIBUTE(sb, samples_dir, "total", data->total_samp); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); return 0; } -int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) +int hypfs_vm_create_files(struct dentry *root) { struct dentry *dir, *file; struct diag2fc_data *data; @@ -181,38 +185,38 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) return PTR_ERR(data); /* Hpervisor Info */ - dir = hypfs_mkdir(sb, root, "hyp"); + dir = hypfs_mkdir(root, "hyp"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor"); + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); if (IS_ERR(file)) { rc = PTR_ERR(file); goto failed; } /* physical cpus */ - dir = hypfs_mkdir(sb, root, "cpus"); + dir = hypfs_mkdir(root, "cpus"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_u64(sb, dir, "count", data->lcpus); + file = hypfs_create_u64(dir, "count", data->lcpus); if (IS_ERR(file)) { rc = PTR_ERR(file); goto failed; } /* guests */ - dir = hypfs_mkdir(sb, root, "systems"); + dir = hypfs_mkdir(root, "systems"); if (IS_ERR(dir)) { rc = PTR_ERR(dir); goto failed; } for (i = 0; i < count; i++) { - rc = hpyfs_vm_create_guest(sb, dir, &(data[i])); + rc = hpyfs_vm_create_guest(dir, &(data[i])); if (rc) goto failed; } @@ -245,7 +249,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); if (IS_ERR(d2fc)) return PTR_ERR(d2fc); - get_clock_ext(d2fc->hdr.tod_ext); + get_tod_clock_ext(d2fc->hdr.tod_ext); d2fc->hdr.len = count * sizeof(struct diag2fc_data); d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; d2fc->hdr.count = count; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6fe874fc5f8..c952b981e4f 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -1,5 +1,4 @@ /* - * arch/s390/hypfs/inode.c * Hypervisor filesystem for Linux on s390. * * Copyright IBM Corp. 2006, 2008 @@ -22,18 +21,18 @@ #include <linux/module.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/aio.h> #include <asm/ebcdic.h> #include "hypfs.h" #define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */ #define TMP_SIZE 64 /* size of temporary buffers */ -static struct dentry *hypfs_create_update_file(struct super_block *sb, - struct dentry *dir); +static struct dentry *hypfs_create_update_file(struct dentry *dir); struct hypfs_sb_info { - uid_t uid; /* uid used for files and dirs */ - gid_t gid; /* gid used for files and dirs */ + kuid_t uid; /* uid used for files and dirs */ + kgid_t gid; /* gid used for files and dirs */ struct dentry *update_file; /* file to trigger update */ time_t last_update; /* last update time in secs since 1970 */ struct mutex lock; /* lock to protect update process */ @@ -73,8 +72,6 @@ static void hypfs_remove(struct dentry *dentry) struct dentry *parent; parent = dentry->d_parent; - if (!parent || !parent->d_inode) - return; mutex_lock(&parent->d_inode->i_mutex); if (hypfs_positive(dentry)) { if (S_ISDIR(dentry->d_inode->i_mode)) @@ -97,33 +94,32 @@ static void hypfs_delete_tree(struct dentry *root) } } -static struct inode *hypfs_make_inode(struct super_block *sb, int mode) +static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) { struct inode *ret = new_inode(sb); if (ret) { struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; - if (mode & S_IFDIR) - ret->i_nlink = 2; - else - ret->i_nlink = 1; + if (S_ISDIR(mode)) + set_nlink(ret, 2); } return ret; } static void hypfs_evict_inode(struct inode *inode) { - end_writeback(inode); + clear_inode(inode); kfree(inode->i_private); } static int hypfs_open(struct inode *inode, struct file *filp) { - char *data = filp->f_path.dentry->d_inode->i_private; + char *data = file_inode(filp)->i_private; struct hypfs_sb_info *fs_info; if (filp->f_mode & FMODE_WRITE) { @@ -175,12 +171,10 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t offset) { int rc; - struct super_block *sb; - struct hypfs_sb_info *fs_info; + struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; + struct hypfs_sb_info *fs_info = sb->s_fs_info; size_t count = iov_length(iov, nr_segs); - sb = iocb->ki_filp->f_path.dentry->d_inode->i_sb; - fs_info = sb->s_fs_info; /* * Currently we only allow one update per second for two reasons: * 1. diag 204 is VERY expensive @@ -198,9 +192,9 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, } hypfs_delete_tree(sb->s_root); if (MACHINE_IS_VM) - rc = hypfs_vm_create_files(sb, sb->s_root); + rc = hypfs_vm_create_files(sb->s_root); else - rc = hypfs_diag_create_files(sb, sb->s_root); + rc = hypfs_diag_create_files(sb->s_root); if (rc) { pr_err("Updating the hypfs tree failed\n"); hypfs_delete_tree(sb->s_root); @@ -231,6 +225,8 @@ static int hypfs_parse_options(char *options, struct super_block *sb) { char *str; substring_t args[MAX_OPT_ARGS]; + kuid_t uid; + kgid_t gid; if (!options) return 0; @@ -245,12 +241,18 @@ static int hypfs_parse_options(char *options, struct super_block *sb) case opt_uid: if (match_int(&args[0], &option)) return -EINVAL; - hypfs_info->uid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + hypfs_info->uid = uid; break; case opt_gid: if (match_int(&args[0], &option)) return -EINVAL; - hypfs_info->gid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + hypfs_info->gid = gid; break; case opt_err: default: @@ -261,12 +263,12 @@ static int hypfs_parse_options(char *options, struct super_block *sb) return 0; } -static int hypfs_show_options(struct seq_file *s, struct vfsmount *mnt) +static int hypfs_show_options(struct seq_file *s, struct dentry *root) { - struct hypfs_sb_info *hypfs_info = mnt->mnt_sb->s_fs_info; + struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; - seq_printf(s, ",uid=%u", hypfs_info->uid); - seq_printf(s, ",gid=%u", hypfs_info->gid); + seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid)); + seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid)); return 0; } @@ -295,18 +297,16 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; root_inode->i_op = &simple_dir_inode_operations; root_inode->i_fop = &simple_dir_operations; - sb->s_root = root_dentry = d_alloc_root(root_inode); - if (!root_dentry) { - iput(root_inode); + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) return -ENOMEM; - } if (MACHINE_IS_VM) - rc = hypfs_vm_create_files(sb, root_dentry); + rc = hypfs_vm_create_files(root_dentry); else - rc = hypfs_diag_create_files(sb, root_dentry); + rc = hypfs_diag_create_files(root_dentry); if (rc) return rc; - sbi->update_file = hypfs_create_update_file(sb, root_dentry); + sbi->update_file = hypfs_create_update_file(root_dentry); if (IS_ERR(sbi->update_file)) return PTR_ERR(sbi->update_file); hypfs_update_update(sb); @@ -333,9 +333,8 @@ static void hypfs_kill_super(struct super_block *sb) kill_litter_super(sb); } -static struct dentry *hypfs_create_file(struct super_block *sb, - struct dentry *parent, const char *name, - char *data, mode_t mode) +static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, + char *data, umode_t mode) { struct dentry *dentry; struct inode *inode; @@ -346,22 +345,22 @@ static struct dentry *hypfs_create_file(struct super_block *sb, dentry = ERR_PTR(-ENOMEM); goto fail; } - inode = hypfs_make_inode(sb, mode); + inode = hypfs_make_inode(parent->d_sb, mode); if (!inode) { dput(dentry); dentry = ERR_PTR(-ENOMEM); goto fail; } - if (mode & S_IFREG) { + if (S_ISREG(mode)) { inode->i_fop = &hypfs_file_ops; if (data) inode->i_size = strlen(data); else inode->i_size = 0; - } else if (mode & S_IFDIR) { + } else if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - parent->d_inode->i_nlink++; + inc_nlink(parent->d_inode); } else BUG(); inode->i_private = data; @@ -372,24 +371,22 @@ fail: return dentry; } -struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent, - const char *name) +struct dentry *hypfs_mkdir(struct dentry *parent, const char *name) { struct dentry *dentry; - dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE); + dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE); if (IS_ERR(dentry)) return dentry; hypfs_add_dentry(dentry); return dentry; } -static struct dentry *hypfs_create_update_file(struct super_block *sb, - struct dentry *dir) +static struct dentry *hypfs_create_update_file(struct dentry *dir) { struct dentry *dentry; - dentry = hypfs_create_file(sb, dir, "update", NULL, + dentry = hypfs_create_file(dir, "update", NULL, S_IFREG | UPDATE_FILE_MODE); /* * We do not put the update file on the 'delete' list with @@ -399,7 +396,7 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb, return dentry; } -struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, +struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value) { char *buffer; @@ -411,7 +408,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, if (!buffer) return ERR_PTR(-ENOMEM); dentry = - hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); return ERR_PTR(-ENOMEM); @@ -420,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, return dentry; } -struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, +struct dentry *hypfs_create_str(struct dentry *dir, const char *name, char *string) { char *buffer; @@ -431,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir, return ERR_PTR(-ENOMEM); sprintf(buffer, "%s\n", string); dentry = - hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE); + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); return ERR_PTR(-ENOMEM); @@ -456,6 +453,7 @@ static struct file_system_type hypfs_type = { .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; +MODULE_ALIAS_FS("s390_hypfs"); static const struct super_operations hypfs_s_ops = { .statfs = simple_statfs, @@ -480,10 +478,14 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_diag_exit; } + if (hypfs_sprp_init()) { + rc = -ENODATA; + goto fail_hypfs_vm_exit; + } s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); if (!s390_kobj) { rc = -ENOMEM; - goto fail_hypfs_vm_exit; + goto fail_hypfs_sprp_exit; } rc = register_filesystem(&hypfs_type); if (rc) @@ -492,6 +494,8 @@ static int __init hypfs_init(void) fail_filesystem: kobject_put(s390_kobj); +fail_hypfs_sprp_exit: + hypfs_sprp_exit(); fail_hypfs_vm_exit: hypfs_vm_exit(); fail_hypfs_diag_exit: @@ -504,11 +508,12 @@ fail_dbfs_exit: static void __exit hypfs_exit(void) { - hypfs_diag_exit(); - hypfs_vm_exit(); - hypfs_dbfs_exit(); unregister_filesystem(&hypfs_type); kobject_put(s390_kobj); + hypfs_sprp_exit(); + hypfs_vm_exit(); + hypfs_diag_exit(); + hypfs_dbfs_exit(); } module_init(hypfs_init) diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 287d7bbb6d3..57892a8a905 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,15 +1,7 @@ -include include/asm-generic/Kbuild.asm -header-y += chpid.h -header-y += chsc.h -header-y += cmb.h -header-y += dasd.h -header-y += debug.h -header-y += kvm_virtio.h -header-y += monwriter.h -header-y += qeth.h -header-y += schid.h -header-y += tape390.h -header-y += ucontext.h -header-y += vtoc.h -header-y += zcrypt.h + +generic-y += clkdev.h +generic-y += hash.h +generic-y += mcs_spinlock.h +generic-y += preempt.h +generic-y += trace_clock.h diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 1ac80d6b058..bd93ff6661b 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -1,7 +1,5 @@ /* - * include/asm-s390/airq.h - * - * Copyright IBM Corp. 2002,2007 + * Copyright IBM Corp. 2002, 2007 * Author(s): Ingo Adlung <adlung@de.ibm.com> * Cornelia Huck <cornelia.huck@de.ibm.com> * Arnd Bergmann <arndb@de.ibm.com> @@ -11,9 +9,95 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H -typedef void (*adapter_int_handler_t)(void *, void *); +#include <linux/bit_spinlock.h> -void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8); -void s390_unregister_adapter_interrupt(void *, u8); +struct airq_struct { + struct hlist_node list; /* Handler queueing. */ + void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ + u8 lsi_mask; /* Local-Summary-Indicator mask */ + u8 isc; /* Interrupt-subclass */ + u8 flags; +}; + +#define AIRQ_PTR_ALLOCATED 0x01 + +int register_adapter_interrupt(struct airq_struct *airq); +void unregister_adapter_interrupt(struct airq_struct *airq); + +/* Adapter interrupt bit vector */ +struct airq_iv { + unsigned long *vector; /* Adapter interrupt bit vector */ + unsigned long *avail; /* Allocation bit mask for the bit vector */ + unsigned long *bitlock; /* Lock bit mask for the bit vector */ + unsigned long *ptr; /* Pointer associated with each bit */ + unsigned int *data; /* 32 bit value associated with each bit */ + unsigned long bits; /* Number of bits in the vector */ + unsigned long end; /* Number of highest allocated bit + 1 */ + spinlock_t lock; /* Lock to protect alloc & free */ +}; + +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ + +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); +void airq_iv_release(struct airq_iv *iv); +unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num); +void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num); +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end); + +static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +{ + return airq_iv_alloc(iv, 1); +} + +static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +{ + airq_iv_free(iv, bit, 1); +} + +static inline unsigned long airq_iv_end(struct airq_iv *iv) +{ + return iv->end; +} + +static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_lock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_unlock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit, + unsigned int data) +{ + iv->data[bit] = data; +} + +static inline unsigned int airq_iv_get_data(struct airq_iv *iv, + unsigned long bit) +{ + return iv->data[bit]; +} + +static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit, + unsigned long ptr) +{ + iv->ptr[bit] = ptr; +} + +static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv, + unsigned long bit) +{ + return iv->ptr[bit]; +} #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index 79283dac828..32a70598715 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -1,7 +1,5 @@ /* - * include/asm-s390/appldata.h - * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * * Author(s): Melissa Howland <melissah@us.ibm.com> */ @@ -72,7 +70,7 @@ static inline int appldata_asm(struct appldata_product_id *id, int ry; if (!MACHINE_IS_VM) - return -ENOSYS; + return -EOPNOTSUPP; parm_list.diag = 0xdc; parm_list.function = fn; parm_list.parlist_length = sizeof(parm_list); diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 5c5ba10384c..fa934fe080c 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -1,39 +1,75 @@ -#ifndef __ARCH_S390_ATOMIC__ -#define __ARCH_S390_ATOMIC__ - /* - * Copyright 1999,2009 IBM Corp. + * Copyright IBM Corp. 1999, 2009 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, * Denis Joseph Barrow, * Arnd Bergmann <arndb@de.ibm.com>, * * Atomic operations that C can't guarantee us. * Useful for resource counting etc. - * s390 uses 'Compare And Swap' for atomicity in SMP enviroment. + * s390 uses 'Compare And Swap' for atomicity in SMP environment. * */ +#ifndef __ARCH_S390_ATOMIC__ +#define __ARCH_S390_ATOMIC__ + #include <linux/compiler.h> #include <linux/types.h> -#include <asm/system.h> +#include <asm/barrier.h> +#include <asm/cmpxchg.h> #define ATOMIC_INIT(i) { (i) } -#define __CS_LOOP(ptr, op_val, op_string) ({ \ +#define __ATOMIC_NO_BARRIER "\n" + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __ATOMIC_OR "lao" +#define __ATOMIC_AND "lan" +#define __ATOMIC_ADD "laa" +#define __ATOMIC_BARRIER "bcr 14,0\n" + +#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ + int old_val; \ + \ + typecheck(atomic_t *, ptr); \ + asm volatile( \ + __barrier \ + op_string " %0,%2,%1\n" \ + __barrier \ + : "=d" (old_val), "+Q" ((ptr)->counter) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define __ATOMIC_OR "or" +#define __ATOMIC_AND "nr" +#define __ATOMIC_ADD "ar" +#define __ATOMIC_BARRIER "\n" + +#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ int old_val, new_val; \ + \ + typecheck(atomic_t *, ptr); \ asm volatile( \ " l %0,%2\n" \ "0: lr %1,%0\n" \ op_string " %1,%3\n" \ " cs %0,%1,%2\n" \ " jl 0b" \ - : "=&d" (old_val), "=&d" (new_val), \ - "=Q" (((atomic_t *)(ptr))->counter) \ - : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ + : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\ + : "d" (op_val) \ : "cc", "memory"); \ - new_val; \ + old_val; \ }) +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + static inline int atomic_read(const atomic_t *v) { int c; @@ -53,32 +89,43 @@ static inline void atomic_set(atomic_t *v, int i) static inline int atomic_add_return(int i, atomic_t *v) { - return __CS_LOOP(v, i, "ar"); + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } -#define atomic_add(_i, _v) atomic_add_return(_i, _v) -#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0) -#define atomic_inc(_v) atomic_add_return(1, _v) -#define atomic_inc_return(_v) atomic_add_return(1, _v) -#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) -static inline int atomic_sub_return(int i, atomic_t *v) +static inline void atomic_add(int i, atomic_t *v) { - return __CS_LOOP(v, i, "sr"); +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + asm volatile( + "asi %0,%1\n" + : "+Q" (v->counter) + : "i" (i) + : "cc", "memory"); + return; + } +#endif + __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER); } -#define atomic_sub(_i, _v) atomic_sub_return(_i, _v) + +#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0) +#define atomic_inc(_v) atomic_add(1, _v) +#define atomic_inc_return(_v) atomic_add_return(1, _v) +#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) +#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) +#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) -#define atomic_dec(_v) atomic_sub_return(1, _v) +#define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -static inline void atomic_clear_mask(unsigned long mask, atomic_t *v) +static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { - __CS_LOOP(v, ~mask, "nr"); + __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER); } -static inline void atomic_set_mask(unsigned long mask, atomic_t *v) +static inline void atomic_set_mask(unsigned int mask, atomic_t *v) { - __CS_LOOP(v, mask, "or"); + __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER); } #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -87,13 +134,13 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { asm volatile( " cs %0,%2,%1" - : "+d" (old), "=Q" (v->counter) - : "d" (new), "Q" (v->counter) + : "+d" (old), "+Q" (v->counter) + : "d" (new) : "cc", "memory"); return old; } -static inline int atomic_add_unless(atomic_t *v, int a, int u) +static inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); @@ -105,32 +152,66 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) break; c = old; } - return c != u; + return c; } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#undef __CS_LOOP +#undef __ATOMIC_LOOP #define ATOMIC64_INIT(i) { (i) } #ifdef CONFIG_64BIT -#define __CSG_LOOP(ptr, op_val, op_string) ({ \ +#define __ATOMIC64_NO_BARRIER "\n" + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __ATOMIC64_OR "laog" +#define __ATOMIC64_AND "lang" +#define __ATOMIC64_ADD "laag" +#define __ATOMIC64_BARRIER "bcr 14,0\n" + +#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ + long long old_val; \ + \ + typecheck(atomic64_t *, ptr); \ + asm volatile( \ + __barrier \ + op_string " %0,%2,%1\n" \ + __barrier \ + : "=d" (old_val), "+Q" ((ptr)->counter) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define __ATOMIC64_OR "ogr" +#define __ATOMIC64_AND "ngr" +#define __ATOMIC64_ADD "agr" +#define __ATOMIC64_BARRIER "\n" + +#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ long long old_val, new_val; \ + \ + typecheck(atomic64_t *, ptr); \ asm volatile( \ " lg %0,%2\n" \ "0: lgr %1,%0\n" \ op_string " %1,%3\n" \ " csg %0,%1,%2\n" \ " jl 0b" \ - : "=&d" (old_val), "=&d" (new_val), \ - "=Q" (((atomic_t *)(ptr))->counter) \ - : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ + : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\ + : "d" (op_val) \ : "cc", "memory"); \ - new_val; \ + old_val; \ }) +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + static inline long long atomic64_read(const atomic64_t *v) { long long c; @@ -150,22 +231,32 @@ static inline void atomic64_set(atomic64_t *v, long long i) static inline long long atomic64_add_return(long long i, atomic64_t *v) { - return __CSG_LOOP(v, i, "agr"); + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } -static inline long long atomic64_sub_return(long long i, atomic64_t *v) +static inline void atomic64_add(long long i, atomic64_t *v) { - return __CSG_LOOP(v, i, "sgr"); +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + asm volatile( + "agsi %0,%1\n" + : "+Q" (v->counter) + : "i" (i) + : "cc", "memory"); + return; + } +#endif + __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER); } static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v) { - __CSG_LOOP(v, ~mask, "ngr"); + __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER); } static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v) { - __CSG_LOOP(v, mask, "ogr"); + __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER); } #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) @@ -175,13 +266,13 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, { asm volatile( " csg %0,%2,%1" - : "+d" (old), "=Q" (v->counter) - : "d" (new), "Q" (v->counter) + : "+d" (old), "+Q" (v->counter) + : "d" (new) : "cc", "memory"); return old; } -#undef __CSG_LOOP +#undef __ATOMIC64_LOOP #else /* CONFIG_64BIT */ @@ -217,8 +308,8 @@ static inline long long atomic64_xchg(atomic64_t *v, long long new) " lm %0,%N0,%1\n" "0: cds %0,%2,%1\n" " jl 0b\n" - : "=&d" (rp_old), "=Q" (v->counter) - : "d" (rp_new), "Q" (v->counter) + : "=&d" (rp_old), "+Q" (v->counter) + : "d" (rp_new) : "cc"); return rp_old.pair; } @@ -231,8 +322,8 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, asm volatile( " cds %0,%2,%1" - : "+&d" (rp_old), "=Q" (v->counter) - : "d" (rp_new), "Q" (v->counter) + : "+&d" (rp_old), "+Q" (v->counter) + : "d" (rp_new) : "cc"); return rp_old.pair; } @@ -249,17 +340,6 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return new; } -static inline long long atomic64_sub_return(long long i, atomic64_t *v) -{ - long long old, new; - - do { - old = atomic64_read(v); - new = old - i; - } while (atomic64_cmpxchg(v, old, new) != old); - return new; -} - static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v) { long long old, new; @@ -280,9 +360,14 @@ static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) } while (atomic64_cmpxchg(v, old, new) != old); } +static inline void atomic64_add(long long i, atomic64_t *v) +{ + atomic64_add_return(i, v); +} + #endif /* CONFIG_64BIT */ -static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) { long long c, old; @@ -290,7 +375,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) for (;;) { if (unlikely(c == u)) break; - old = atomic64_cmpxchg(v, c, c + a); + old = atomic64_cmpxchg(v, c, c + i); if (likely(old == c)) break; c = old; @@ -315,23 +400,16 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) return dec; } -#define atomic64_add(_i, _v) atomic64_add_return(_i, _v) #define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) -#define atomic64_inc(_v) atomic64_add_return(1, _v) +#define atomic64_inc(_v) atomic64_add(1, _v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) -#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v) +#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) #define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) -#define atomic64_dec(_v) atomic64_sub_return(1, _v) +#define atomic64_dec(_v) atomic64_sub(1, _v) #define atomic64_dec_return(_v) atomic64_sub_return(1, _v) #define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -#define smp_mb__before_atomic_dec() smp_mb() -#define smp_mb__after_atomic_dec() smp_mb() -#define smp_mb__before_atomic_inc() smp_mb() -#define smp_mb__after_atomic_inc() smp_mb() - -#include <asm-generic/atomic-long.h> - #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h new file mode 100644 index 00000000000..19ff956b752 --- /dev/null +++ b/arch/s390/include/asm/barrier.h @@ -0,0 +1,51 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES +/* Fast-BCR without checkpoint synchronization */ +#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#else +#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#endif + +#define rmb() mb() +#define wmb() mb() +#define read_barrier_depends() do { } while(0) +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() + +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() smp_mb() + +#define set_mb(var, value) do { var = value; mb(); } while (0) + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 2e05972c508..52054247767 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -1,579 +1,397 @@ -#ifndef _S390_BITOPS_H -#define _S390_BITOPS_H - /* - * include/asm-s390/bitops.h + * Copyright IBM Corp. 1999,2013 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * + * The description below was taken in large parts from the powerpc + * bitops header file: + * Within a word, bits are numbered LSB first. Lot's of places make + * this assumption by directly testing bits with (val & (1<<nr)). + * This can cause confusion for large (> 1 word) bitmaps on a + * big-endian system because, unlike little endian, the number of each + * bit depends on the word size. + * + * The bitop functions are defined to work on unsigned longs, so for an + * s390x system the bits end up numbered: + * |63..............0|127............64|191...........128|255...........192| + * and on s390: + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * There are a few little-endian macros used mostly for filesystem + * bitmaps, these work on similar bit arrays layouts, but + * byte-oriented: + * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| * - * Derived from "include/asm-i386/bitops.h" - * Copyright (C) 1992, Linus Torvalds + * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit + * number field needs to be reversed compared to the big-endian bit + * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b). + * + * We also have special functions which work with an MSB0 encoding: + * on an s390x system the bits are numbered: + * |0..............63|64............127|128...........191|192...........255| + * and on s390: + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| + * + * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit + * number field needs to be reversed compared to the LSB0 encoded bit + * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b). * */ -#ifdef __KERNEL__ +#ifndef _S390_BITOPS_H +#define _S390_BITOPS_H #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif +#include <linux/typecheck.h> #include <linux/compiler.h> +#include <asm/barrier.h> -/* - * 32 bit bitops format: - * bit 0 is the LSB of *addr; bit 31 is the MSB of *addr; - * bit 32 is the LSB of *(addr+4). That combined with the - * big endian byte order on S390 give the following bit - * order in memory: - * 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 \ - * 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - * after that follows the next long with bit numbers - * 3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 - * 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20 - * The reason for this bit ordering is the fact that - * in the architecture independent code bits operations - * of the form "flags |= (1 << bitnr)" are used INTERMIXED - * with operation of the form "set_bit(bitnr, flags)". - * - * 64 bit bitops format: - * bit 0 is the LSB of *addr; bit 63 is the MSB of *addr; - * bit 64 is the LSB of *(addr+8). That combined with the - * big endian byte order on S390 give the following bit - * order in memory: - * 3f 3e 3d 3c 3b 3a 39 38 37 36 35 34 33 32 31 30 - * 2f 2e 2d 2c 2b 2a 29 28 27 26 25 24 23 22 21 20 - * 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 - * 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 - * after that follows the next long with bit numbers - * 7f 7e 7d 7c 7b 7a 79 78 77 76 75 74 73 72 71 70 - * 6f 6e 6d 6c 6b 6a 69 68 67 66 65 64 63 62 61 60 - * 5f 5e 5d 5c 5b 5a 59 58 57 56 55 54 53 52 51 50 - * 4f 4e 4d 4c 4b 4a 49 48 47 46 45 44 43 42 41 40 - * The reason for this bit ordering is the fact that - * in the architecture independent code bits operations - * of the form "flags |= (1 << bitnr)" are used INTERMIXED - * with operation of the form "set_bit(bitnr, flags)". - */ - -/* bitmap tables from arch/s390/kernel/bitmap.c */ -extern const char _oi_bitmap[]; -extern const char _ni_bitmap[]; -extern const char _zb_findmap[]; -extern const char _sb_findmap[]; +#define __BITOPS_NO_BARRIER "\n" -#ifndef __s390x__ +#ifndef CONFIG_64BIT -#define __BITOPS_ALIGN 3 -#define __BITOPS_WORDSIZE 32 #define __BITOPS_OR "or" #define __BITOPS_AND "nr" #define __BITOPS_XOR "xr" +#define __BITOPS_BARRIER "\n" -#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string) \ +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ +({ \ + unsigned long __old, __new; \ + \ + typecheck(unsigned long *, (__addr)); \ asm volatile( \ " l %0,%2\n" \ "0: lr %1,%0\n" \ __op_string " %1,%3\n" \ " cs %0,%1,%2\n" \ " jl 0b" \ - : "=&d" (__old), "=&d" (__new), \ - "=Q" (*(unsigned long *) __addr) \ - : "d" (__val), "Q" (*(unsigned long *) __addr) \ - : "cc"); + : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\ + : "d" (__val) \ + : "cc", "memory"); \ + __old; \ +}) + +#else /* CONFIG_64BIT */ + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __BITOPS_OR "laog" +#define __BITOPS_AND "lang" +#define __BITOPS_XOR "laxg" +#define __BITOPS_BARRIER "bcr 14,0\n" + +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ +({ \ + unsigned long __old; \ + \ + typecheck(unsigned long *, (__addr)); \ + asm volatile( \ + __barrier \ + __op_string " %0,%2,%1\n" \ + __barrier \ + : "=d" (__old), "+Q" (*(__addr)) \ + : "d" (__val) \ + : "cc", "memory"); \ + __old; \ +}) -#else /* __s390x__ */ +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -#define __BITOPS_ALIGN 7 -#define __BITOPS_WORDSIZE 64 #define __BITOPS_OR "ogr" #define __BITOPS_AND "ngr" #define __BITOPS_XOR "xgr" +#define __BITOPS_BARRIER "\n" -#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string) \ +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ +({ \ + unsigned long __old, __new; \ + \ + typecheck(unsigned long *, (__addr)); \ asm volatile( \ " lg %0,%2\n" \ "0: lgr %1,%0\n" \ __op_string " %1,%3\n" \ " csg %0,%1,%2\n" \ " jl 0b" \ - : "=&d" (__old), "=&d" (__new), \ - "=Q" (*(unsigned long *) __addr) \ - : "d" (__val), "Q" (*(unsigned long *) __addr) \ - : "cc"); + : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\ + : "d" (__val) \ + : "cc", "memory"); \ + __old; \ +}) -#endif /* __s390x__ */ +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -#define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE) -#define __BITOPS_BARRIER() asm volatile("" : : : "memory") +#endif /* CONFIG_64BIT */ -#ifdef CONFIG_SMP -/* - * SMP safe set_bit routine based on compare and swap (CS) - */ -static inline void set_bit_cs(unsigned long nr, volatile unsigned long *ptr) +#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) + +static inline unsigned long * +__bitops_word(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr, old, new, mask; - - addr = (unsigned long) ptr; - /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; - /* make OR mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); - /* Do the atomic update. */ - __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); + unsigned long addr; + + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); + return (unsigned long *)addr; } -/* - * SMP safe clear_bit routine based on compare and swap (CS) - */ -static inline void clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) +static inline unsigned char * +__bitops_byte(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr, old, new, mask; - - addr = (unsigned long) ptr; - /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; - /* make AND mask */ - mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); - /* Do the atomic update. */ - __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); + return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); } -/* - * SMP safe change_bit routine based on compare and swap (CS) - */ -static inline void change_bit_cs(unsigned long nr, volatile unsigned long *ptr) +static inline void set_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr, old, new, mask; - - addr = (unsigned long) ptr; - /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; - /* make XOR mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); - /* Do the atomic update. */ - __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long mask; + +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + if (__builtin_constant_p(nr)) { + unsigned char *caddr = __bitops_byte(nr, ptr); + + asm volatile( + "oi %0,%b1\n" + : "+Q" (*caddr) + : "i" (1 << (nr & 7)) + : "cc", "memory"); + return; + } +#endif + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER); } -/* - * SMP safe test_and_set_bit routine based on compare and swap (CS) - */ -static inline int -test_and_set_bit_cs(unsigned long nr, volatile unsigned long *ptr) +static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr, old, new, mask; - - addr = (unsigned long) ptr; - /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; - /* make OR/test mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); - /* Do the atomic update. */ - __BITOPS_LOOP(old, new, addr, mask, __BITOPS_OR); - __BITOPS_BARRIER(); - return (old & mask) != 0; + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long mask; + +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + if (__builtin_constant_p(nr)) { + unsigned char *caddr = __bitops_byte(nr, ptr); + + asm volatile( + "ni %0,%b1\n" + : "+Q" (*caddr) + : "i" (~(1 << (nr & 7))) + : "cc", "memory"); + return; + } +#endif + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); + __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER); } -/* - * SMP safe test_and_clear_bit routine based on compare and swap (CS) - */ -static inline int -test_and_clear_bit_cs(unsigned long nr, volatile unsigned long *ptr) +static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr, old, new, mask; - - addr = (unsigned long) ptr; - /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; - /* make AND/test mask */ - mask = ~(1UL << (nr & (__BITOPS_WORDSIZE - 1))); - /* Do the atomic update. */ - __BITOPS_LOOP(old, new, addr, mask, __BITOPS_AND); - __BITOPS_BARRIER(); - return (old ^ new) != 0; + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long mask; + +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + if (__builtin_constant_p(nr)) { + unsigned char *caddr = __bitops_byte(nr, ptr); + + asm volatile( + "xi %0,%b1\n" + : "+Q" (*caddr) + : "i" (1 << (nr & 7)) + : "cc", "memory"); + return; + } +#endif + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER); } -/* - * SMP safe test_and_change_bit routine based on compare and swap (CS) - */ static inline int -test_and_change_bit_cs(unsigned long nr, volatile unsigned long *ptr) +test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr, old, new, mask; - - addr = (unsigned long) ptr; - /* calculate address for CS */ - addr += (nr ^ (nr & (__BITOPS_WORDSIZE - 1))) >> 3; - /* make XOR/test mask */ - mask = 1UL << (nr & (__BITOPS_WORDSIZE - 1)); - /* Do the atomic update. */ - __BITOPS_LOOP(old, new, addr, mask, __BITOPS_XOR); - __BITOPS_BARRIER(); + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long old, mask; + + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER); return (old & mask) != 0; } -#endif /* CONFIG_SMP */ -/* - * fast, non-SMP set_bit routine - */ -static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) +static inline int +test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long old, mask; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - asm volatile( - " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); + old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER); + return (old & ~mask) != 0; } -static inline void -__constant_set_bit(const unsigned long nr, volatile unsigned long *ptr) +static inline int +test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long old, mask; - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - *(unsigned char *) addr |= 1 << (nr & 7); + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER); + return (old & mask) != 0; } -#define set_bit_simple(nr,addr) \ -(__builtin_constant_p((nr)) ? \ - __constant_set_bit((nr),(addr)) : \ - __set_bit((nr),(addr)) ) - -/* - * fast, non-SMP clear_bit routine - */ -static inline void -__clear_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned char *addr = __bitops_byte(nr, ptr); - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - asm volatile( - " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); + *addr |= 1 << (nr & 7); } static inline void -__constant_clear_bit(const unsigned long nr, volatile unsigned long *ptr) +__clear_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned char *addr = __bitops_byte(nr, ptr); - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - *(unsigned char *) addr &= ~(1 << (nr & 7)); + *addr &= ~(1 << (nr & 7)); } -#define clear_bit_simple(nr,addr) \ -(__builtin_constant_p((nr)) ? \ - __constant_clear_bit((nr),(addr)) : \ - __clear_bit((nr),(addr)) ) - -/* - * fast, non-SMP change_bit routine - */ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned char *addr = __bitops_byte(nr, ptr); - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - asm volatile( - " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + *addr ^= 1 << (nr & 7); } -static inline void -__constant_change_bit(const unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long addr; - - addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - *(unsigned char *) addr ^= 1 << (nr & 7); -} - -#define change_bit_simple(nr,addr) \ -(__builtin_constant_p((nr)) ? \ - __constant_change_bit((nr),(addr)) : \ - __change_bit((nr),(addr)) ) - -/* - * fast, non-SMP test_and_set_bit routine - */ static inline int -test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) +__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned char *addr = __bitops_byte(nr, ptr); unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - ch = *(unsigned char *) addr; - asm volatile( - " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) - : "cc", "memory"); + ch = *addr; + *addr |= 1 << (nr & 7); return (ch >> (nr & 7)) & 1; } -#define __test_and_set_bit(X,Y) test_and_set_bit_simple(X,Y) -/* - * fast, non-SMP test_and_clear_bit routine - */ static inline int -test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) +__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned char *addr = __bitops_byte(nr, ptr); unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - ch = *(unsigned char *) addr; - asm volatile( - " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) - : "cc", "memory"); + ch = *addr; + *addr &= ~(1 << (nr & 7)); return (ch >> (nr & 7)) & 1; } -#define __test_and_clear_bit(X,Y) test_and_clear_bit_simple(X,Y) -/* - * fast, non-SMP test_and_change_bit routine - */ static inline int -test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) +__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) { - unsigned long addr; + unsigned char *addr = __bitops_byte(nr, ptr); unsigned char ch; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - ch = *(unsigned char *) addr; - asm volatile( - " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) - : "cc", "memory"); + ch = *addr; + *addr ^= 1 << (nr & 7); return (ch >> (nr & 7)) & 1; } -#define __test_and_change_bit(X,Y) test_and_change_bit_simple(X,Y) - -#ifdef CONFIG_SMP -#define set_bit set_bit_cs -#define clear_bit clear_bit_cs -#define change_bit change_bit_cs -#define test_and_set_bit test_and_set_bit_cs -#define test_and_clear_bit test_and_clear_bit_cs -#define test_and_change_bit test_and_change_bit_cs -#else -#define set_bit set_bit_simple -#define clear_bit clear_bit_simple -#define change_bit change_bit_simple -#define test_and_set_bit test_and_set_bit_simple -#define test_and_clear_bit test_and_clear_bit_simple -#define test_and_change_bit test_and_change_bit_simple -#endif - -/* - * This routine doesn't need to be atomic. - */ - -static inline int __test_bit(unsigned long nr, const volatile unsigned long *ptr) +static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr) { - unsigned long addr; - unsigned char ch; + const volatile unsigned char *addr; - addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3); - ch = *(volatile unsigned char *) addr; - return (ch >> (nr & 7)) & 1; -} - -static inline int -__constant_test_bit(unsigned long nr, const volatile unsigned long *addr) { - return (((volatile char *) addr) - [(nr^(__BITOPS_WORDSIZE-8))>>3] & (1<<(nr&7))) != 0; + addr = ((const volatile unsigned char *)ptr); + addr += (nr ^ (BITS_PER_LONG - 8)) >> 3; + return (*addr >> (nr & 7)) & 1; } -#define test_bit(nr,addr) \ -(__builtin_constant_p((nr)) ? \ - __constant_test_bit((nr),(addr)) : \ - __test_bit((nr),(addr)) ) - /* - * Optimized find bit helper functions. + * Functions which use MSB0 bit numbering. + * On an s390x system the bits are numbered: + * |0..............63|64............127|128...........191|192...........255| + * and on s390: + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| */ +unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size); +unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, + unsigned long offset); -/** - * __ffz_word_loop - find byte offset of first long != -1UL - * @addr: pointer to array of unsigned long - * @size: size of the array in bits - */ -static inline unsigned long __ffz_word_loop(const unsigned long *addr, - unsigned long size) -{ - typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype; - unsigned long bytes = 0; - - asm volatile( -#ifndef __s390x__ - " ahi %1,-1\n" - " sra %1,5\n" - " jz 1f\n" - "0: c %2,0(%0,%3)\n" - " jne 1f\n" - " la %0,4(%0)\n" - " brct %1,0b\n" - "1:\n" -#else - " aghi %1,-1\n" - " srag %1,%1,6\n" - " jz 1f\n" - "0: cg %2,0(%0,%3)\n" - " jne 1f\n" - " la %0,8(%0)\n" - " brct %1,0b\n" - "1:\n" -#endif - : "+&a" (bytes), "+&d" (size) - : "d" (-1UL), "a" (addr), "m" (*(addrtype *) addr) - : "cc" ); - return bytes; -} - -/** - * __ffs_word_loop - find byte offset of first long != 0UL - * @addr: pointer to array of unsigned long - * @size: size of the array in bits - */ -static inline unsigned long __ffs_word_loop(const unsigned long *addr, - unsigned long size) +static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr) { - typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype; - unsigned long bytes = 0; - - asm volatile( -#ifndef __s390x__ - " ahi %1,-1\n" - " sra %1,5\n" - " jz 1f\n" - "0: c %2,0(%0,%3)\n" - " jne 1f\n" - " la %0,4(%0)\n" - " brct %1,0b\n" - "1:\n" -#else - " aghi %1,-1\n" - " srag %1,%1,6\n" - " jz 1f\n" - "0: cg %2,0(%0,%3)\n" - " jne 1f\n" - " la %0,8(%0)\n" - " brct %1,0b\n" - "1:\n" -#endif - : "+&a" (bytes), "+&a" (size) - : "d" (0UL), "a" (addr), "m" (*(addrtype *) addr) - : "cc" ); - return bytes; + return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -/** - * __ffz_word - add number of the first unset bit - * @nr: base value the bit number is added to - * @word: the word that is searched for unset bits - */ -static inline unsigned long __ffz_word(unsigned long nr, unsigned long word) +static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) { -#ifdef __s390x__ - if ((word & 0xffffffff) == 0xffffffff) { - word >>= 32; - nr += 32; - } -#endif - if ((word & 0xffff) == 0xffff) { - word >>= 16; - nr += 16; - } - if ((word & 0xff) == 0xff) { - word >>= 8; - nr += 8; - } - return nr + _zb_findmap[(unsigned char) word]; + return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -/** - * __ffs_word - add number of the first set bit - * @nr: base value the bit number is added to - * @word: the word that is searched for set bits - */ -static inline unsigned long __ffs_word(unsigned long nr, unsigned long word) +static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr) { -#ifdef __s390x__ - if ((word & 0xffffffff) == 0) { - word >>= 32; - nr += 32; - } -#endif - if ((word & 0xffff) == 0) { - word >>= 16; - nr += 16; - } - if ((word & 0xff) == 0) { - word >>= 8; - nr += 8; - } - return nr + _sb_findmap[(unsigned char) word]; + return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr); } - -/** - * __load_ulong_be - load big endian unsigned long - * @p: pointer to array of unsigned long - * @offset: byte offset of source value in the array - */ -static inline unsigned long __load_ulong_be(const unsigned long *p, - unsigned long offset) +static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) { - p = (unsigned long *)((unsigned long) p + offset); - return *p; + return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -/** - * __load_ulong_le - load little endian unsigned long - * @p: pointer to array of unsigned long - * @offset: byte offset of source value in the array - */ -static inline unsigned long __load_ulong_le(const unsigned long *p, - unsigned long offset) +static inline int test_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) { - unsigned long word; - - p = (unsigned long *)((unsigned long) p + offset); -#ifndef __s390x__ - asm volatile( - " ic %0,%O1(%R1)\n" - " icm %0,2,%O1+1(%R1)\n" - " icm %0,4,%O1+2(%R1)\n" - " icm %0,8,%O1+3(%R1)" - : "=&d" (word) : "Q" (*p) : "cc"); -#else - asm volatile( - " lrvg %0,%1" - : "=d" (word) : "m" (*p) ); -#endif - return word; + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -/* - * The various find bit functions. - */ +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES -/* - * ffz - find first zero in word. - * @word: The word to search +/** + * __flogr - find leftmost one + * @word - The word to search * - * Undefined if no zero exists, so code should check against ~0UL first. - */ -static inline unsigned long ffz(unsigned long word) -{ - return __ffz_word(0, word); + * Returns the bit number of the most significant bit set, + * where the most significant bit has bit number 0. + * If no bit is set this function returns 64. + */ +static inline unsigned char __flogr(unsigned long word) +{ + if (__builtin_constant_p(word)) { + unsigned long bit = 0; + + if (!word) + return 64; + if (!(word & 0xffffffff00000000UL)) { + word <<= 32; + bit += 32; + } + if (!(word & 0xffff000000000000UL)) { + word <<= 16; + bit += 16; + } + if (!(word & 0xff00000000000000UL)) { + word <<= 8; + bit += 8; + } + if (!(word & 0xf000000000000000UL)) { + word <<= 4; + bit += 4; + } + if (!(word & 0xc000000000000000UL)) { + word <<= 2; + bit += 2; + } + if (!(word & 0x8000000000000000UL)) { + word <<= 1; + bit += 1; + } + return bit; + } else { + register unsigned long bit asm("4") = word; + register unsigned long out asm("5"); + + asm volatile( + " flogr %[bit],%[bit]\n" + : [bit] "+d" (bit), [out] "=d" (out) : : "cc"); + return bit; + } } /** @@ -582,260 +400,83 @@ static inline unsigned long ffz(unsigned long word) * * Undefined if no bit exists, so code should check against 0 first. */ -static inline unsigned long __ffs (unsigned long word) +static inline unsigned long __ffs(unsigned long word) { - return __ffs_word(0, word); + return __flogr(-word & word) ^ (BITS_PER_LONG - 1); } /** * ffs - find first bit set - * @x: the word to search + * @word: the word to search * - * This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). + * This is defined the same way as the libc and + * compiler builtin ffs routines (man ffs). */ -static inline int ffs(int x) +static inline int ffs(int word) { - if (!x) - return 0; - return __ffs_word(1, x); + unsigned long mask = 2 * BITS_PER_LONG - 1; + unsigned int val = (unsigned int)word; + + return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask; } /** - * find_first_zero_bit - find the first zero bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search + * __fls - find last (most-significant) set bit in a long word + * @word: the word to search * - * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. + * Undefined if no set bit exists, so code should check against 0 first. */ -static inline unsigned long find_first_zero_bit(const unsigned long *addr, - unsigned long size) +static inline unsigned long __fls(unsigned long word) { - unsigned long bytes, bits; - - if (!size) - return 0; - bytes = __ffz_word_loop(addr, size); - bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes)); - return (bits < size) ? bits : size; + return __flogr(word) ^ (BITS_PER_LONG - 1); } /** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search + * fls64 - find last set bit in a 64-bit word + * @word: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. * - * Returns the bit-number of the first set bit, not the number of the byte - * containing a bit. + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. */ -static inline unsigned long find_first_bit(const unsigned long * addr, - unsigned long size) +static inline int fls64(unsigned long word) { - unsigned long bytes, bits; + unsigned long mask = 2 * BITS_PER_LONG - 1; - if (!size) - return 0; - bytes = __ffs_word_loop(addr, size); - bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes)); - return (bits < size) ? bits : size; + return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask; } /** - * find_next_zero_bit - find the first zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -static inline int find_next_zero_bit (const unsigned long * addr, - unsigned long size, - unsigned long offset) -{ - const unsigned long *p; - unsigned long bit, set; - - if (offset >= size) - return size; - bit = offset & (__BITOPS_WORDSIZE - 1); - offset -= bit; - size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; - if (bit) { - /* - * __ffz_word returns __BITOPS_WORDSIZE - * if no zero bit is present in the word. - */ - set = __ffz_word(bit, *p >> bit); - if (set >= size) - return size + offset; - if (set < __BITOPS_WORDSIZE) - return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; - p++; - } - return offset + find_first_zero_bit(p, size); -} - -/** - * find_next_bit - find the first set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search + * fls - find last (most-significant) bit set + * @word: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline int find_next_bit (const unsigned long * addr, - unsigned long size, - unsigned long offset) +static inline int fls(int word) { - const unsigned long *p; - unsigned long bit, set; - - if (offset >= size) - return size; - bit = offset & (__BITOPS_WORDSIZE - 1); - offset -= bit; - size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; - if (bit) { - /* - * __ffs_word returns __BITOPS_WORDSIZE - * if no one bit is present in the word. - */ - set = __ffs_word(0, *p & (~0UL << bit)); - if (set >= size) - return size + offset; - if (set < __BITOPS_WORDSIZE) - return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; - p++; - } - return offset + find_first_bit(p, size); + return fls64((unsigned int)word); } -/* - * Every architecture must define this function. It's the fastest - * way of searching a 140-bit bitmap where the first 100 bits are - * unlikely to be set. It's guaranteed that at least one of the 140 - * bits is cleared. - */ -static inline int sched_find_first_bit(unsigned long *b) -{ - return find_first_bit(b, 140); -} +#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ -#include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/__ffs.h> +#include <asm-generic/bitops/ffs.h> #include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/fls64.h> +#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ + +#include <asm-generic/bitops/ffz.h> +#include <asm-generic/bitops/find.h> #include <asm-generic/bitops/hweight.h> #include <asm-generic/bitops/lock.h> - -/* - * ATTENTION: intel byte ordering convention for ext2 and minix !! - * bit 0 is the LSB of addr; bit 31 is the MSB of addr; - * bit 32 is the LSB of (addr+4). - * That combined with the little endian byte order of Intel gives the - * following bit order in memory: - * 07 06 05 04 03 02 01 00 15 14 13 12 11 10 09 08 \ - * 23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24 - */ - -#define ext2_set_bit(nr, addr) \ - __test_and_set_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_set_bit_atomic(lock, nr, addr) \ - test_and_set_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_clear_bit(nr, addr) \ - __test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_clear_bit_atomic(lock, nr, addr) \ - test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) -#define ext2_test_bit(nr, addr) \ - test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) - -static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size) -{ - unsigned long bytes, bits; - - if (!size) - return 0; - bytes = __ffz_word_loop(vaddr, size); - bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes)); - return (bits < size) ? bits : size; -} - -static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size, - unsigned long offset) -{ - unsigned long *addr = vaddr, *p; - unsigned long bit, set; - - if (offset >= size) - return size; - bit = offset & (__BITOPS_WORDSIZE - 1); - offset -= bit; - size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; - if (bit) { - /* - * s390 version of ffz returns __BITOPS_WORDSIZE - * if no zero bit is present in the word. - */ - set = __ffz_word(bit, __load_ulong_le(p, 0) >> bit); - if (set >= size) - return size + offset; - if (set < __BITOPS_WORDSIZE) - return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; - p++; - } - return offset + ext2_find_first_zero_bit(p, size); -} - -static inline unsigned long ext2_find_first_bit(void *vaddr, - unsigned long size) -{ - unsigned long bytes, bits; - - if (!size) - return 0; - bytes = __ffs_word_loop(vaddr, size); - bits = __ffs_word(bytes*8, __load_ulong_le(vaddr, bytes)); - return (bits < size) ? bits : size; -} - -static inline int ext2_find_next_bit(void *vaddr, unsigned long size, - unsigned long offset) -{ - unsigned long *addr = vaddr, *p; - unsigned long bit, set; - - if (offset >= size) - return size; - bit = offset & (__BITOPS_WORDSIZE - 1); - offset -= bit; - size -= offset; - p = addr + offset / __BITOPS_WORDSIZE; - if (bit) { - /* - * s390 version of ffz returns __BITOPS_WORDSIZE - * if no zero bit is present in the word. - */ - set = __ffs_word(0, __load_ulong_le(p, 0) & (~0UL << bit)); - if (set >= size) - return size + offset; - if (set < __BITOPS_WORDSIZE) - return set + offset; - offset += __BITOPS_WORDSIZE; - size -= __BITOPS_WORDSIZE; - p++; - } - return offset + ext2_find_first_bit(p, size); -} - -#include <asm-generic/bitops/minix.h> - -#endif /* __KERNEL__ */ +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/le.h> +#include <asm-generic/bitops/ext2-atomic-setbit.h> #endif /* _S390_BITOPS_H */ diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h index 011f1e6a2a6..0f5bd894f4d 100644 --- a/arch/s390/include/asm/bugs.h +++ b/arch/s390/include/asm/bugs.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/bugs.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * * Derived from "include/asm-i386/bugs.h" diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 2a30d5ac066..4d7ccac5fd1 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/cache.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * * Derived from "include/asm-i386/cache.h" * Copyright (C) 1992, Linus Torvalds diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h index 7e1f7762062..3e20383d092 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/cacheflush.h @@ -8,4 +8,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable); #endif +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); + #endif /* _S390_CACHEFLUSH_H */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index ff6f62e0ec3..b80e456d642 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2002, 2009 + * Copyright IBM Corp. 2002, 2009 * * Author(s): Arnd Bergmann <arndb@de.ibm.com> * @@ -11,6 +11,8 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <asm/fcx.h> +#include <asm/irq.h> +#include <asm/schid.h> /* structs from asm/cio.h */ struct irb; @@ -112,7 +114,6 @@ enum uc_todo { /** * struct ccw driver - device driver for channel attached devices - * @owner: owning module * @ids: ids supported by this driver * @probe: function called on probe * @remove: function called on remove @@ -128,10 +129,9 @@ enum uc_todo { * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure - * @name: device driver name + * @int_class: interruption class to use for accounting interrupts */ struct ccw_driver { - struct module *owner; struct ccw_device_id *ids; int (*probe) (struct ccw_device *); void (*remove) (struct ccw_device *); @@ -147,7 +147,7 @@ struct ccw_driver { int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; - char *name; + enum interruption_class int_class; }; extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, @@ -219,13 +219,15 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdev(n) container_of(n, struct ccw_device, dev) #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) -extern struct ccw_device *ccw_device_probe_console(void); -extern int ccw_device_force_console(void); +extern struct ccw_device *ccw_device_create_console(struct ccw_driver *); +extern void ccw_device_destroy_console(struct ccw_device *); +extern int ccw_device_enable_console(struct ccw_device *); +extern void ccw_device_wait_idle(struct ccw_device *); +extern int ccw_device_force_console(struct ccw_device *); int ccw_device_siosl(struct ccw_device *); -// FIXME: these have to go -extern int _ccw_device_get_subchannel_number(struct ccw_device *); +extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); -extern void *ccw_device_get_chp_desc(struct ccw_device *, int); +struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int); #endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index c79c1e787b8..057ce0ca637 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -6,14 +6,14 @@ struct ccw_driver; /** * struct ccwgroup_device - ccw group device - * @creator_id: unique number of the driver * @state: online/offline state * @count: number of attached slave devices * @dev: embedded device structure * @cdev: variable number of slave devices, allocated as needed + * @ungroup_work: work to be done when a ccwgroup notifier has action + * type %BUS_NOTIFY_UNBIND_DRIVER */ struct ccwgroup_device { - unsigned long creator_id; enum { CCWGROUP_OFFLINE, CCWGROUP_ONLINE, @@ -24,16 +24,13 @@ struct ccwgroup_device { /* public: */ unsigned int count; struct device dev; + struct work_struct ungroup_work; struct ccw_device *cdev[0]; }; /** * struct ccwgroup_driver - driver for ccw group devices - * @owner: driver owner - * @name: driver name - * @max_slaves: maximum number of slave devices - * @driver_id: unique id - * @probe: function called on probe + * @setup: function called during device creation to setup the device * @remove: function called on remove * @set_online: function called when device is set online * @set_offline: function called when device is set offline @@ -46,12 +43,7 @@ struct ccwgroup_device { * @driver: embedded driver structure */ struct ccwgroup_driver { - struct module *owner; - char *name; - int max_slaves; - unsigned long driver_id; - - int (*probe) (struct ccwgroup_device *); + int (*setup) (struct ccwgroup_device *); void (*remove) (struct ccwgroup_device *); int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); @@ -67,9 +59,11 @@ struct ccwgroup_driver { extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); -int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, - struct ccw_driver *cdrv, int num_devices, - const char *buf); +int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, + int num_devices, const char *buf); + +extern int ccwgroup_set_online(struct ccwgroup_device *gdev); +extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 6c00f6800a3..74036485635 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -1,18 +1,16 @@ -#ifndef _S390_CHECKSUM_H -#define _S390_CHECKSUM_H - /* - * include/asm-s390/checksum.h * S390 fast network checksum routines - * see also arch/S390/lib/checksum.c * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Ulrich Hild (first version) * Martin Schwidefsky (heavily optimized CKSM version) * D.J. Barrow (third attempt) */ +#ifndef _S390_CHECKSUM_H +#define _S390_CHECKSUM_H + #include <asm/uaccess.h> /* @@ -46,22 +44,15 @@ csum_partial(const void *buff, int len, __wsum sum) * here even more important to align src and dst on a 32-bit (or even * better 64-bit) boundary * - * Copy from userspace and compute checksum. If we catch an exception - * then zero the rest of the buffer. + * Copy from userspace and compute checksum. */ static inline __wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - int missing; - - missing = copy_from_user(dst, src, len); - if (missing) { - memset(dst + len - missing, 0, missing); + if (unlikely(copy_from_user(dst, src, len))) *err_ptr = -EFAULT; - } - return csum_partial(dst, len, sum); } diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h index fc71d8a6709..7298eec9854 100644 --- a/arch/s390/include/asm/chpid.h +++ b/arch/s390/include/asm/chpid.h @@ -1,28 +1,24 @@ /* - * drivers/s390/cio/chpid.h - * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007, 2012 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> */ - #ifndef _ASM_S390_CHPID_H -#define _ASM_S390_CHPID_H _ASM_S390_CHPID_H - -#include <linux/string.h> -#include <linux/types.h> - -#define __MAX_CHPID 255 +#define _ASM_S390_CHPID_H -struct chp_id { - u8 reserved1; - u8 cssid; - u8 reserved2; - u8 id; -} __attribute__((packed)); - -#ifdef __KERNEL__ +#include <uapi/asm/chpid.h> #include <asm/cio.h> +struct channel_path_desc { + u8 flags; + u8 lsn; + u8 desc; + u8 chpid; + u8 swla; + u8 zeroes; + u8 chla; + u8 chpp; +} __packed; + static inline void chp_id_init(struct chp_id *chpid) { memset(chpid, 0, sizeof(struct chp_id)); @@ -51,6 +47,4 @@ static inline int chp_id_is_valid(struct chp_id *chpid) #define chp_id_for_each(c) \ for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c)) -#endif /* __KERNEL */ - #endif /* _ASM_S390_CHPID_H */ diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index e34347d567a..09633920776 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -1,7 +1,4 @@ /* - * include/asm-s390/cio.h - * include/asm-s390x/cio.h - * * Common interface for I/O on S/390 */ #ifndef _ASM_S390_CIO_H_ @@ -10,10 +7,10 @@ #include <linux/spinlock.h> #include <asm/types.h> -#ifdef __KERNEL__ - #define LPM_ANYPATH 0xff #define __MAX_CSSID 0 +#define __MAX_SUBCHANNEL 65535 +#define __MAX_SSID 3 #include <asm/scsw.h> @@ -85,6 +82,18 @@ struct erw { } __attribute__ ((packed)); /** + * struct erw_eadm - EADM Subchannel extended report word + * @b: aob error + * @r: arsb error + */ +struct erw_eadm { + __u32 : 16; + __u32 b : 1; + __u32 r : 1; + __u32 : 14; +} __packed; + +/** * struct sublog - subchannel logout area * @res0: reserved * @esf: extended status flags @@ -175,15 +184,28 @@ struct esw3 { } __attribute__ ((packed)); /** + * struct esw_eadm - EADM Subchannel Extended Status Word (ESW) + * @sublog: subchannel logout + * @erw: extended report word + */ +struct esw_eadm { + __u32 sublog; + struct erw_eadm erw; + __u32 : 32; + __u32 : 32; + __u32 : 32; +} __packed; + +/** * struct irb - interruption response block * @scsw: subchannel status word - * @esw: extened status word, 4 formats + * @esw: extended status word * @ecw: extended control word * * The irb that is handed to the device driver when an interrupt occurs. For * solicited interrupts, the common I/O layer already performs checks whether * a field is valid; a field not being valid is always passed as %0. - * If a unit check occured, @ecw may contain sense data; this is retrieved + * If a unit check occurred, @ecw may contain sense data; this is retrieved * by the common I/O layer itself if the device doesn't support concurrent * sense (so that the device driver never needs to perform basic sene itself). * For unsolicited interrupts, the irb is passed as-is (expect for sense data, @@ -196,6 +218,7 @@ struct irb { struct esw1 esw1; struct esw2 esw2; struct esw3 esw3; + struct esw_eadm eadm; } esw; __u8 ecw[32]; } __attribute__ ((packed,aligned(4))); @@ -273,8 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } -extern void wait_cons_dev(void); - +void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); @@ -291,5 +313,3 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl); int chsc_sstpi(void *page, void *result, size_t size); #endif - -#endif diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h new file mode 100644 index 00000000000..a0e71a501f7 --- /dev/null +++ b/arch/s390/include/asm/clp.h @@ -0,0 +1,28 @@ +#ifndef _ASM_S390_CLP_H +#define _ASM_S390_CLP_H + +/* CLP common request & response block size */ +#define CLP_BLK_SIZE PAGE_SIZE + +struct clp_req_hdr { + u16 len; + u16 cmd; +} __packed; + +struct clp_rsp_hdr { + u16 len; + u16 rsp; +} __packed; + +/* CLP Response Codes */ +#define CLP_RC_OK 0x0010 /* Command request successfully */ +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ +#define CLP_RC_PERM 0x0030 /* Command not authorized */ +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ +#define CLP_RC_NODATA 0x0080 /* No data available */ +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ + +#endif diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h index 39ae0329479..806eac12e3b 100644 --- a/arch/s390/include/asm/cmb.h +++ b/arch/s390/include/asm/cmb.h @@ -1,61 +1,12 @@ #ifndef S390_CMB_H #define S390_CMB_H -#include <linux/types.h> +#include <uapi/asm/cmb.h> -/** - * struct cmbdata - channel measurement block data for user space - * @size: size of the stored data - * @elapsed_time: time since last sampling - * @ssch_rsch_count: number of ssch and rsch - * @sample_count: number of samples - * @device_connect_time: time of device connect - * @function_pending_time: time of function pending - * @device_disconnect_time: time of device disconnect - * @control_unit_queuing_time: time of control unit queuing - * @device_active_only_time: time of device active only - * @device_busy_time: time of device busy (ext. format) - * @initial_command_response_time: initial command response time (ext. format) - * - * All values are stored as 64 bit for simplicity, especially - * in 32 bit emulation mode. All time values are normalized to - * nanoseconds. - * Currently, two formats are known, which differ by the size of - * this structure, i.e. the last two members are only set when - * the extended channel measurement facility (first shipped in - * z990 machines) is activated. - * Potentially, more fields could be added, which would result in a - * new ioctl number. - */ -struct cmbdata { - __u64 size; - __u64 elapsed_time; - /* basic and exended format: */ - __u64 ssch_rsch_count; - __u64 sample_count; - __u64 device_connect_time; - __u64 function_pending_time; - __u64 device_disconnect_time; - __u64 control_unit_queuing_time; - __u64 device_active_only_time; - /* extended format only: */ - __u64 device_busy_time; - __u64 initial_command_response_time; -}; - -/* enable channel measurement */ -#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER, 32) -/* enable channel measurement */ -#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER, 33) -/* read channel measurement data */ -#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata) - -#ifdef __KERNEL__ struct ccw_device; extern int enable_cmf(struct ccw_device *cdev); extern int disable_cmf(struct ccw_device *cdev); extern u64 cmf_read(struct ccw_device *cdev, int index); extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data); -#endif /* __KERNEL__ */ #endif /* S390_CMB_H */ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h new file mode 100644 index 00000000000..4236408070e --- /dev/null +++ b/arch/s390/include/asm/cmpxchg.h @@ -0,0 +1,304 @@ +/* + * Copyright IBM Corp. 1999, 2011 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include <linux/mmdebug.h> +#include <linux/types.h> +#include <linux/bug.h> + +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long x, void *ptr, int size) +{ + unsigned long addr, old; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" ((x & 0xff) << shift), "d" (~(0xff << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 4: + asm volatile( + " l %0,%3\n" + "0: cs %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) ptr) + : "d" (x), "Q" (*(int *) ptr) + : "memory", "cc"); + return old; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " lg %0,%3\n" + "0: csg %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=m" (*(long *) ptr) + : "d" (x), "Q" (*(long *) ptr) + : "memory", "cc"); + return old; +#endif /* CONFIG_64BIT */ + } + __xchg_called_with_bad_pointer(); + return x; +} + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\ + __ret; \ +}) + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#define __HAVE_ARCH_CMPXCHG + +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long addr, prev, tmp; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr) + : "d" ((old & 0xff) << shift), + "d" ((new & 0xff) << shift), + "d" (~(0xff << shift)) + : "memory", "cc"); + return prev >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr) + : "d" ((old & 0xffff) << shift), + "d" ((new & 0xffff) << shift), + "d" (~(0xffff << shift)) + : "memory", "cc"); + return prev >> shift; + case 4: + asm volatile( + " cs %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(int *) ptr) + : "0" (old), "d" (new), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " csg %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(long *) ptr) + : "0" (old), "d" (new), "Q" (*(long *) ptr) + : "memory", "cc"); + return prev; +#endif /* CONFIG_64BIT */ + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) + +#ifdef CONFIG_64BIT +#define cmpxchg64(ptr, o, n) \ +({ \ + cmpxchg((ptr), (o), (n)); \ +}) +#else /* CONFIG_64BIT */ +static inline unsigned long long __cmpxchg64(void *ptr, + unsigned long long old, + unsigned long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + unsigned long long *ullptr = ptr; + + asm volatile( + " cds %0,%2,%1" + : "+d" (rp_old), "+Q" (*ullptr) + : "d" (rp_new) + : "memory", "cc"); + return rp_old.pair; +} + +#define cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ + __ret; \ +}) +#endif /* CONFIG_64BIT */ + +#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ +({ \ + register __typeof__(*(p1)) __old1 asm("2") = (o1); \ + register __typeof__(*(p2)) __old2 asm("3") = (o2); \ + register __typeof__(*(p1)) __new1 asm("4") = (n1); \ + register __typeof__(*(p2)) __new2 asm("5") = (n2); \ + int cc; \ + asm volatile( \ + insn " %[old],%[new],%[ptr]\n" \ + " ipm %[cc]\n" \ + " srl %[cc],28" \ + : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \ + : [new] "d" (__new1), "d" (__new2), \ + [ptr] "Q" (*(p1)), "Q" (*(p2)) \ + : "memory", "cc"); \ + !cc; \ +}) + +#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds") + +#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg") + +extern void __cmpxchg_double_called_with_bad_pointer(void); + +#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + switch (sizeof(*(p1))) { \ + case 4: \ + __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \ + break; \ + case 8: \ + __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \ + break; \ + default: \ + __cmpxchg_double_called_with_bad_pointer(); \ + } \ + __ret; \ +}) + +#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +({ \ + __typeof__(p1) __p1 = (p1); \ + __typeof__(p2) __p2 = (p2); \ + int __ret; \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ + if (sizeof(long) == 4) \ + __ret = __cmpxchg_double_4(__p1, __p2, o1, o2, n1, n2); \ + else \ + __ret = __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \ + __ret; \ +}) + +#define system_has_cmpxchg_double() 1 + +#include <asm-generic/cmpxchg-local.h> + +static inline unsigned long __cmpxchg_local(void *ptr, + unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + return __cmpxchg(ptr, old, new, size); + default: + return __cmpxchg_local_generic(ptr, old, new, size); + } + + return old; +} + +/* + * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make + * them available. + */ +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n)) + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index da359ca6fe5..d350ed9d0fb 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -7,33 +7,43 @@ #include <linux/sched.h> #include <linux/thread_info.h> +#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) + +#define __SC_DELOUSE(t,v) ({ \ + BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \ + (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ +}) + #define PSW32_MASK_PER 0x40000000UL #define PSW32_MASK_DAT 0x04000000UL #define PSW32_MASK_IO 0x02000000UL #define PSW32_MASK_EXT 0x01000000UL #define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ #define PSW32_MASK_MCHECK 0x00040000UL #define PSW32_MASK_WAIT 0x00020000UL #define PSW32_MASK_PSTATE 0x00010000UL #define PSW32_MASK_ASC 0x0000C000UL #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL +#define PSW32_MASK_RI 0x00000080UL + +#define PSW32_MASK_USER 0x0000FF00UL -#define PSW32_ADDR_AMODE31 0x80000000UL +#define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL -#define PSW32_BASE_BITS 0x00080000UL +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) #define PSW32_ASC_PRIMARY 0x00000000UL #define PSW32_ASC_ACCREG 0x00004000UL #define PSW32_ASC_SECONDARY 0x00008000UL #define PSW32_ASC_HOME 0x0000C000UL -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - -extern long psw32_user_bits; +#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \ + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \ + PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \ + PSW32_ASC_PRIMARY) #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" @@ -66,6 +76,23 @@ typedef s64 compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; typedef u64 compat_u64; +typedef u32 compat_uptr_t; + +typedef struct { + u32 mask; + u32 addr; +} __aligned(8) psw_compat_t; + +typedef struct { + psw_compat_t psw; + u32 gprs[NUM_GPRS]; + u32 acrs[NUM_ACRS]; + u32 orig_gpr2; +} s390_compat_regs; + +typedef struct { + u32 gprs_high[NUM_GPRS]; +} s390_compat_regs_high; struct compat_timespec { compat_time_t tv_sec; @@ -121,17 +148,33 @@ struct compat_flock64 { }; struct compat_statfs { - s32 f_type; - s32 f_bsize; - s32 f_blocks; - s32 f_bfree; - s32 f_bavail; - s32 f_files; - s32 f_ffree; + u32 f_type; + u32 f_bsize; + u32 f_blocks; + u32 f_bfree; + u32 f_bavail; + u32 f_files; + u32 f_ffree; + compat_fsid_t f_fsid; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; +}; + +struct compat_statfs64 { + u32 f_type; + u32 f_bsize; + u64 f_blocks; + u64 f_bfree; + u64 f_bavail; + u64 f_files; + u64 f_ffree; compat_fsid_t f_fsid; - s32 f_namelen; - s32 f_frsize; - s32 f_spare[6]; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff @@ -144,6 +187,79 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[128/sizeof(int) - 3]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + uid_t _uid; /* sender's uid */ + int _status;/* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + __u32 _addr; /* faulting insn/memory ref. - pointer */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd +#define si_tid _sifields._timer._tid +#define si_overrun _sifields._timer._overrun + #define COMPAT_OFF_T_MAX 0x7fffffff #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL @@ -153,7 +269,6 @@ typedef u32 compat_sigset_word; * as pointers because the syscall entry code will have * appropriately converted them already. */ -typedef u32 compat_uptr_t; static inline void __user *compat_ptr(compat_uptr_t uptr) { @@ -172,15 +287,6 @@ static inline int is_compat_task(void) return is_32bit_task(); } -#else - -static inline int is_compat_task(void) -{ - return 0; -} - -#endif - static inline void __user *arch_compat_alloc_user_space(long len) { unsigned long stack; @@ -191,6 +297,8 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *) (stack - len); } +#endif + struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h index 48a9eab1642..3dfadb5d648 100644 --- a/arch/s390/include/asm/cpcmd.h +++ b/arch/s390/include/asm/cpcmd.h @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/cpcmd.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index e0b69540216..f5a8e2fcde0 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2000,2009 + * Copyright IBM Corp. 2000, 2009 * Author(s): Hartmut Penner <hp@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Christian Ehrhardt <ehrhardt@de.ibm.com>, diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h new file mode 100644 index 00000000000..cb700d54bd8 --- /dev/null +++ b/arch/s390/include/asm/cpu_mf.h @@ -0,0 +1,283 @@ +/* + * CPU-measurement facilities + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * Jan Glauber <jang@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#ifndef _ASM_S390_CPU_MF_H +#define _ASM_S390_CPU_MF_H + +#include <linux/errno.h> +#include <asm/facility.h> + +#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ +#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ +#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ +#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ +#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ +#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ +#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */ +#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program + buffer full */ + +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ + CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ + CPU_MF_INT_SF_LSDA) +#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL) + +/* CPU measurement facility support */ +static inline int cpum_cf_avail(void) +{ + return MACHINE_HAS_LPP && test_facility(67); +} + +static inline int cpum_sf_avail(void) +{ + return MACHINE_HAS_LPP && test_facility(68); +} + + +struct cpumf_ctr_info { + u16 cfvn; + u16 auth_ctl; + u16 enable_ctl; + u16 act_ctl; + u16 max_cpu; + u16 csvn; + u16 max_cg; + u16 reserved1; + u32 reserved2[12]; +} __packed; + +/* QUERY SAMPLING INFORMATION block */ +struct hws_qsi_info_block { /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: basic-sampling authorization */ + unsigned int ad:1; /* 15: diag-sampling authorization */ + unsigned int b16_21:6; /* 16-21: zeros */ + unsigned int es:1; /* 22: basic-sampling enable control */ + unsigned int ed:1; /* 23: diag-sampling enable control */ + unsigned int b24_29:6; /* 24-29: zeros */ + unsigned int cs:1; /* 30: basic-sampling activation control */ + unsigned int cd:1; /* 31: diag-sampling activation control */ + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ + unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +} __packed; + +/* SET SAMPLING CONTROLS request block */ +struct hws_lsctl_request_block { + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long long b2_53:52;/* 2-53: zeros */ + unsigned int es:1; /* 54: basic-sampling enable control */ + unsigned int ed:1; /* 55: diag-sampling enable control */ + unsigned int b56_61:6; /* 56-61: - zeros */ + unsigned int cs:1; /* 62: basic-sampling activation control */ + unsigned int cd:1; /* 63: diag-sampling activation control */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +} __packed; + +struct hws_basic_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ +} __packed; + +struct hws_diag_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int I:1; /* 31 entry valid or invalid */ + u8 data[]; /* Machine-dependent sample data */ +} __packed; + +struct hws_combined_entry { + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ +} __packed; + +struct hws_trailer_entry { + union { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned long long:61; /* 3 - 63: Reserved */ + }; + unsigned long long flags; /* 0 - 63: All indicators */ + }; + unsigned long long overflow; /* 64 - sample Overflow count */ + unsigned char timestamp[16]; /* 16 - 31 timestamp */ + unsigned long long reserved1; /* 32 -Reserved */ + unsigned long long reserved2; /* */ + unsigned long long progusage1; /* 48 - reserved for programming use */ + unsigned long long progusage2; /* */ +} __packed; + +/* Query counter information */ +static inline int qctri(struct cpumf_ctr_info *info) +{ + int rc = -EINVAL; + + asm volatile ( + "0: .insn s,0xb28e0000,%1\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(1b, 2b) + : "+d" (rc), "=Q" (*info)); + return rc; +} + +/* Load CPU-counter-set controls */ +static inline int lcctl(u64 ctl) +{ + int cc; + + asm volatile ( + " .insn s,0xb2840000,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "m" (ctl) : "cc"); + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + register u64 content asm("4") = 0; + int cc; + + asm volatile ( + " .insn rre,0xb2e40000,%0,%2\n" + " ipm %1\n" + " srl %1,28\n" + : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + if (!cc) + *val = content; + return cc; +} + +/* Query sampling information */ +static inline int qsi(struct hws_qsi_info_block *info) +{ + int cc; + cc = 1; + + asm volatile( + "0: .insn s,0xb2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (info) + : "m" (*info) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Load sampling controls */ +static inline int lsctl(struct hws_lsctl_request_block *req) +{ + int cc; + + cc = 1; + asm volatile( + "0: .insn s,0xb2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (req) + : "m" (*req) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Sampling control helper functions */ + +#include <linux/time.h> + +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + +#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL +#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL + +/* Return TOD timestamp contained in an trailer entry */ +static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* TOD in STCKE format */ + if (te->t) + return *((unsigned long long *) &te->timestamp[1]); + + /* TOD in STCK format */ + return *((unsigned long long *) &te->timestamp[0]); +} + +/* Return pointer to trailer entry of an sample data block */ +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *) v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* Return if the entry in the sample data block table (sdbt) + * is a link to the next sdbt */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} +#endif /* _ASM_S390_CPU_MF_H */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 08143487829..f65bd363451 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -1,7 +1,5 @@ /* - * include/asm-s390/cputime.h - * - * (C) Copyright IBM Corp. 2004 + * Copyright IBM Corp. 2004 * * Author: Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -14,116 +12,103 @@ #include <linux/spinlock.h> #include <asm/div64.h> -/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long cputime_t; -typedef unsigned long long cputime64_t; +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -#ifndef __s390x__ +typedef unsigned long long __nocast cputime_t; +typedef unsigned long long __nocast cputime64_t; -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline unsigned long __div(unsigned long long n, unsigned long base) { +#ifndef CONFIG_64BIT register_pair rp; rp.pair = n >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1)); return rp.subreg.odd; +#else /* CONFIG_64BIT */ + return n / base; +#endif /* CONFIG_64BIT */ } -#else /* __s390x__ */ +#define cputime_one_jiffy jiffies_to_cputime(1) -static inline unsigned int -__div(unsigned long long n, unsigned int base) +/* + * Convert cputime to jiffies and back. + */ +static inline unsigned long cputime_to_jiffies(const cputime_t cputime) { - return n / base; + return __div((__force unsigned long long) cputime, 4096000000ULL / HZ); } -#endif /* __s390x__ */ +static inline cputime_t jiffies_to_cputime(const unsigned int jif) +{ + return (__force cputime_t)(jif * (4096000000ULL / HZ)); +} -#define cputime_zero (0ULL) -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_max ((~0UL >> 1) - 1) -#define cputime_add(__a, __b) ((__a) + (__b)) -#define cputime_sub(__a, __b) ((__a) - (__b)) -#define cputime_div(__a, __n) ({ \ - unsigned long long __div = (__a); \ - do_div(__div,__n); \ - __div; \ -}) -#define cputime_halve(__a) ((__a) >> 1) -#define cputime_eq(__a, __b) ((__a) == (__b)) -#define cputime_gt(__a, __b) ((__a) > (__b)) -#define cputime_ge(__a, __b) ((__a) >= (__b)) -#define cputime_lt(__a, __b) ((__a) < (__b)) -#define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) - -#define cputime64_zero (0ULL) -#define cputime64_add(__a, __b) ((__a) + (__b)) -#define cputime_to_cputime64(__ct) (__ct) - -static inline u64 -cputime64_to_jiffies64(cputime64_t cputime) -{ - do_div(cputime, 4096000000ULL / HZ); - return cputime; +static inline u64 cputime64_to_jiffies64(cputime64_t cputime) +{ + unsigned long long jif = (__force unsigned long long) cputime; + do_div(jif, 4096000000ULL / HZ); + return jif; +} + +static inline cputime64_t jiffies64_to_cputime64(const u64 jif) +{ + return (__force cputime64_t)(jif * (4096000000ULL / HZ)); } /* * Convert cputime to microseconds and back. */ -static inline unsigned int -cputime_to_usecs(const cputime_t cputime) +static inline unsigned int cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096); + return (__force unsigned long long) cputime >> 12; } -static inline cputime_t -usecs_to_cputime(const unsigned int m) +static inline cputime_t usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096; + return (__force cputime_t)(m * 4096ULL); } +#define usecs_to_cputime64(m) usecs_to_cputime(m) + /* * Convert cputime to milliseconds and back. */ -static inline unsigned int -cputime_to_secs(const cputime_t cputime) +static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 2048000000) >> 1; + return __div((__force unsigned long long) cputime, 2048000000) >> 1; } -static inline cputime_t -secs_to_cputime(const unsigned int s) +static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 4096000000ULL; + return (__force cputime_t)(s * 4096000000ULL); } /* * Convert cputime to timespec and back. */ -static inline cputime_t -timespec_to_cputime(const struct timespec *value) +static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000); } -static inline void -cputime_to_timespec(const cputime_t cputime, struct timespec *value) +static inline void cputime_to_timespec(const cputime_t cputime, + struct timespec *value) { -#ifndef __s390x__ + unsigned long long __cputime = (__force unsigned long long) cputime; +#ifndef CONFIG_64BIT register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } @@ -132,74 +117,71 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) * Since cputime and timeval have the same resolution (microseconds) * this is easy. */ -static inline cputime_t -timeval_to_cputime(const struct timeval *value) +static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_usec * 4096ULL); } -static inline void -cputime_to_timeval(const cputime_t cputime, struct timeval *value) +static inline void cputime_to_timeval(const cputime_t cputime, + struct timeval *value) { -#ifndef __s390x__ + unsigned long long __cputime = (__force unsigned long long) cputime; +#ifndef CONFIG_64BIT register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = (cputime % 4096000000ULL) / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_usec = (__cputime % 4096000000ULL) / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } /* * Convert cputime to clock and back. */ -static inline clock_t -cputime_to_clock_t(cputime_t cputime) +static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } -static inline cputime_t -clock_t_to_cputime(unsigned long x) +static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (4096000000ULL / USER_HZ); + return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); } /* * Convert cputime64 to clock. */ -static inline clock_t -cputime64_to_clock_t(cputime64_t cputime) +static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } struct s390_idle_data { + int nohz_delay; unsigned int sequence; unsigned long long idle_count; - unsigned long long idle_enter; unsigned long long idle_time; - int nohz_delay; + unsigned long long clock_idle_enter; + unsigned long long clock_idle_exit; + unsigned long long timer_idle_enter; + unsigned long long timer_idle_exit; }; DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void vtime_start_cpu(__u64 int_clock, __u64 enter_timer); cputime64_t s390_get_idle_time(int cpu); #define arch_idle_time(cpu) s390_get_idle_time(cpu) -static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock, - __u64 enter_timer) -{ - if (regs->psw.mask & PSW_MASK_WAIT) - vtime_start_cpu(int_clock, enter_timer); -} - static inline int s390_nohz_delay(int cpu) { return __get_cpu_var(s390_idle).nohz_delay != 0; diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h index 749a97e61be..7c31d3e25cd 100644 --- a/arch/s390/include/asm/crw.h +++ b/arch/s390/include/asm/crw.h @@ -1,6 +1,6 @@ /* * Data definitions for channel report processing - * Copyright IBM Corp. 2000,2009 + * Copyright IBM Corp. 2000, 2009 * Author(s): Ingo Adlung <adlung@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Cornelia Huck <cornelia.huck@de.ibm.com>, diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h new file mode 100644 index 00000000000..09d1dd46bd5 --- /dev/null +++ b/arch/s390/include/asm/css_chars.h @@ -0,0 +1,38 @@ +#ifndef _ASM_CSS_CHARS_H +#define _ASM_CSS_CHARS_H + +#include <linux/types.h> + +struct css_general_char { + u64 : 12; + u32 dynio : 1; /* bit 12 */ + u32 : 4; + u32 eadm : 1; /* bit 17 */ + u32 : 23; + u32 aif : 1; /* bit 41 */ + u32 : 3; + u32 mcss : 1; /* bit 45 */ + u32 fcs : 1; /* bit 46 */ + u32 : 1; + u32 ext_mb : 1; /* bit 48 */ + u32 : 7; + u32 aif_tdd : 1; /* bit 56 */ + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; + u32 aif_osa : 1; /* bit 67 */ + u32 : 12; + u32 eadm_rf : 1; /* bit 80 */ + u32 : 1; + u32 cib : 1; /* bit 82 */ + u32 : 5; + u32 fcx : 1; /* bit 88 */ + u32 : 19; + u32 alt_ssi : 1; /* bit 108 */ + u32:1; + u32 narf:1; /* bit 110 */ +} __packed; + +extern struct css_general_char css_general_characteristics; + +#endif diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h new file mode 100644 index 00000000000..31ab9f346d7 --- /dev/null +++ b/arch/s390/include/asm/ctl_reg.h @@ -0,0 +1,82 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_CTL_REG_H +#define __ASM_CTL_REG_H + +#include <linux/bug.h> + +#ifdef CONFIG_64BIT +# define __CTL_LOAD "lctlg" +# define __CTL_STORE "stctg" +#else +# define __CTL_LOAD "lctl" +# define __CTL_STORE "stctl" +#endif + +#define __ctl_load(array, low, high) { \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + \ + BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ + asm volatile( \ + __CTL_LOAD " %1,%2,%0\n" \ + : : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\ +} + +#define __ctl_store(array, low, high) { \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + \ + BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ + asm volatile( \ + __CTL_STORE " %1,%2,%0\n" \ + : "=Q" (*(addrtype *)(&array)) \ + : "i" (low), "i" (high)); \ +} + +static inline void __ctl_set_bit(unsigned int cr, unsigned int bit) +{ + unsigned long reg; + + __ctl_store(reg, cr, cr); + reg |= 1UL << bit; + __ctl_load(reg, cr, cr); +} + +static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) +{ + unsigned long reg; + + __ctl_store(reg, cr, cr); + reg &= ~(1UL << bit); + __ctl_load(reg, cr, cr); +} + +void smp_ctl_set_bit(int cr, int bit); +void smp_ctl_clear_bit(int cr, int bit); + +union ctlreg0 { + unsigned long val; + struct { +#ifdef CONFIG_64BIT + unsigned long : 32; +#endif + unsigned long : 3; + unsigned long lap : 1; /* Low-address-protection control */ + unsigned long : 4; + unsigned long edat : 1; /* Enhanced-DAT-enablement control */ + unsigned long : 23; + }; +}; + +#ifdef CONFIG_SMP +# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) +# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) +#else +# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) +# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) +#endif + +#endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h index 83cf36cde2d..b80941f30df 100644 --- a/arch/s390/include/asm/current.h +++ b/arch/s390/include/asm/current.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/current.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * * Derived from "include/asm-i386/current.h" @@ -11,13 +9,10 @@ #ifndef _S390_CURRENT_H #define _S390_CURRENT_H -#ifdef __KERNEL__ #include <asm/lowcore.h> struct task_struct; #define current ((struct task_struct *const)S390_lowcore.current_task) -#endif - #endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 18124b75a7a..530c15eb01e 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -1,43 +1,16 @@ /* - * include/asm-s390/debug.h * S/390 debug facility * - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 1999, 2000 */ - #ifndef DEBUG_H #define DEBUG_H -#include <linux/fs.h> - -/* Note: - * struct __debug_entry must be defined outside of #ifdef __KERNEL__ - * in order to allow a user program to analyze the 'raw'-view. - */ - -struct __debug_entry{ - union { - struct { - unsigned long long clock:52; - unsigned long long exception:1; - unsigned long long level:3; - unsigned long long cpuid:8; - } fields; - - unsigned long long stck; - } id; - void* caller; -} __attribute__((packed)); - - -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ - -#ifdef __KERNEL__ #include <linux/string.h> #include <linux/spinlock.h> #include <linux/kernel.h> #include <linux/time.h> +#include <uapi/asm/debug.h> #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ #define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ @@ -73,7 +46,7 @@ typedef struct debug_info { struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; struct debug_view* views[DEBUG_MAX_VIEWS]; char name[DEBUG_MAX_NAME_LEN]; - mode_t mode; + umode_t mode; } debug_info_t; typedef int (debug_header_proc_t) (debug_info_t* id, @@ -124,15 +97,21 @@ debug_info_t *debug_register(const char *name, int pages, int nr_areas, int buf_size); debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, - int buf_size, mode_t mode, uid_t uid, + int buf_size, umode_t mode, uid_t uid, gid_t gid); void debug_unregister(debug_info_t* id); void debug_set_level(debug_info_t* id, int new_level); +void debug_set_critical(void); void debug_stop_all(void); +static inline bool debug_level_enabled(debug_info_t* id, int level) +{ + return level <= id->level; +} + static inline debug_entry_t* debug_event(debug_info_t* id, int level, void* data, int length) { @@ -255,5 +234,4 @@ int debug_unregister_view(debug_info_t* id, struct debug_view* view); #define PRINT_FATAL(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) #endif /* DASD_DEBUG */ -#endif /* __KERNEL__ */ #endif /* DEBUG_H */ diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 8a096b83f51..3f6e4095f47 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/delay.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * * Derived from "include/asm-i386/delay.h" @@ -14,10 +12,12 @@ #ifndef _S390_DELAY_H #define _S390_DELAY_H -extern void __udelay(unsigned long long usecs); -extern void udelay_simple(unsigned long long usecs); -extern void __delay(unsigned long loops); +void __ndelay(unsigned long long nsecs); +void __udelay(unsigned long long usecs); +void udelay_simple(unsigned long long usecs); +void __delay(unsigned long loops); +#define ndelay(n) __ndelay((unsigned long long) (n)) #define udelay(n) __udelay((unsigned long long) (n)) #define mdelay(n) __udelay((unsigned long long) (n) * 1000) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 72b2e2f2d32..7e91c58072e 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -9,9 +9,22 @@ #define _ASM_S390_DIAG_H /* - * Diagnose 10: Release pages + * Diagnose 10: Release page range */ -extern void diag10(unsigned long addr); +static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) +{ + unsigned long start_addr, end_addr; + + start_addr = start_pfn << PAGE_SHIFT; + end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; + + asm volatile( + "0: diag %0,%1,0x10\n" + "1:\n" + EX_TABLE(0b, 1b) + EX_TABLE(1b, 1b) + : : "a" (start_addr), "a" (end_addr)); +} /* * Diagnose 14: Input spool file manipulation diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h new file mode 100644 index 00000000000..04a83f5773c --- /dev/null +++ b/arch/s390/include/asm/dis.h @@ -0,0 +1,52 @@ +/* + * Disassemble s390 instructions. + * + * Copyright IBM Corp. 2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#ifndef __ASM_S390_DIS_H__ +#define __ASM_S390_DIS_H__ + +/* Type of operand */ +#define OPERAND_GPR 0x1 /* Operand printed as %rx */ +#define OPERAND_FPR 0x2 /* Operand printed as %fx */ +#define OPERAND_AR 0x4 /* Operand printed as %ax */ +#define OPERAND_CR 0x8 /* Operand printed as %cx */ +#define OPERAND_DISP 0x10 /* Operand printed as displacement */ +#define OPERAND_BASE 0x20 /* Operand printed as base register */ +#define OPERAND_INDEX 0x40 /* Operand printed as index register */ +#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ + + +struct s390_operand { + int bits; /* The number of bits in the operand. */ + int shift; /* The number of bits to shift. */ + int flags; /* One bit syntax flags. */ +}; + +struct s390_insn { + const char name[5]; + unsigned char opfrag; + unsigned char format; +}; + + +static inline int insn_length(unsigned char code) +{ + return ((((int) code + 64) >> 7) + 1) << 1; +} + +void show_code(struct pt_regs *regs); +void print_fn_code(unsigned char *code, unsigned long len); +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len); +struct s390_insn *find_insn(unsigned char *code); + +static inline int is_known_insn(unsigned char *code) +{ + return !!find_insn(code); +} + +#endif /* __ASM_S390_DIS_H__ */ diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h new file mode 100644 index 00000000000..3fbc67d9e19 --- /dev/null +++ b/arch/s390/include/asm/dma-mapping.h @@ -0,0 +1,79 @@ +#ifndef _ASM_S390_DMA_MAPPING_H +#define _ASM_S390_DMA_MAPPING_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/dma-attrs.h> +#include <linux/dma-debug.h> +#include <linux/io.h> + +#define DMA_ERROR_CODE (~(dma_addr_t) 0x0) + +extern struct dma_map_ops s390_dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return &s390_dma_ops; +} + +extern int dma_set_mask(struct device *dev, u64 mask); + +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +#include <asm-generic/dma-mapping-common.h> + +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops->dma_supported == NULL) + return 1; + return dma_ops->dma_supported(dev, mask); +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + return addr + size - 1 <= *dev->dma_mask; +} + +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_mapping_error(dev, dma_addr); + if (dma_ops->mapping_error) + return dma_ops->mapping_error(dev, dma_addr); + return dma_addr == DMA_ERROR_CODE; +} + +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *ret; + + ret = ops->alloc(dev, size, dma_handle, flag, NULL); + debug_dma_alloc_coherent(dev, size, *dma_handle, ret); + return ret; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + dma_ops->free(dev, size, cpu_addr, dma_handle, NULL); +} + +#endif /* _ASM_S390_DMA_MAPPING_H */ diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h index 7425c6af6cd..bb9bdcd2086 100644 --- a/arch/s390/include/asm/dma.h +++ b/arch/s390/include/asm/dma.h @@ -1,16 +1,19 @@ -/* - * include/asm-s390/dma.h - * - * S390 version - */ - -#ifndef _ASM_DMA_H -#define _ASM_DMA_H +#ifndef _ASM_S390_DMA_H +#define _ASM_S390_DMA_H -#include <asm/io.h> /* need byte IO */ +#include <asm/io.h> +/* + * MAX_DMA_ADDRESS is ambiguous because on s390 its completely unrelated + * to DMA. It _is_ used for the s390 memory zone split at 2GB caused + * by the 31 bit heritage. + */ #define MAX_DMA_ADDRESS 0x80000000 -#define free_dma(x) do { } while (0) +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif -#endif /* _ASM_DMA_H */ +#endif /* _ASM_S390_DMA_H */ diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h new file mode 100644 index 00000000000..67026300c88 --- /dev/null +++ b/arch/s390/include/asm/eadm.h @@ -0,0 +1,117 @@ +#ifndef _ASM_S390_EADM_H +#define _ASM_S390_EADM_H + +#include <linux/types.h> +#include <linux/device.h> + +struct arqb { + u64 data; + u16 fmt:4; + u16:12; + u16 cmd_code; + u16:16; + u16 msb_count; + u32 reserved[12]; +} __packed; + +#define ARQB_CMD_MOVE 1 + +struct arsb { + u16 fmt:4; + u32:28; + u8 ef; + u8:8; + u8 ecbi; + u8:8; + u8 fvf; + u16:16; + u8 eqc; + u32:32; + u64 fail_msb; + u64 fail_aidaw; + u64 fail_ms; + u64 fail_scm; + u32 reserved[4]; +} __packed; + +#define EQC_WR_PROHIBIT 22 + +struct msb { + u8 fmt:4; + u8 oc:4; + u8 flags; + u16:12; + u16 bs:4; + u32 blk_count; + u64 data_addr; + u64 scm_addr; + u64:64; +} __packed; + +struct aidaw { + u8 flags; + u32 :24; + u32 :32; + u64 data_addr; +} __packed; + +#define MSB_OC_CLEAR 0 +#define MSB_OC_READ 1 +#define MSB_OC_WRITE 2 +#define MSB_OC_RELEASE 3 + +#define MSB_FLAG_BNM 0x80 +#define MSB_FLAG_IDA 0x40 + +#define MSB_BS_4K 0 +#define MSB_BS_1M 1 + +#define AOB_NR_MSB 124 + +struct aob { + struct arqb request; + struct arsb response; + struct msb msb[AOB_NR_MSB]; +} __packed __aligned(PAGE_SIZE); + +struct aob_rq_header { + struct scm_device *scmdev; + char data[0]; +}; + +struct scm_device { + u64 address; + u64 size; + unsigned int nr_max_block; + struct device dev; + struct { + unsigned int persistence:4; + unsigned int oper_state:4; + unsigned int data_state:4; + unsigned int rank:4; + unsigned int release:1; + unsigned int res_id:8; + } __packed attrs; +}; + +#define OP_STATE_GOOD 1 +#define OP_STATE_TEMP_ERR 2 +#define OP_STATE_PERM_ERR 3 + +enum scm_event {SCM_CHANGE, SCM_AVAIL}; + +struct scm_driver { + struct device_driver drv; + int (*probe) (struct scm_device *scmdev); + int (*remove) (struct scm_device *scmdev); + void (*notify) (struct scm_device *scmdev, enum scm_event event); + void (*handler) (struct scm_device *scmdev, void *data, int error); +}; + +int scm_driver_register(struct scm_driver *scmdrv); +void scm_driver_unregister(struct scm_driver *scmdrv); + +int eadm_start_aob(struct aob *aob); +void scm_irq_handler(struct aob *aob, int error); + +#endif /* _ASM_S390_EADM_H */ diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index 7f6f641d32f..c5befc5a3bf 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -1,9 +1,8 @@ /* - * include/asm-s390/ebcdic.h * EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 10c029cfcc7..78f4f8711d5 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/elf.h - * * S390 version * * Derived from "include/asm-i386/elf.h" @@ -103,15 +101,16 @@ #define HWCAP_S390_HPAGE 128 #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 +#define HWCAP_S390_TE 1024 /* * These are used to set parameters in the core dumps. */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define ELF_CLASS ELFCLASS32 -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define ELF_CLASS ELFCLASS64 -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 @@ -120,6 +119,8 @@ */ #include <asm/ptrace.h> +#include <asm/compat.h> +#include <asm/syscall.h> #include <asm/user.h> typedef s390_fp_regs elf_fpregset_t; @@ -129,7 +130,6 @@ typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; #include <linux/sched.h> /* for task_struct */ -#include <asm/system.h> /* for save_access_regs */ #include <asm/mmu_context.h> #include <asm/vdso.h> @@ -182,31 +182,31 @@ extern unsigned long elf_hwcap; extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) -#ifndef __s390x__ -#define SET_PERSONALITY(ex) set_personality(PER_LINUX) -#else /* __s390x__ */ +#ifndef CONFIG_COMPAT +#define SET_PERSONALITY(ex) \ +do { \ + set_personality(PER_LINUX | \ + (current->personality & (~PER_MASK))); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ +} while (0) +#else /* CONFIG_COMPAT */ #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ - set_personality(PER_LINUX); \ - if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ + set_personality(PER_LINUX | \ + (current->personality & ~PER_MASK)); \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ set_thread_flag(TIF_31BIT); \ - else \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table_emu; \ + } else { \ clear_thread_flag(TIF_31BIT); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ + } \ } while (0) -#endif /* __s390x__ */ - -/* - * An executable for which elf_read_implies_exec() returns TRUE will - * have the READ_IMPLIES_EXEC personality flag set automatically. - */ -#define elf_read_implies_exec(ex, executable_stack) \ -({ \ - if (current->mm->context.noexec && \ - executable_stack != EXSTACK_DISABLE_X) \ - disable_noexec(current->mm, current); \ - current->mm->context.noexec == 0; \ -}) +#endif /* CONFIG_COMPAT */ #define STACK_RND_MASK 0x7ffUL @@ -225,4 +225,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + #endif diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h index 538e1b36a72..629b79a9316 100644 --- a/arch/s390/include/asm/etr.h +++ b/arch/s390/include/asm/etr.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/etr.h - * * Copyright IBM Corp. 2006 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -142,7 +140,7 @@ struct etr_ptff_qto { /* Inline assembly helper functions */ static inline int etr_setr(struct etr_eacr *ctrl) { - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .insn s,0xb2160000,%1\n" @@ -156,7 +154,7 @@ static inline int etr_setr(struct etr_eacr *ctrl) /* Stores a format 1 aib with 64 bytes */ static inline int etr_stetr(struct etr_aib *aib) { - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .insn s,0xb2170000,%1\n" @@ -171,7 +169,7 @@ static inline int etr_stetr(struct etr_aib *aib) static inline int etr_steai(struct etr_aib *aib, unsigned int func) { register unsigned int reg0 asm("0") = func; - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .insn s,0xb2b30000,%1\n" @@ -192,7 +190,7 @@ static inline int etr_ptff(void *ptff_block, unsigned int func) { register unsigned int reg0 asm("0") = func; register unsigned long reg1 asm("1") = (unsigned long) ptff_block; - int rc = -ENOSYS; + int rc = -EOPNOTSUPP; asm volatile( " .word 0x0104\n" diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h new file mode 100644 index 00000000000..c4a93d6327f --- /dev/null +++ b/arch/s390/include/asm/exec.h @@ -0,0 +1,12 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_EXEC_H +#define __ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* __ASM_EXEC_H */ diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h index 33837d75618..6276002d76b 100644 --- a/arch/s390/include/asm/extmem.h +++ b/arch/s390/include/asm/extmem.h @@ -1,8 +1,6 @@ /* - * include/asm-s390x/extmem.h - * * definitions for external memory segment support - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2003 */ #ifndef _ASM_S390X_DCSS_H diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h new file mode 100644 index 00000000000..0aa6a7ed95a --- /dev/null +++ b/arch/s390/include/asm/facility.h @@ -0,0 +1,67 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_FACILITY_H +#define __ASM_FACILITY_H + +#include <linux/string.h> +#include <linux/preempt.h> +#include <asm/lowcore.h> + +#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ + +static inline int __test_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) facilities + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/* + * The test_facility function uses the bit odering where the MSB is bit 0. + * That makes it easier to query facility bits with the bit number as + * documented in the Principles of Operation. + */ +static inline int test_facility(unsigned long nr) +{ + return __test_facility(nr, &S390_lowcore.stfle_fac_list); +} + +/** + * stfle - Store facility list extended + * @stfle_fac_list: array where facility list can be stored + * @size: size of passed in array in double words + */ +static inline void stfle(u64 *stfle_fac_list, int size) +{ + unsigned long nr; + + preempt_disable(); + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b) + : "+m" (S390_lowcore.stfl_fac_list)); + nr = 4; /* bytes stored by stfl */ + memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + if (S390_lowcore.stfl_fac_list & 0x01000000) { + /* More facility bits available with stfle */ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); + preempt_enable(); +} + +#endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h index ef617099507..7ecb92b469b 100644 --- a/arch/s390/include/asm/fcx.h +++ b/arch/s390/include/asm/fcx.h @@ -12,9 +12,9 @@ #define TCW_FORMAT_DEFAULT 0 #define TCW_TIDAW_FORMAT_DEFAULT 0 -#define TCW_FLAGS_INPUT_TIDA 1 << (23 - 5) -#define TCW_FLAGS_TCCB_TIDA 1 << (23 - 6) -#define TCW_FLAGS_OUTPUT_TIDA 1 << (23 - 7) +#define TCW_FLAGS_INPUT_TIDA (1 << (23 - 5)) +#define TCW_FLAGS_TCCB_TIDA (1 << (23 - 6)) +#define TCW_FLAGS_OUTPUT_TIDA (1 << (23 - 7)) #define TCW_FLAGS_TIDAW_FORMAT(x) ((x) & 3) << (23 - 9) #define TCW_FLAGS_GET_TIDAW_FORMAT(x) (((x) >> (23 - 9)) & 3) @@ -54,11 +54,11 @@ struct tcw { u32 intrg; } __attribute__ ((packed, aligned(64))); -#define TIDAW_FLAGS_LAST 1 << (7 - 0) -#define TIDAW_FLAGS_SKIP 1 << (7 - 1) -#define TIDAW_FLAGS_DATA_INT 1 << (7 - 2) -#define TIDAW_FLAGS_TTIC 1 << (7 - 3) -#define TIDAW_FLAGS_INSERT_CBC 1 << (7 - 4) +#define TIDAW_FLAGS_LAST (1 << (7 - 0)) +#define TIDAW_FLAGS_SKIP (1 << (7 - 1)) +#define TIDAW_FLAGS_DATA_INT (1 << (7 - 2)) +#define TIDAW_FLAGS_TTIC (1 << (7 - 3)) +#define TIDAW_FLAGS_INSERT_CBC (1 << (7 - 4)) /** * struct tidaw - Transport-Indirect-Addressing Word (TIDAW) @@ -106,9 +106,9 @@ struct tsa_ddpc { u8 sense[32]; } __attribute__ ((packed)); -#define TSA_INTRG_FLAGS_CU_STATE_VALID 1 << (7 - 0) -#define TSA_INTRG_FLAGS_DEV_STATE_VALID 1 << (7 - 1) -#define TSA_INTRG_FLAGS_OP_STATE_VALID 1 << (7 - 2) +#define TSA_INTRG_FLAGS_CU_STATE_VALID (1 << (7 - 0)) +#define TSA_INTRG_FLAGS_DEV_STATE_VALID (1 << (7 - 1)) +#define TSA_INTRG_FLAGS_OP_STATE_VALID (1 << (7 - 2)) /** * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA) @@ -140,10 +140,10 @@ struct tsa_intrg { #define TSB_FORMAT_DDPC 2 #define TSB_FORMAT_INTRG 3 -#define TSB_FLAGS_DCW_OFFSET_VALID 1 << (7 - 0) -#define TSB_FLAGS_COUNT_VALID 1 << (7 - 1) -#define TSB_FLAGS_CACHE_MISS 1 << (7 - 2) -#define TSB_FLAGS_TIME_VALID 1 << (7 - 3) +#define TSB_FLAGS_DCW_OFFSET_VALID (1 << (7 - 0)) +#define TSB_FLAGS_COUNT_VALID (1 << (7 - 1)) +#define TSB_FLAGS_CACHE_MISS (1 << (7 - 2)) +#define TSB_FLAGS_TIME_VALID (1 << (7 - 3)) #define TSB_FLAGS_FORMAT(x) ((x) & 7) #define TSB_FORMAT(t) ((t)->flags & 7) @@ -179,9 +179,9 @@ struct tsb { #define DCW_INTRG_RCQ_PRIMARY 1 #define DCW_INTRG_RCQ_SECONDARY 2 -#define DCW_INTRG_FLAGS_MPM 1 < (7 - 0) -#define DCW_INTRG_FLAGS_PPR 1 < (7 - 1) -#define DCW_INTRG_FLAGS_CRIT 1 < (7 - 2) +#define DCW_INTRG_FLAGS_MPM (1 << (7 - 0)) +#define DCW_INTRG_FLAGS_PPR (1 << (7 - 1)) +#define DCW_INTRG_FLAGS_CRIT (1 << (7 - 2)) /** * struct dcw_intrg_data - Interrogate DCW data @@ -216,7 +216,7 @@ struct dcw_intrg_data { u8 prog_data[0]; } __attribute__ ((packed)); -#define DCW_FLAGS_CC 1 << (7 - 1) +#define DCW_FLAGS_CC (1 << (7 - 1)) #define DCW_CMD_WRITE 0x01 #define DCW_CMD_READ 0x02 diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 3c29be4836e..bf246dae136 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -9,18 +9,18 @@ struct dyn_arch_ftrace { }; #define MCOUNT_ADDR ((long)_mcount) -#ifdef CONFIG_64BIT -#define MCOUNT_INSN_SIZE 12 -#define MCOUNT_OFFSET 8 -#else -#define MCOUNT_INSN_SIZE 20 -#define MCOUNT_OFFSET 4 -#endif static inline unsigned long ftrace_call_adjust(unsigned long addr) { - return addr - MCOUNT_OFFSET; + return addr; } #endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_64BIT +#define MCOUNT_INSN_SIZE 12 +#else +#define MCOUNT_INSN_SIZE 22 +#endif + #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 5c5d02de49e..a4811aa0304 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -1,28 +1,63 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H -#ifdef __KERNEL__ - -#include <linux/futex.h> #include <linux/uaccess.h> +#include <linux/futex.h> +#include <asm/mmu_context.h> #include <asm/errno.h> -static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile( \ + " sacf 256\n" \ + "0: l %1,0(%6)\n" \ + "1:"insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4: sacf 768\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc"); + +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval, ret; + int oldval = 0, newval, ret; + load_kernel_asce(); if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - pagefault_disable(); - ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval); + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr %2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } pagefault_enable(); if (!ret) { @@ -39,14 +74,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, - int oldval, int newval) +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; + int ret; - return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); + load_kernel_asce(); + asm volatile( + " sacf 256\n" + "0: cs %1,%4,0(%5)\n" + "1: la %0,0\n" + "2: sacf 768\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory"); + *uval = oldval; + return ret; } -#endif /* __KERNEL__ */ #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index e4155d3eb2c..b7eabaaeffb 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/hardirq.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * @@ -18,7 +16,11 @@ #define __ARCH_IRQ_STAT #define __ARCH_HAS_DO_SOFTIRQ +#define __ARCH_IRQ_EXIT_IRQS_DISABLED -#define HARDIRQ_BITS 8 +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index b56403c2df2..11eae5f55b7 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -17,6 +17,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +pte_t huge_ptep_get(pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); /* * If the arch doesn't supply something else, assume that hugepage @@ -33,128 +36,80 @@ static inline int prepare_hugepage_range(struct file *file, } #define hugetlb_prefault_arch_hook(mm) do { } while (0) +#define arch_clear_hugepage_flags(page) do { } while (0) int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t huge_pte_wrprotect(pte_t pte) +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - pte_val(pte) |= _PAGE_RO; - return pte; + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } -static inline int huge_pte_none(pte_t pte) +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { - return (pte_val(pte) & _SEGMENT_ENTRY_INV) && - !(pte_val(pte) & _SEGMENT_ENTRY_RO); + huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } -static inline pte_t huge_ptep_get(pte_t *ptep) +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) { - pte_t pte = *ptep; - unsigned long mask; - - if (!MACHINE_HAS_HPAGE) { - ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); - if (ptep) { - mask = pte_val(pte) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte = pte_mkhuge(*ptep); - pte_val(pte) |= mask; - } + int changed = !pte_same(huge_ptep_get(ptep), pte); + if (changed) { + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } - return pte; + return changed; } -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - pte_t pte = huge_ptep_get(ptep); + pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); +} - mm->context.flush_mm = 1; - pmd_clear((pmd_t *) ptep); - return pte; +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + return mk_pte(page, pgprot); } -static inline void __pmd_csp(pmd_t *pmdp) +static inline int huge_pte_none(pte_t pte) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INV; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; + return pte_none(pte); } -static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) +static inline int huge_pte_write(pte_t pte) { - unsigned long sto = (unsigned long) pmdp - - pmd_index(address) * sizeof(pmd_t); - - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { - asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,0" - : "=m" (*pmdp) - : "m" (*pmdp), "a" (sto), - "a" ((address & HPAGE_MASK)) - ); - } - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; + return pte_write(pte); } -static inline void huge_ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline int huge_pte_dirty(pte_t pte) { - pmd_t *pmdp = (pmd_t *) ptep; - - if (!MACHINE_HAS_IDTE) { - __pmd_csp(pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - __pmd_csp(pmdp); - } - return; - } + return pte_dirty(pte); +} - __pmd_idte(address, pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - __pmd_idte(address, pmdp); - } - return; +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + return pte_mkwrite(pte); } -#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ - if (__changed) { \ - huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) - -#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = huge_ptep_get(__ptep); \ - if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ - set_huge_pte_at(__mm, __addr, __ptep, \ - huge_pte_wrprotect(__pte)); \ - } \ -}) +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + return pte_mkdirty(pte); +} -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { - huge_ptep_invalidate(vma->vm_mm, address, ptep); + return pte_modify(pte, newprot); } #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h new file mode 100644 index 00000000000..ee96a8b697f --- /dev/null +++ b/arch/s390/include/asm/hw_irq.h @@ -0,0 +1,11 @@ +#ifndef _HW_IRQ_H +#define _HW_IRQ_H + +#include <linux/msi.h> +#include <linux/pci.h> + +void __init init_airq_interrupts(void); +void __init init_cio_interrupts(void); +void __init init_ext_interrupts(void); + +#endif diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index aae276d0038..ea5a6e45fd9 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -1,10 +1,9 @@ /* - * File...........: linux/include/asm-s390x/idals.h * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> * Bugreports.to..: <Linux390@de.ibm.com> - * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 2000a - + * Copyright IBM Corp. 2000 + * * History of changes * 07/24/00 new file * 05/04/02 code restructuring. @@ -20,7 +19,7 @@ #include <asm/cio.h> #include <asm/uaccess.h> -#ifdef __s390x__ +#ifdef CONFIG_64BIT #define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */ #else #define IDA_SIZE_LOG 11 /* 11 for 2k , 12 for 4k */ @@ -33,7 +32,7 @@ static inline int idal_is_needed(void *vaddr, unsigned int length) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT return ((__pa(vaddr) + length - 1) >> 31) != 0; #else return 0; @@ -78,7 +77,7 @@ static inline unsigned long *idal_create_words(unsigned long *idaws, static inline int set_normalized_cda(struct ccw1 * ccw, void *vaddr) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT unsigned int nridaws; unsigned long *idal; @@ -105,7 +104,7 @@ set_normalized_cda(struct ccw1 * ccw, void *vaddr) static inline void clear_normalized_cda(struct ccw1 * ccw) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT if (ccw->flags & CCW_FLAG_IDA) { kfree((void *)(unsigned long) ccw->cda); ccw->flags &= ~CCW_FLAG_IDA; @@ -182,7 +181,7 @@ idal_buffer_free(struct idal_buffer *ib) static inline int __idal_buffer_is_needed(struct idal_buffer *ib) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT return ib->size > (4096ul << ib->page_order) || idal_is_needed(ib->data[0], ib->size); #else diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index b7ff6afc3ca..cd6b9ee7b69 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/io.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * * Derived from "include/asm-i386/io.h" @@ -11,44 +9,64 @@ #ifndef _S390_IO_H #define _S390_IO_H -#ifdef __KERNEL__ - +#include <linux/kernel.h> #include <asm/page.h> +#include <asm/pci_io.h> -#define IO_SPACE_LIMIT 0xffffffff +void *xlate_dev_mem_ptr(unsigned long phys); +#define xlate_dev_mem_ptr xlate_dev_mem_ptr +void unxlate_dev_mem_ptr(unsigned long phys, void *addr); /* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial + * Convert a virtual cached pointer to an uncached pointer */ -static inline unsigned long virt_to_phys(volatile void * address) +#define xlate_dev_kmem_ptr(p) p + +#define IO_SPACE_LIMIT 0 + +#ifdef CONFIG_PCI + +#define ioremap_nocache(addr, size) ioremap(addr, size) +#define ioremap_wc ioremap_nocache + +static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { - unsigned long real_address; - asm volatile( - " lra %0,0(%1)\n" - " jz 0f\n" - " la %0,0\n" - "0:" - : "=a" (real_address) : "a" (address) : "cc"); - return real_address; + return (void __iomem *) offset; } -static inline void * phys_to_virt(unsigned long address) +static inline void iounmap(volatile void __iomem *addr) { - return (void *) address; } /* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access + * s390 needs a private implementation of pci_iomap since ioremap with its + * offset parameter isn't sufficient. That's because BAR spaces are not + * disjunctive on s390 so we need the bar parameter of pci_iomap to find + * the corresponding device and create the mapping cookie. */ -#define xlate_dev_mem_ptr(p) __va(p) +#define pci_iomap pci_iomap +#define pci_iounmap pci_iounmap -/* - * Convert a virtual cached pointer to an uncached pointer - */ -#define xlate_dev_kmem_ptr(p) p +#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count) +#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count) +#define memset_io(dst, val, count) zpci_memset_io(dst, val, count) + +#define __raw_readb zpci_read_u8 +#define __raw_readw zpci_read_u16 +#define __raw_readl zpci_read_u32 +#define __raw_readq zpci_read_u64 +#define __raw_writeb zpci_write_u8 +#define __raw_writew zpci_write_u16 +#define __raw_writel zpci_write_u32 +#define __raw_writeq zpci_write_u64 + +#define readb_relaxed readb +#define readw_relaxed readw +#define readl_relaxed readl +#define readq_relaxed readq + +#endif /* CONFIG_PCI */ -#endif /* __KERNEL__ */ +#include <asm-generic/io.h> #endif diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 5e95d95450b..2fcccc0c997 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -7,6 +7,7 @@ #ifndef _ASM_S390_IPL_H #define _ASM_S390_IPL_H +#include <asm/lowcore.h> #include <asm/types.h> #include <asm/cio.h> #include <asm/setup.h> @@ -86,7 +87,14 @@ struct ipl_parameter_block { */ extern u32 ipl_flags; extern u32 dump_prefix_page; -extern unsigned int zfcpdump_prefix_array[]; + +struct dump_save_areas { + struct save_area **areas; + int count; +}; + +extern struct dump_save_areas dump_save_areas; +struct save_area *dump_save_area_create(int cpu); extern void do_reipl(void); extern void do_halt(void); @@ -167,5 +175,8 @@ enum diag308_rc { }; extern int diag308(unsigned long subcode, void *addr); +extern void diag308_reset(void); +extern void store_status(void); +extern void lgr_info_log(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index db14a311f1d..c4dd400a279 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,33 +1,109 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H +#define EXT_INTERRUPT 1 +#define IO_INTERRUPT 2 +#define THIN_INTERRUPT 3 + +#define NR_IRQS_BASE 4 + +#ifdef CONFIG_PCI_NR_MSI +# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) +#else +# define NR_IRQS NR_IRQS_BASE +#endif + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ 0 + +/* External interruption codes */ +#define EXT_IRQ_INTERRUPT_KEY 0x0040 +#define EXT_IRQ_CLK_COMP 0x1004 +#define EXT_IRQ_CPU_TIMER 0x1005 +#define EXT_IRQ_WARNING_TRACK 0x1007 +#define EXT_IRQ_MALFUNC_ALERT 0x1200 +#define EXT_IRQ_EMERGENCY_SIG 0x1201 +#define EXT_IRQ_EXTERNAL_CALL 0x1202 +#define EXT_IRQ_TIMING_ALERT 0x1406 +#define EXT_IRQ_MEASURE_ALERT 0x1407 +#define EXT_IRQ_SERVICE_SIG 0x2401 +#define EXT_IRQ_CP_SERVICE 0x2603 +#define EXT_IRQ_IUCV 0x4000 + +#ifndef __ASSEMBLY__ + #include <linux/hardirq.h> +#include <linux/percpu.h> +#include <linux/cache.h> +#include <linux/types.h> enum interruption_class { - EXTERNAL_INTERRUPT, - IO_INTERRUPT, - EXTINT_CLK, - EXTINT_IPI, - EXTINT_TMR, - EXTINT_TLA, - EXTINT_PFL, - EXTINT_DSD, - EXTINT_VRT, - EXTINT_SCP, - EXTINT_IUC, - IOINT_QAI, - IOINT_QDI, - IOINT_DAS, - IOINT_C15, - IOINT_C70, - IOINT_TAP, - IOINT_VMR, - IOINT_LCS, - IOINT_CLW, - IOINT_CTC, - IOINT_APB, + IRQEXT_CLK, + IRQEXT_EXC, + IRQEXT_EMS, + IRQEXT_TMR, + IRQEXT_TLA, + IRQEXT_PFL, + IRQEXT_DSD, + IRQEXT_VRT, + IRQEXT_SCP, + IRQEXT_IUC, + IRQEXT_CMS, + IRQEXT_CMC, + IRQEXT_CMR, + IRQIO_CIO, + IRQIO_QAI, + IRQIO_DAS, + IRQIO_C15, + IRQIO_C70, + IRQIO_TAP, + IRQIO_VMR, + IRQIO_LCS, + IRQIO_CLW, + IRQIO_CTC, + IRQIO_APB, + IRQIO_ADM, + IRQIO_CSC, + IRQIO_PCI, + IRQIO_MSI, + IRQIO_VIR, + IRQIO_VAI, NMI_NMI, - NR_IRQS, + CPU_RST, + NR_ARCH_IRQS +}; + +struct irq_stat { + unsigned int irqs[NR_ARCH_IRQS]; }; +DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); + +static __always_inline void inc_irq_stat(enum interruption_class irq) +{ + __get_cpu_var(irq_stat).irqs[irq]++; +} + +struct ext_code { + unsigned short subcode; + unsigned short code; +}; + +typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); + +int register_external_irq(u16 code, ext_int_handler_t handler); +int unregister_external_irq(u16 code, ext_int_handler_t handler); + +enum irq_subclass { + IRQ_SUBCLASS_MEASUREMENT_ALERT = 5, + IRQ_SUBCLASS_SERVICE_SIGNAL = 9, +}; + +void irq_subclass_register(enum irq_subclass subclass); +void irq_subclass_unregister(enum irq_subclass subclass); + +#define irq_canonicalize(irq) (irq) + +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h index 865d6d891ac..37b9091ab8c 100644 --- a/arch/s390/include/asm/irqflags.h +++ b/arch/s390/include/asm/irqflags.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2006,2010 + * Copyright IBM Corp. 2006, 2010 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -29,42 +29,42 @@ }) /* set system mask. */ -static inline void __arch_local_irq_ssm(unsigned long flags) +static inline notrace void __arch_local_irq_ssm(unsigned long flags) { asm volatile("ssm %0" : : "Q" (flags) : "memory"); } -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { return __arch_local_irq_stosm(0x00); } -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { return __arch_local_irq_stnsm(0xfc); } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { arch_local_irq_save(); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { __arch_local_irq_stosm(0x03); } -static inline void arch_local_irq_restore(unsigned long flags) +static inline notrace void arch_local_irq_restore(unsigned long flags) { __arch_local_irq_ssm(flags); } -static inline bool arch_irqs_disabled_flags(unsigned long flags) +static inline notrace bool arch_irqs_disabled_flags(unsigned long flags) { return !(flags & (3UL << (BITS_PER_LONG - 8))); } -static inline bool arch_irqs_disabled(void) +static inline notrace bool arch_irqs_disabled(void) { return arch_irqs_disabled_flags(arch_local_save_flags()); } diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 1420a111594..68d7d68300f 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h @@ -14,9 +14,11 @@ /* Regular I/O interrupts. */ #define IO_SCH_ISC 3 /* regular I/O subchannels */ #define CONSOLE_ISC 1 /* console I/O subchannel */ +#define EADM_SCH_ISC 4 /* EADM subchannels */ #define CHSC_SCH_ISC 7 /* CHSC subchannels */ /* Adapter interrupts. */ #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ +#define PCI_ISC 2 /* PCI I/O subchannels */ #define AP_ISC 6 /* adjunct processor (crypto) devices */ /* Functions for registration of I/O interruption subclasses */ diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h index a9bc5c36b32..fb1bedd3dc0 100644 --- a/arch/s390/include/asm/itcw.h +++ b/arch/s390/include/asm/itcw.h @@ -6,7 +6,7 @@ */ #ifndef _ASM_S390_ITCW_H -#define _ASM_S390_ITCW_H _ASM_S390_ITCW_H +#define _ASM_S390_ITCW_H #include <linux/types.h> #include <asm/fcx.h> diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h new file mode 100644 index 00000000000..346b1c85ffb --- /dev/null +++ b/arch/s390/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_S390_JUMP_LABEL_H +#define _ASM_S390_JUMP_LABEL_H + +#include <linux/types.h> + +#define JUMP_LABEL_NOP_SIZE 6 + +#ifdef CONFIG_64BIT +#define ASM_PTR ".quad" +#define ASM_ALIGN ".balign 8" +#else +#define ASM_PTR ".long" +#define ASM_ALIGN ".balign 4" +#endif + +static __always_inline bool arch_static_branch(struct static_key *key) +{ + asm_volatile_goto("0: brcl 0,0\n" + ".pushsection __jump_table, \"aw\"\n" + ASM_ALIGN "\n" + ASM_PTR " 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (key) : : label); + return false; +label: + return true; +} + +typedef unsigned long jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h index 40db27cd6e6..5c1abd47612 100644 --- a/arch/s390/include/asm/kdebug.h +++ b/arch/s390/include/asm/kdebug.h @@ -22,6 +22,6 @@ enum die_val { DIE_NMI_IPI, }; -extern void die(const char *, struct pt_regs *, long); +extern void die(struct pt_regs *, const char *); #endif diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index bb729b84a21..694bcd6bd92 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -1,7 +1,5 @@ /* - * include/asm-s390/kexec.h - * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005 * * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> * @@ -10,10 +8,8 @@ #ifndef _S390_KEXEC_H #define _S390_KEXEC_H -#ifdef __KERNEL__ -#include <asm/page.h> -#endif #include <asm/processor.h> +#include <asm/page.h> /* * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. * I.e. Maximum page that is mapped directly into kernel memory, @@ -30,12 +26,36 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 +/* + * Size for s390x ELF notes per CPU + * + * Seven notes plus zero note at the end: prstatus, fpregset, timer, + * tod_cmp, tod_reg, control regs, and prefix + */ +#define KEXEC_NOTE_BYTES \ + (ALIGN(sizeof(struct elf_note), 4) * 8 + \ + ALIGN(sizeof("CORE"), 4) * 7 + \ + ALIGN(sizeof(struct elf_prstatus), 4) + \ + ALIGN(sizeof(elf_fpregset_t), 4) + \ + ALIGN(sizeof(u64), 4) + \ + ALIGN(sizeof(u64), 4) + \ + ALIGN(sizeof(u32), 4) + \ + ALIGN(sizeof(u64) * 16, 4) + \ + ALIGN(sizeof(u32), 4) \ + ) + /* Provide a dummy definition to avoid build failures. */ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { } diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h index 94ec3ee0798..0a88622339e 100644 --- a/arch/s390/include/asm/kmap_types.h +++ b/arch/s390/include/asm/kmap_types.h @@ -1,8 +1,6 @@ -#ifdef __KERNEL__ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H #include <asm-generic/kmap_types.h> #endif -#endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index a231a9439c4..4176dfe0fba 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation, 2002, 2006 + * Copyright IBM Corp. 2002, 2006 * * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel * Probes initial implementation ( includes suggestions from @@ -31,6 +31,8 @@ #include <linux/ptrace.h> #include <linux/percpu.h> +#define __ARCH_WANT_KPROBES_INSN_SLOT + struct pt_regs; struct kprobe; @@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t; /* Architecture specific copy of original instruction */ struct arch_specific_insn { /* copy of original instruction */ - kprobe_opcode_t insn[MAX_INSN_SIZE]; + kprobe_opcode_t *insn; }; struct prev_kprobe { diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h deleted file mode 100644 index 82b32a100c7..00000000000 --- a/arch/s390/include/asm/kvm.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef __LINUX_KVM_S390_H -#define __LINUX_KVM_S390_H -/* - * asm-s390/kvm.h - KVM s390 specific structures and definitions - * - * Copyright IBM Corp. 2008 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * - * Author(s): Carsten Otte <cotte@de.ibm.com> - * Christian Borntraeger <borntraeger@de.ibm.com> - */ -#include <linux/types.h> - -#define __KVM_S390 - -/* for KVM_GET_REGS and KVM_SET_REGS */ -struct kvm_regs { - /* general purpose regs for s390 */ - __u64 gprs[16]; -}; - -/* for KVM_GET_SREGS and KVM_SET_SREGS */ -struct kvm_sregs { - __u32 acrs[16]; - __u64 crs[16]; -}; - -/* for KVM_GET_FPU and KVM_SET_FPU */ -struct kvm_fpu { - __u32 fpc; - __u64 fprs[16]; -}; - -struct kvm_debug_exit_arch { -}; - -/* for KVM_SET_GUEST_DEBUG */ -struct kvm_guest_debug_arch { -}; - -#endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index cef7dbf69df..4181d7baabb 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1,7 +1,7 @@ /* - * asm-s390/kvm_host.h - definition for kernel virtual machines on s390 + * definition for kernel virtual machines on s390 * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -16,38 +16,49 @@ #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/kvm_host.h> +#include <linux/kvm.h> #include <asm/debug.h> #include <asm/cpu.h> +#include <asm/isc.h> #define KVM_MAX_VCPUS 64 -#define KVM_MEMORY_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_USER_MEM_SLOTS 32 + +/* + * These seem to be used for allocating ->chip in the routing table, + * which we don't use. 4096 is an out-of-thin-air value. If we need + * to look at ->chip later on, we'll need to revisit this. + */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 4096 + +#define SIGP_CTRL_C 0x00800000 struct sca_entry { - atomic_t scn; + atomic_t ctrl; __u32 reserved; __u64 sda; __u64 reserved2[2]; } __attribute__((packed)); +union ipte_control { + unsigned long val; + struct { + unsigned long k : 1; + unsigned long kh : 31; + unsigned long kg : 32; + }; +}; struct sca_block { - __u64 ipte_control; + union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; struct sca_entry cpu[64]; } __attribute__((packed)); -#define KVM_NR_PAGE_SIZES 2 -#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) -#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) -#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) -#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) -#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) - -#define CPUSTAT_HOST 0x80000000 +#define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 #define CPUSTAT_STOP_INT 0x04000000 @@ -63,23 +74,51 @@ struct sca_block { #define CPUSTAT_ZARCH 0x00000800 #define CPUSTAT_MCDS 0x00000100 #define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 #define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 #define CPUSTAT_J 0x00000002 #define CPUSTAT_P 0x00000001 struct kvm_s390_sie_block { atomic_t cpuflags; /* 0x0000 */ - __u32 prefix; /* 0x0004 */ - __u8 reserved8[32]; /* 0x0008 */ + __u32 : 1; /* 0x0004 */ + __u32 prefix : 18; + __u32 : 13; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + __u8 reserved10[16]; /* 0x0010 */ +#define PROG_BLOCK_SIE 0x00000001 + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ __u64 cputm; /* 0x0028 */ __u64 ckc; /* 0x0030 */ __u64 epoch; /* 0x0038 */ __u8 reserved40[4]; /* 0x0040 */ #define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 __u32 ictl; /* 0x0048 */ __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 __u8 icptcode; /* 0x0050 */ __u8 reserved51; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ @@ -89,26 +128,57 @@ struct kvm_s390_sie_block { __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ __u8 ecb; /* 0x0061 */ - __u8 reserved62[2]; /* 0x0062 */ + __u8 ecb2; /* 0x0062 */ + __u8 reserved63[1]; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ - __u8 reserved70[16]; /* 0x0070 */ - __u64 gmsor; /* 0x0080 */ - __u64 gmslm; /* 0x0088 */ + __u8 reserved70[32]; /* 0x0070 */ psw_t gpsw; /* 0x0090 */ __u64 gg14; /* 0x00a0 */ __u64 gg15; /* 0x00a8 */ - __u8 reservedb0[30]; /* 0x00b0 */ - __u16 iprcc; /* 0x00ce */ - __u8 reservedd0[48]; /* 0x00d0 */ + __u8 reservedb0[20]; /* 0x00b0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + __u32 reservedc8; /* 0x00c8 */ + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + __u64 tecmc; /* 0x00e8 */ + __u8 reservedf0[16]; /* 0x00f0 */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ __u8 reserved188[24]; /* 0x0188 */ __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[92]; /* 0x01a4 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[30]; /* 0x01c0 */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u8 reserved1f0[16]; /* 0x01f0 */ } __attribute__((packed)); +struct kvm_s390_itdb { + __u8 data[256]; +} __packed; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + __u8 reserved200[1024]; /* 0x0200 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +} __packed; + struct kvm_vcpu_stat { u32 exit_userspace; u32 exit_null; @@ -119,8 +189,11 @@ struct kvm_vcpu_stat { u32 exit_instruction; u32 instruction_lctl; u32 instruction_lctlg; + u32 instruction_stctl; + u32 instruction_stctg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; u32 deliver_virtio_interrupt; @@ -128,52 +201,86 @@ struct kvm_vcpu_stat { u32 deliver_prefix_signal; u32 deliver_restart_signal; u32 deliver_program_int; + u32 deliver_io_int; u32 exit_wait_state; + u32 instruction_pfmf; u32 instruction_stidp; u32 instruction_spx; u32 instruction_stpx; u32 instruction_stap; u32 instruction_storage_key; + u32 instruction_ipte_interlock; u32 instruction_stsch; u32 instruction_chsc; u32 instruction_stsi; u32 instruction_stfl; + u32 instruction_tprot; + u32 instruction_essa; u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; + u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 diagnose_10; u32 diagnose_44; + u32 diagnose_9c; }; -struct kvm_s390_io_info { - __u16 subchannel_id; /* 0x0b8 */ - __u16 subchannel_nr; /* 0x0ba */ - __u32 io_int_parm; /* 0x0bc */ - __u32 io_int_word; /* 0x0c0 */ -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u64 ext_params2; -}; - -#define PGM_OPERATION 0x01 -#define PGM_PRIVILEGED_OPERATION 0x02 -#define PGM_EXECUTE 0x03 -#define PGM_PROTECTION 0x04 -#define PGM_ADDRESSING 0x05 -#define PGM_SPECIFICATION 0x06 -#define PGM_DATA 0x07 - -struct kvm_s390_pgm_info { - __u16 code; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; +#define PGM_OPERATION 0x01 +#define PGM_PRIVILEGED_OP 0x02 +#define PGM_EXECUTE 0x03 +#define PGM_PROTECTION 0x04 +#define PGM_ADDRESSING 0x05 +#define PGM_SPECIFICATION 0x06 +#define PGM_DATA 0x07 +#define PGM_FIXED_POINT_OVERFLOW 0x08 +#define PGM_FIXED_POINT_DIVIDE 0x09 +#define PGM_DECIMAL_OVERFLOW 0x0a +#define PGM_DECIMAL_DIVIDE 0x0b +#define PGM_HFP_EXPONENT_OVERFLOW 0x0c +#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d +#define PGM_HFP_SIGNIFICANCE 0x0e +#define PGM_HFP_DIVIDE 0x0f +#define PGM_SEGMENT_TRANSLATION 0x10 +#define PGM_PAGE_TRANSLATION 0x11 +#define PGM_TRANSLATION_SPEC 0x12 +#define PGM_SPECIAL_OPERATION 0x13 +#define PGM_OPERAND 0x15 +#define PGM_TRACE_TABEL 0x16 +#define PGM_SPACE_SWITCH 0x1c +#define PGM_HFP_SQUARE_ROOT 0x1d +#define PGM_PC_TRANSLATION_SPEC 0x1f +#define PGM_AFX_TRANSLATION 0x20 +#define PGM_ASX_TRANSLATION 0x21 +#define PGM_LX_TRANSLATION 0x22 +#define PGM_EX_TRANSLATION 0x23 +#define PGM_PRIMARY_AUTHORITY 0x24 +#define PGM_SECONDARY_AUTHORITY 0x25 +#define PGM_LFX_TRANSLATION 0x26 +#define PGM_LSX_TRANSLATION 0x27 +#define PGM_ALET_SPECIFICATION 0x28 +#define PGM_ALEN_TRANSLATION 0x29 +#define PGM_ALE_SEQUENCE 0x2a +#define PGM_ASTE_VALIDITY 0x2b +#define PGM_ASTE_SEQUENCE 0x2c +#define PGM_EXTENDED_AUTHORITY 0x2d +#define PGM_LSTE_SEQUENCE 0x2e +#define PGM_ASTE_INSTANCE 0x2f +#define PGM_STACK_FULL 0x30 +#define PGM_STACK_EMPTY 0x31 +#define PGM_STACK_SPECIFICATION 0x32 +#define PGM_STACK_TYPE 0x33 +#define PGM_STACK_OPERATION 0x34 +#define PGM_ASCE_TYPE 0x38 +#define PGM_REGION_FIRST_TRANS 0x39 +#define PGM_REGION_SECOND_TRANS 0x3a +#define PGM_REGION_THIRD_TRANS 0x3b +#define PGM_MONITOR 0x40 +#define PGM_PER 0x80 +#define PGM_CRYPTO_OPERATION 0x119 struct kvm_s390_interrupt_info { struct list_head list; @@ -182,14 +289,16 @@ struct kvm_s390_interrupt_info { struct kvm_s390_io_info io; struct kvm_s390_ext_info ext; struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; }; }; /* for local_interrupt.action_flags */ #define ACTION_STORE_ON_STOP (1<<0) #define ACTION_STOP_ON_STOP (1<<1) -#define ACTION_RELOADVCPU_ON_STOP (1<<2) struct kvm_s390_local_interrupt { spinlock_t lock; @@ -197,7 +306,7 @@ struct kvm_s390_local_interrupt { atomic_t active; struct kvm_s390_float_interrupt *float_int; int timer_due; /* event indicator for waitqueue below */ - wait_queue_head_t wq; + wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; }; @@ -207,36 +316,139 @@ struct kvm_s390_float_interrupt { struct list_head list; atomic_t active; int next_rr_cpu; - unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)]; - struct kvm_s390_local_interrupt *local_int[64]; + unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int irq_count; +}; + +struct kvm_hw_wp_info_arch { + unsigned long addr; + unsigned long phys_addr; + int len; + char *old_data; +}; + +struct kvm_hw_bp_info_arch { + unsigned long addr; + int len; }; +/* + * Only the upper 16 bits of kvm_guest_debug->control are arch specific. + * Further KVM_GUESTDBG flags which an be used from userspace can be found in + * arch/s390/include/uapi/asm/kvm.h + */ +#define KVM_GUESTDBG_EXIT_PENDING 0x10000000 + +#define guestdbg_enabled(vcpu) \ + (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) +#define guestdbg_sstep_enabled(vcpu) \ + (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) +#define guestdbg_hw_bp_enabled(vcpu) \ + (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) +#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \ + (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING)) + +struct kvm_guestdbg_info_arch { + unsigned long cr0; + unsigned long cr9; + unsigned long cr10; + unsigned long cr11; + struct kvm_hw_bp_info_arch *hw_bp_info; + struct kvm_hw_wp_info_arch *hw_wp_info; + int nr_hw_bp; + int nr_hw_wp; + unsigned long last_bp; +}; struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; - unsigned long guest_gprs[16]; s390_fp_regs host_fpregs; unsigned int host_acrs[NUM_ACRS]; s390_fp_regs guest_fpregs; - unsigned int guest_acrs[NUM_ACRS]; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct tasklet_struct tasklet; + struct kvm_s390_pgm_info pgm; union { struct cpuid cpu_id; u64 stidp_data; }; + struct gmap *gmap; + struct kvm_guestdbg_info_arch guestdbg; +#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) + unsigned long pfault_token; + unsigned long pfault_select; + unsigned long pfault_compare; }; struct kvm_vm_stat { u32 remote_tlb_flush; }; +struct kvm_arch_memory_slot { +}; + +struct s390_map_info { + struct list_head list; + __u64 guest_addr; + __u64 addr; + struct page *page; +}; + +struct s390_io_adapter { + unsigned int id; + int isc; + bool maskable; + bool masked; + bool swap; + struct rw_semaphore maps_lock; + struct list_head maps; + atomic_t nr_maps; +}; + +#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) +#define MAX_S390_ADAPTER_MAPS 256 + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; + struct kvm_device *flic; + struct gmap *gmap; + int css_support; + int use_irqchip; + int use_cmma; + struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; + wait_queue_head_t ipte_wq; + spinlock_t start_stop_lock; +}; + +#define KVM_HVA_ERR_BAD (-1UL) +#define KVM_HVA_ERR_RO_BAD (-2UL) + +static inline bool kvm_is_error_hva(unsigned long addr) +{ + return IS_ERR_VALUE(addr); +} + +#define ASYNC_PF_PER_VCPU 64 +struct kvm_vcpu; +struct kvm_async_pf; +struct kvm_arch_async_pf { + unsigned long pfault_token; }; -extern int sie64a(struct kvm_s390_sie_block *, unsigned long *); +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +extern int sie64a(struct kvm_s390_sie_block *, u64 *); +extern char sie_exit; #endif diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 6964db226f8..e0f842308a6 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -1,5 +1,5 @@ /* - * asm-s390/kvm_para.h - definition for paravirtual devices on s390 + * definition for paravirtual devices on s390 * * Copyright IBM Corp. 2008 * @@ -9,12 +9,6 @@ * * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> */ - -#ifndef __S390_KVM_PARA_H -#define __S390_KVM_PARA_H - -#ifdef __KERNEL__ - /* * Hypercalls for KVM on s390. The calling convention is similar to the * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1 @@ -29,6 +23,12 @@ * * This work is licensed under the terms of the GNU GPL, version 2. */ +#ifndef __S390_KVM_PARA_H +#define __S390_KVM_PARA_H + +#include <uapi/asm/kvm_para.h> + + static inline long kvm_hypercall0(unsigned long nr) { @@ -149,6 +149,9 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } -#endif +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} #endif /* __S390_KVM_PARA_H */ diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 291c2d01c44..fc8a8284778 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -1,6 +1,9 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -/* Nothing to see here... */ +#include <linux/stringify.h> + +#define __ALIGN .align 4, 0x07 +#define __ALIGN_STR __stringify(__ALIGN) #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 65e172f8209..4349197ab9d 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2012 * Author(s): Hartmut Penner <hp@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Denis Joseph Barrow, @@ -12,13 +12,6 @@ #include <asm/ptrace.h> #include <asm/cpu.h> -void restart_int_handler(void); -void ext_int_handler(void); -void system_call(void); -void pgm_check_handler(void); -void mcck_int_handler(void); -void io_int_handler(void); - #ifdef CONFIG_32BIT #define LC_ORDER 0 @@ -55,7 +48,7 @@ struct _lowcore { psw_t mcck_new_psw; /* 0x0070 */ psw_t io_new_psw; /* 0x0078 */ __u32 ext_params; /* 0x0080 */ - __u16 cpu_addr; /* 0x0084 */ + __u16 ext_cpu_addr; /* 0x0084 */ __u16 ext_int_code; /* 0x0086 */ __u16 svc_ilc; /* 0x0088 */ __u16 svc_code; /* 0x008a */ @@ -63,13 +56,14 @@ struct _lowcore { __u16 pgm_code; /* 0x008e */ __u32 trans_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ - __u16 per_perc_atmid; /* 0x0096 */ + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ __u32 per_address; /* 0x0098 */ __u32 monitor_code; /* 0x009c */ __u8 exc_access_id; /* 0x00a0 */ __u8 per_access_id; /* 0x00a1 */ __u8 op_access_id; /* 0x00a2 */ - __u8 ar_access_id; /* 0x00a3 */ + __u8 ar_mode_id; /* 0x00a3 */ __u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */ __u16 subchannel_id; /* 0x00b8 */ __u16 subchannel_nr; /* 0x00ba */ @@ -96,52 +90,61 @@ struct _lowcore { __u32 gpregs_save_area[16]; /* 0x0180 */ __u32 cregs_save_area[16]; /* 0x01c0 */ + /* Save areas. */ + __u32 save_area_sync[8]; /* 0x0200 */ + __u32 save_area_async[8]; /* 0x0220 */ + __u32 save_area_restart[1]; /* 0x0240 */ + + /* CPU flags. */ + __u32 cpu_flags; /* 0x0244 */ + /* Return psws. */ - __u32 save_area[16]; /* 0x0200 */ - psw_t return_psw; /* 0x0240 */ - psw_t return_mcck_psw; /* 0x0248 */ + psw_t return_psw; /* 0x0248 */ + psw_t return_mcck_psw; /* 0x0250 */ /* CPU time accounting values */ - __u64 sync_enter_timer; /* 0x0250 */ - __u64 async_enter_timer; /* 0x0258 */ - __u64 mcck_enter_timer; /* 0x0260 */ - __u64 exit_timer; /* 0x0268 */ - __u64 user_timer; /* 0x0270 */ - __u64 system_timer; /* 0x0278 */ - __u64 steal_timer; /* 0x0280 */ - __u64 last_update_timer; /* 0x0288 */ - __u64 last_update_clock; /* 0x0290 */ + __u64 sync_enter_timer; /* 0x0258 */ + __u64 async_enter_timer; /* 0x0260 */ + __u64 mcck_enter_timer; /* 0x0268 */ + __u64 exit_timer; /* 0x0270 */ + __u64 user_timer; /* 0x0278 */ + __u64 system_timer; /* 0x0280 */ + __u64 steal_timer; /* 0x0288 */ + __u64 last_update_timer; /* 0x0290 */ + __u64 last_update_clock; /* 0x0298 */ + __u64 int_clock; /* 0x02a0 */ + __u64 mcck_clock; /* 0x02a8 */ + __u64 clock_comparator; /* 0x02b0 */ /* Current process. */ - __u32 current_task; /* 0x0298 */ - __u32 thread_info; /* 0x029c */ - __u32 kernel_stack; /* 0x02a0 */ + __u32 current_task; /* 0x02b8 */ + __u32 thread_info; /* 0x02bc */ + __u32 kernel_stack; /* 0x02c0 */ - /* Interrupt and panic stack. */ - __u32 async_stack; /* 0x02a4 */ - __u32 panic_stack; /* 0x02a8 */ + /* Interrupt, panic and restart stack. */ + __u32 async_stack; /* 0x02c4 */ + __u32 panic_stack; /* 0x02c8 */ + __u32 restart_stack; /* 0x02cc */ + + /* Restart function and parameter. */ + __u32 restart_fn; /* 0x02d0 */ + __u32 restart_data; /* 0x02d4 */ + __u32 restart_source; /* 0x02d8 */ /* Address space pointer. */ - __u32 kernel_asce; /* 0x02ac */ - __u32 user_asce; /* 0x02b0 */ - __u32 user_exec_asce; /* 0x02b4 */ + __u32 kernel_asce; /* 0x02dc */ + __u32 user_asce; /* 0x02e0 */ + __u32 current_pid; /* 0x02e4 */ /* SMP info area */ - __u32 cpu_nr; /* 0x02b8 */ - __u32 softirq_pending; /* 0x02bc */ - __u32 percpu_offset; /* 0x02c0 */ - __u32 ext_call_fast; /* 0x02c4 */ - __u64 int_clock; /* 0x02c8 */ - __u64 mcck_clock; /* 0x02d0 */ - __u64 clock_comparator; /* 0x02d8 */ - __u32 machine_flags; /* 0x02e0 */ - __u32 ftrace_func; /* 0x02e4 */ - __u8 pad_0x02e8[0x0300-0x02e8]; /* 0x02e8 */ - - /* Interrupt response block */ - __u8 irb[64]; /* 0x0300 */ - - __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ + __u32 cpu_nr; /* 0x02e8 */ + __u32 softirq_pending; /* 0x02ec */ + __u32 percpu_offset; /* 0x02f0 */ + __u32 machine_flags; /* 0x02f4 */ + __u32 ftrace_func; /* 0x02f8 */ + __u32 spinlock_lockval; /* 0x02fc */ + + __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -150,7 +153,10 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ - __u8 pad_0x0e08[0x0f00-0x0e08]; /* 0x0e08 */ + __u32 vmcore_info; /* 0x0e08 */ + __u8 pad_0x0e0c[0x0e18-0x0e0c]; /* 0x0e0c */ + __u32 os_info; /* 0x0e18 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -182,7 +188,7 @@ struct _lowcore { __u32 ipl_parmblock_ptr; /* 0x0014 */ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ __u32 ext_params; /* 0x0080 */ - __u16 cpu_addr; /* 0x0084 */ + __u16 ext_cpu_addr; /* 0x0084 */ __u16 ext_int_code; /* 0x0086 */ __u16 svc_ilc; /* 0x0088 */ __u16 svc_code; /* 0x008a */ @@ -190,12 +196,13 @@ struct _lowcore { __u16 pgm_code; /* 0x008e */ __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ - __u16 per_perc_atmid; /* 0x0096 */ + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ __u64 per_address; /* 0x0098 */ __u8 exc_access_id; /* 0x00a0 */ __u8 per_access_id; /* 0x00a1 */ __u8 op_access_id; /* 0x00a2 */ - __u8 ar_access_id; /* 0x00a3 */ + __u8 ar_mode_id; /* 0x00a3 */ __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ __u64 trans_exc_code; /* 0x00a8 */ __u64 monitor_code; /* 0x00b0 */ @@ -227,57 +234,67 @@ struct _lowcore { psw_t mcck_new_psw; /* 0x01e0 */ psw_t io_new_psw; /* 0x01f0 */ - /* Entry/exit save area & return psws. */ - __u64 save_area[16]; /* 0x0200 */ - psw_t return_psw; /* 0x0280 */ - psw_t return_mcck_psw; /* 0x0290 */ + /* Save areas. */ + __u64 save_area_sync[8]; /* 0x0200 */ + __u64 save_area_async[8]; /* 0x0240 */ + __u64 save_area_restart[1]; /* 0x0280 */ + + /* CPU flags. */ + __u64 cpu_flags; /* 0x0288 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0290 */ + psw_t return_mcck_psw; /* 0x02a0 */ /* CPU accounting and timing values. */ - __u64 sync_enter_timer; /* 0x02a0 */ - __u64 async_enter_timer; /* 0x02a8 */ - __u64 mcck_enter_timer; /* 0x02b0 */ - __u64 exit_timer; /* 0x02b8 */ - __u64 user_timer; /* 0x02c0 */ - __u64 system_timer; /* 0x02c8 */ - __u64 steal_timer; /* 0x02d0 */ - __u64 last_update_timer; /* 0x02d8 */ - __u64 last_update_clock; /* 0x02e0 */ + __u64 sync_enter_timer; /* 0x02b0 */ + __u64 async_enter_timer; /* 0x02b8 */ + __u64 mcck_enter_timer; /* 0x02c0 */ + __u64 exit_timer; /* 0x02c8 */ + __u64 user_timer; /* 0x02d0 */ + __u64 system_timer; /* 0x02d8 */ + __u64 steal_timer; /* 0x02e0 */ + __u64 last_update_timer; /* 0x02e8 */ + __u64 last_update_clock; /* 0x02f0 */ + __u64 int_clock; /* 0x02f8 */ + __u64 mcck_clock; /* 0x0300 */ + __u64 clock_comparator; /* 0x0308 */ /* Current process. */ - __u64 current_task; /* 0x02e8 */ - __u64 thread_info; /* 0x02f0 */ - __u64 kernel_stack; /* 0x02f8 */ + __u64 current_task; /* 0x0310 */ + __u64 thread_info; /* 0x0318 */ + __u64 kernel_stack; /* 0x0320 */ - /* Interrupt and panic stack. */ - __u64 async_stack; /* 0x0300 */ - __u64 panic_stack; /* 0x0308 */ + /* Interrupt, panic and restart stack. */ + __u64 async_stack; /* 0x0328 */ + __u64 panic_stack; /* 0x0330 */ + __u64 restart_stack; /* 0x0338 */ + + /* Restart function and parameter. */ + __u64 restart_fn; /* 0x0340 */ + __u64 restart_data; /* 0x0348 */ + __u64 restart_source; /* 0x0350 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0310 */ - __u64 user_asce; /* 0x0318 */ - __u64 user_exec_asce; /* 0x0320 */ + __u64 kernel_asce; /* 0x0358 */ + __u64 user_asce; /* 0x0360 */ + __u64 current_pid; /* 0x0368 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0328 */ - __u32 softirq_pending; /* 0x032c */ - __u64 percpu_offset; /* 0x0330 */ - __u64 ext_call_fast; /* 0x0338 */ - __u64 int_clock; /* 0x0340 */ - __u64 mcck_clock; /* 0x0348 */ - __u64 clock_comparator; /* 0x0350 */ - __u64 vdso_per_cpu_data; /* 0x0358 */ - __u64 machine_flags; /* 0x0360 */ - __u64 ftrace_func; /* 0x0368 */ - __u64 sie_hook; /* 0x0370 */ - __u64 cmf_hpp; /* 0x0378 */ - - /* Interrupt response block. */ - __u8 irb[64]; /* 0x0380 */ + __u32 cpu_nr; /* 0x0370 */ + __u32 softirq_pending; /* 0x0374 */ + __u64 percpu_offset; /* 0x0378 */ + __u64 vdso_per_cpu_data; /* 0x0380 */ + __u64 machine_flags; /* 0x0388 */ + __u64 ftrace_func; /* 0x0390 */ + __u64 gmap; /* 0x0398 */ + __u32 spinlock_lockval; /* 0x03a0 */ + __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x03c0 */ + __u32 paste[16]; /* 0x0400 */ - __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */ + __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -286,7 +303,10 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + __u64 vmcore_info; /* 0x0e0c */ + __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ + __u64 os_info; /* 0x0e18 */ + __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -310,9 +330,13 @@ struct _lowcore { __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ __u32 access_regs_save_area[16]; /* 0x1340 */ __u64 cregs_save_area[16]; /* 0x1380 */ + __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */ + + /* Transaction abort diagnostic block */ + __u8 pgm_tdb[256]; /* 0x1800 */ /* align to the top of the prefix area */ - __u8 pad_0x1400[0x2000-0x1400]; /* 0x1400 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #endif /* CONFIG_32BIT */ diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h index e8dd1ba8edb..614dfaf47f7 100644 --- a/arch/s390/include/asm/mathemu.h +++ b/arch/s390/include/asm/mathemu.h @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/mathemu.h * IEEE floating point emulation. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h index 4e9c8ae0a63..9977e08df5b 100644 --- a/arch/s390/include/asm/mman.h +++ b/arch/s390/include/asm/mman.h @@ -1,19 +1,15 @@ /* - * include/asm-s390/mman.h - * * S390 version * * Derived from "include/asm-i386/mman.h" */ - #ifndef __S390_MMAN_H__ #define __S390_MMAN_H__ -#include <asm-generic/mman.h> +#include <uapi/asm/mman.h> -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT) -int s390_mmap_check(unsigned long addr, unsigned long len); -#define arch_mmap_check(addr,len,flags) s390_mmap_check(addr,len) +#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT) +int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags); +#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags) #endif - #endif /* __S390_MMAN_H__ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 78522cdefdd..a5e656260a7 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -1,23 +1,42 @@ #ifndef __MMU_H #define __MMU_H +#include <linux/cpumask.h> +#include <linux/errno.h> + typedef struct { + cpumask_t cpu_attach_mask; atomic_t attach_count; unsigned int flush_mm; spinlock_t list_lock; - struct list_head crst_list; struct list_head pgtable_list; + struct list_head gmap_list; unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int noexec; - int has_pgste; /* The mmu context has extended page tables */ - int alloc_pgste; /* cloned contexts will have extended page tables */ + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; + /* The mmu context uses storage keys. */ + unsigned int use_skey:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ - .context.crst_list = LIST_HEAD_INIT(name.context.crst_list), \ - .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), + .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ + .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), + +static inline int tprot(unsigned long addr) +{ + int rc = -EFAULT; + + asm volatile( + " tprot 0(%1),0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "a" (addr) : "cc"); + return rc; +} #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index a6f0e7cc9cd..3815bfea1b2 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/mmu_context.h - * * S390 version * * Derived from "include/asm-i386/mmu_context.h" @@ -12,37 +10,20 @@ #include <asm/pgalloc.h> #include <asm/uaccess.h> #include <asm/tlbflush.h> -#include <asm-generic/mm_hooks.h> +#include <asm/ctl_reg.h> static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.attach_count, 0); mm->context.flush_mm = 0; mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; #ifdef CONFIG_64BIT mm->context.asce_bits |= _ASCE_TYPE_REGION3; #endif - if (current->mm->context.alloc_pgste) { - /* - * alloc_pgste indicates, that any NEW context will be created - * with extended page tables. The old context is unchanged. The - * page table allocation and the page table operations will - * look at has_pgste to distinguish normal and extended page - * tables. The only way to create extended page tables is to - * set alloc_pgste and then create a new context (e.g. dup_mm). - * The page table allocation is called after init_new_context - * and if has_pgste is set, it will create extended page - * tables. - */ - mm->context.noexec = 0; - mm->context.has_pgste = 1; - mm->context.alloc_pgste = 1; - } else { - mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); - mm->context.has_pgste = 0; - mm->context.alloc_pgste = 0; - } + mm->context.has_pgste = 0; + mm->context.use_skey = 0; mm->context.asce_limit = STACK_TOP_MAX; crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; @@ -50,41 +31,69 @@ static inline int init_new_context(struct task_struct *tsk, #define destroy_context(mm) do { } while (0) -#ifndef __s390x__ -#define LCTL_OPCODE "lctl" -#else -#define LCTL_OPCODE "lctlg" -#endif +static inline void set_user_asce(struct mm_struct *mm) +{ + S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd); + if (current->thread.mm_segment.ar4) + __ctl_load(S390_lowcore.user_asce, 7, 7); + set_cpu_flag(CIF_ASCE); +} -static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) +static inline void clear_user_asce(void) { - pgd_t *pgd = mm->pgd; - - S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); - if (user_mode != HOME_SPACE_MODE) { - /* Load primary space page table origin. */ - pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; - S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); - asm volatile(LCTL_OPCODE" 1,1,%0\n" - : : "m" (S390_lowcore.user_exec_asce) ); - } else - /* Load home space page table origin. */ - asm volatile(LCTL_OPCODE" 13,13,%0" - : : "m" (S390_lowcore.user_asce) ); - set_fs(current->thread.mm_segment); + S390_lowcore.user_asce = S390_lowcore.kernel_asce; + + __ctl_load(S390_lowcore.user_asce, 1, 1); + __ctl_load(S390_lowcore.user_asce, 7, 7); +} + +static inline void load_kernel_asce(void) +{ + unsigned long asce; + + __ctl_store(asce, 1, 1); + if (asce != S390_lowcore.kernel_asce) + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - update_mm(next, tsk); - atomic_dec(&prev->context.attach_count); - WARN_ON(atomic_read(&prev->context.attach_count) < 0); + int cpu = smp_processor_id(); + + if (prev == next) + return; + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + /* Clear old ASCE by loading the kernel ASCE. */ + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); atomic_inc(&next->context.attach_count); - /* Check for TLBs not flushed yet */ - if (next->context.flush_mm) - __tlb_flush_mm(next); + atomic_dec(&prev->context.attach_count); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); +} + +#define finish_arch_post_lock_switch finish_arch_post_lock_switch +static inline void finish_arch_post_lock_switch(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + load_kernel_asce(); + if (mm) { + preempt_disable(); + while (atomic_read(&mm->context.attach_count) >> 16) + cpu_relax(); + + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + if (mm->context.flush_mm) + __tlb_flush_mm(mm); + preempt_enable(); + } + set_fs(current->thread.mm_segment); } #define enter_lazy_tlb(mm,tsk) do { } while (0) @@ -93,7 +102,22 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm(prev, next, current); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + set_user_asce(next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_64BIT + if (oldmm->context.asce_limit < mm->context.asce_limit) + crst_table_downgrade(mm, oldmm->context.asce_limit); +#endif +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ } #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h index 1cc1c5af705..df1f861a848 100644 --- a/arch/s390/include/asm/module.h +++ b/arch/s390/include/asm/module.h @@ -1,5 +1,8 @@ #ifndef _ASM_S390_MODULE_H #define _ASM_S390_MODULE_H + +#include <asm-generic/module.h> + /* * This file contains the s390 architecture specific module code. */ @@ -28,19 +31,4 @@ struct mod_arch_specific struct mod_arch_syminfo *syminfo; }; -#ifdef __s390x__ -#define ElfW(x) Elf64_ ## x -#define ELFW(x) ELF64_ ## x -#else -#define ElfW(x) Elf32_ ## x -#define ELFW(x) ELF32_ ## x -#endif - -#define Elf_Addr ElfW(Addr) -#define Elf_Rela ElfW(Rela) -#define Elf_Shdr ElfW(Shdr) -#define Elf_Sym ElfW(Sym) -#define Elf_Ehdr ElfW(Ehdr) -#define ELF_R_SYM ELFW(R_SYM) -#define ELF_R_TYPE ELFW(R_TYPE) #endif /* _ASM_S390_MODULE_H */ diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 688271f5f2e..458c1f7fbc1 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,5 +7,3 @@ */ #include <asm-generic/mutex-dec.h> - -#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index f4b60441adc..35f8ec18561 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -1,7 +1,7 @@ /* * Machine check handler definitions * - * Copyright IBM Corp. 2000,2009 + * Copyright IBM Corp. 2000, 2009 * Author(s): Ingo Adlung <adlung@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Cornelia Huck <cornelia.huck@de.ibm.com>, diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h new file mode 100644 index 00000000000..295f2c4f1c9 --- /dev/null +++ b/arch/s390/include/asm/os_info.h @@ -0,0 +1,49 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ +#ifndef _ASM_S390_OS_INFO_H +#define _ASM_S390_OS_INFO_H + +#define OS_INFO_VERSION_MAJOR 1 +#define OS_INFO_VERSION_MINOR 1 +#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */ + +#define OS_INFO_VMCOREINFO 0 +#define OS_INFO_REIPL_BLOCK 1 + +struct os_info_entry { + u64 addr; + u64 size; + u32 csum; +} __packed; + +struct os_info { + u64 magic; + u32 csum; + u16 version_major; + u16 version_minor; + u64 crashkernel_addr; + u64 crashkernel_size; + struct os_info_entry entry[2]; + u8 reserved[4024]; +} __packed; + +void os_info_init(void); +void os_info_entry_add(int nr, void *ptr, u64 len); +void os_info_crashkernel_add(unsigned long base, unsigned long size); +u32 os_info_csum(struct os_info *os_info); + +#ifdef CONFIG_CRASH_DUMP +void *os_info_old_entry(int nr, unsigned long *size); +int copy_from_oldmem(void *dest, void *src, size_t count); +#else +static inline void *os_info_old_entry(int nr, unsigned long *size) +{ + return NULL; +} +#endif + +#endif /* _ASM_S390_OS_INFO_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3c987e9ec8d..114258eeaac 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/page.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Hartmut Penner (hp@de.ibm.com) */ @@ -32,50 +30,39 @@ #include <asm/setup.h> #ifndef __ASSEMBLY__ +static inline void storage_key_init_range(unsigned long start, unsigned long end) +{ +#if PAGE_DEFAULT_KEY + __storage_key_init_range(start, end); +#endif +} + static inline void clear_page(void *page) { - if (MACHINE_HAS_PFMF) { - asm volatile( - " .insn rre,0xb9af0000,%0,%1" - : : "d" (0x10000), "a" (page) : "memory", "cc"); - } else { - register unsigned long reg1 asm ("1") = 0; - register void *reg2 asm ("2") = page; - register unsigned long reg3 asm ("3") = 4096; - asm volatile( - " mvcl 2,0" - : "+d" (reg2), "+d" (reg3) : "d" (reg1) - : "memory", "cc"); - } + register unsigned long reg1 asm ("1") = 0; + register void *reg2 asm ("2") = page; + register unsigned long reg3 asm ("3") = 4096; + asm volatile( + " mvcl 2,0" + : "+d" (reg2), "+d" (reg3) : "d" (reg1) + : "memory", "cc"); } +/* + * copy_page uses the mvcl instruction with 0xb0 padding byte in order to + * bypass caches when copying a page. Especially when copying huge pages + * this keeps L1 and L2 data caches alive. + */ static inline void copy_page(void *to, void *from) { - if (MACHINE_HAS_MVPG) { - register unsigned long reg0 asm ("0") = 0; - asm volatile( - " mvpg %0,%1" - : : "a" (to), "a" (from), "d" (reg0) - : "memory", "cc"); - } else - asm volatile( - " mvc 0(256,%0),0(%1)\n" - " mvc 256(256,%0),256(%1)\n" - " mvc 512(256,%0),512(%1)\n" - " mvc 768(256,%0),768(%1)\n" - " mvc 1024(256,%0),1024(%1)\n" - " mvc 1280(256,%0),1280(%1)\n" - " mvc 1536(256,%0),1536(%1)\n" - " mvc 1792(256,%0),1792(%1)\n" - " mvc 2048(256,%0),2048(%1)\n" - " mvc 2304(256,%0),2304(%1)\n" - " mvc 2560(256,%0),2560(%1)\n" - " mvc 2816(256,%0),2816(%1)\n" - " mvc 3072(256,%0),3072(%1)\n" - " mvc 3328(256,%0),3328(%1)\n" - " mvc 3584(256,%0),3584(%1)\n" - " mvc 3840(256,%0),3840(%1)\n" - : : "a" (to), "a" (from) : "memory"); + register void *reg2 asm ("2") = to; + register unsigned long reg3 asm ("3") = 0x1000; + register void *reg4 asm ("4") = from; + register unsigned long reg5 asm ("5") = 0xb0001000; + asm volatile( + " mvcl 2,4" + : "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5) + : : "memory", "cc"); } #define clear_user_page(page, vaddr, pg) clear_page(page) @@ -90,6 +77,7 @@ static inline void copy_page(void *to, void *from) */ typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; @@ -97,18 +85,21 @@ typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) +#define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -static inline void -page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) { if (!mapped) asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" @@ -117,18 +108,35 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } -static inline unsigned int -page_get_storage_key(unsigned long addr) +static inline unsigned char page_get_storage_key(unsigned long addr) { - unsigned int skey; + unsigned char skey; - asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0)); + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); return skey; } +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 42a145c9ddd..c030900320e 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -1,10 +1,192 @@ #ifndef __ASM_S390_PCI_H #define __ASM_S390_PCI_H -/* S/390 systems don't have a PCI bus. This file is just here because some stupid .c code - * includes it even if CONFIG_PCI is not set. - */ +/* must be set before including asm-generic/pci.h */ #define PCI_DMA_BUS_IS_PHYS (0) +/* must be set before including pci_clp.h */ +#define PCI_BAR_COUNT 6 -#endif /* __ASM_S390_PCI_H */ +#include <linux/pci.h> +#include <asm-generic/pci.h> +#include <asm-generic/pci-dma-compat.h> +#include <asm/pci_clp.h> +#include <asm/pci_debug.h> +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM 0x10000000 + +#define pcibios_assign_all_busses() (0) + +void __iomem *pci_iomap(struct pci_dev *, int, unsigned long); +void pci_iounmap(struct pci_dev *, void __iomem *); +int pci_domain_nr(struct pci_bus *); +int pci_proc_domain(struct pci_bus *); + +#define ZPCI_BUS_NR 0 /* default bus number */ +#define ZPCI_DEVFN 0 /* default device number */ + +/* PCI Function Controls */ +#define ZPCI_FC_FN_ENABLED 0x80 +#define ZPCI_FC_ERROR 0x40 +#define ZPCI_FC_BLOCKED 0x20 +#define ZPCI_FC_DMA_ENABLED 0x10 + +struct zpci_fmb { + u32 format : 8; + u32 dma_valid : 1; + u32 : 23; + u32 samples; + u64 last_update; + /* hardware counters */ + u64 ld_ops; + u64 st_ops; + u64 stb_ops; + u64 rpcit_ops; + u64 dma_rbytes; + u64 dma_wbytes; + /* software counters */ + atomic64_t allocated_pages; + atomic64_t mapped_pages; + atomic64_t unmapped_pages; +} __packed __aligned(16); + +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) +#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) + +enum zpci_state { + ZPCI_FN_STATE_RESERVED, + ZPCI_FN_STATE_STANDBY, + ZPCI_FN_STATE_CONFIGURED, + ZPCI_FN_STATE_ONLINE, + NR_ZPCI_FN_STATES, +}; + +struct zpci_bar_struct { + struct resource *res; /* bus resource */ + u32 val; /* bar start & 3 flag bits */ + u16 map_idx; /* index into bar mapping array */ + u8 size; /* order 2 exponent */ +}; + +/* Private data per function */ +struct zpci_dev { + struct pci_dev *pdev; + struct pci_bus *bus; + struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + + enum zpci_state state; + u32 fid; /* function ID, used by sclp */ + u32 fh; /* function handle, used by insn's */ + u16 vfn; /* virtual function number */ + u16 pchid; /* physical channel ID */ + u8 pfgid; /* function group ID */ + u8 pft; /* pci function type */ + u16 domain; + + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 uid; /* user defined id */ + u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ + + /* IRQ stuff */ + u64 msi_addr; /* MSI address */ + struct airq_iv *aibv; /* adapter interrupt bit vector */ + unsigned int aisb; /* number of the summary bit */ + + /* DMA stuff */ + unsigned long *dma_table; + spinlock_t dma_table_lock; + int tlb_refresh; + + spinlock_t iommu_bitmap_lock; + unsigned long *iommu_bitmap; + unsigned long iommu_size; + unsigned long iommu_pages; + unsigned int next_bit; + + char res_name[16]; + struct zpci_bar_struct bars[PCI_BAR_COUNT]; + + u64 start_dma; /* Start of available DMA addresses */ + u64 end_dma; /* End of available DMA addresses */ + u64 dma_mask; /* DMA address space mask */ + + /* Function measurement block */ + struct zpci_fmb *fmb; + u16 fmb_update; /* update interval */ + + enum pci_bus_speed max_bus_speed; + + struct dentry *debugfs_dev; + struct dentry *debugfs_perf; +}; + +static inline bool zdev_enabled(struct zpci_dev *zdev) +{ + return (zdev->fh & (1UL << 31)) ? true : false; +} + +extern const struct attribute_group *zpci_attr_groups[]; + +/* ----------------------------------------------------------------------------- + Prototypes +----------------------------------------------------------------------------- */ +/* Base stuff */ +int zpci_create_device(struct zpci_dev *); +int zpci_enable_device(struct zpci_dev *); +int zpci_disable_device(struct zpci_dev *); +void zpci_stop_device(struct zpci_dev *); +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); +int zpci_unregister_ioat(struct zpci_dev *, u8); + +/* CLP */ +int clp_scan_pci_devices(void); +int clp_rescan_pci_devices(void); +int clp_rescan_pci_devices_simple(void); +int clp_add_pci_device(u32, u32, int); +int clp_enable_fh(struct zpci_dev *, u8); +int clp_disable_fh(struct zpci_dev *); + +#ifdef CONFIG_PCI +/* Error handling and recovery */ +void zpci_event_error(void *); +void zpci_event_availability(void *); +void zpci_rescan(void); +bool zpci_is_enabled(void); +#else /* CONFIG_PCI */ +static inline void zpci_event_error(void *e) {} +static inline void zpci_event_availability(void *e) {} +static inline void zpci_rescan(void) {} +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_HOTPLUG_PCI_S390 +int zpci_init_slot(struct zpci_dev *); +void zpci_exit_slot(struct zpci_dev *); +#else /* CONFIG_HOTPLUG_PCI_S390 */ +static inline int zpci_init_slot(struct zpci_dev *zdev) +{ + return 0; +} +static inline void zpci_exit_slot(struct zpci_dev *zdev) {} +#endif /* CONFIG_HOTPLUG_PCI_S390 */ + +/* Helpers */ +struct zpci_dev *get_zdev(struct pci_dev *); +struct zpci_dev *get_zdev_by_fid(u32); + +/* DMA */ +int zpci_dma_init(void); +void zpci_dma_exit(void); + +/* FMB */ +int zpci_fmb_enable_device(struct zpci_dev *); +int zpci_fmb_disable_device(struct zpci_dev *); + +/* Debug */ +int zpci_debug_init(void); +void zpci_debug_exit(void); +void zpci_debug_init_device(struct zpci_dev *); +void zpci_debug_exit_device(struct zpci_dev *); +void zpci_debug_info(struct zpci_dev *, struct seq_file *); + +#endif diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h new file mode 100644 index 00000000000..dd78f92f1cc --- /dev/null +++ b/arch/s390/include/asm/pci_clp.h @@ -0,0 +1,186 @@ +#ifndef _ASM_S390_PCI_CLP_H +#define _ASM_S390_PCI_CLP_H + +#include <asm/clp.h> + +/* + * Call Logical Processor - Command Codes + */ +#define CLP_LIST_PCI 0x0002 +#define CLP_QUERY_PCI_FN 0x0003 +#define CLP_QUERY_PCI_FNGRP 0x0004 +#define CLP_SET_PCI_FN 0x0005 + +/* PCI function handle list entry */ +struct clp_fh_list_entry { + u16 device_id; + u16 vendor_id; + u32 config_state : 1; + u32 : 31; + u32 fid; /* PCI function id */ + u32 fh; /* PCI function handle */ +} __packed; + +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ + +/* request or response block header length */ +#define LIST_PCI_HDR_LEN 32 + +/* Number of function handles fitting in response block */ +#define CLP_FH_LIST_NR_ENTRIES \ + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ + / sizeof(struct clp_fh_list_entry)) + +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ + +#define CLP_UTIL_STR_LEN 64 +#define CLP_PFIP_NR_SEGMENTS 4 + +/* List PCI functions request */ +struct clp_req_list_pci { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u64 resume_token; + u64 reserved2; +} __packed; + +/* List PCI functions response */ +struct clp_rsp_list_pci { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u64 resume_token; + u32 reserved2; + u16 max_fn; + u8 reserved3; + u8 entry_size; + struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES]; +} __packed; + +/* Query PCI function request */ +struct clp_req_query_pci { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 fh; /* function handle */ + u32 reserved2; + u64 reserved3; +} __packed; + +/* Query PCI function response */ +struct clp_rsp_query_pci { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 : 64; + u16 vfn; /* virtual fn number */ + u16 : 7; + u16 util_str_avail : 1; /* utility string available? */ + u16 pfgid : 8; /* pci function group id */ + u32 fid; /* pci function id */ + u8 bar_size[PCI_BAR_COUNT]; + u16 pchid; + u32 bar[PCI_BAR_COUNT]; + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 : 24; + u8 pft; /* pci function type */ + u64 sdma; /* start dma as */ + u64 edma; /* end dma as */ + u32 reserved[11]; + u32 uid; /* user defined id */ + u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ +} __packed; + +/* Query PCI function group request */ +struct clp_req_query_pci_grp { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 : 24; + u32 pfgid : 8; /* function group id */ + u32 reserved2; + u64 reserved3; +} __packed; + +/* Query PCI function group response */ +struct clp_rsp_query_pci_grp { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u16 : 4; + u16 noi : 12; /* number of interrupts */ + u8 version; + u8 : 6; + u8 frame : 1; + u8 refresh : 1; /* TLB refresh mode */ + u16 reserved2; + u16 mui; + u64 reserved3; + u64 dasm; /* dma address space mask */ + u64 msia; /* MSI address */ + u64 reserved4; + u64 reserved5; +} __packed; + +/* Set PCI function request */ +struct clp_req_set_pci { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 fh; /* function handle */ + u16 reserved2; + u8 oc; /* operation controls */ + u8 ndas; /* number of dma spaces */ + u64 reserved3; +} __packed; + +/* Set PCI function response */ +struct clp_rsp_set_pci { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 fh; /* function handle */ + u32 reserved3; + u64 reserved4; +} __packed; + +/* Combined request/response block structures used by clp insn */ +struct clp_req_rsp_list_pci { + struct clp_req_list_pci request; + struct clp_rsp_list_pci response; +} __packed; + +struct clp_req_rsp_set_pci { + struct clp_req_set_pci request; + struct clp_rsp_set_pci response; +} __packed; + +struct clp_req_rsp_query_pci { + struct clp_req_query_pci request; + struct clp_rsp_query_pci response; +} __packed; + +struct clp_req_rsp_query_pci_grp { + struct clp_req_query_pci_grp request; + struct clp_rsp_query_pci_grp response; +} __packed; + +#endif diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h new file mode 100644 index 00000000000..ac24b26fc06 --- /dev/null +++ b/arch/s390/include/asm/pci_debug.h @@ -0,0 +1,28 @@ +#ifndef _S390_ASM_PCI_DEBUG_H +#define _S390_ASM_PCI_DEBUG_H + +#include <asm/debug.h> + +extern debug_info_t *pci_debug_msg_id; +extern debug_info_t *pci_debug_err_id; + +#define zpci_dbg(imp, fmt, args...) \ + debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args) + +#define zpci_err(text...) \ + do { \ + char debug_buffer[16]; \ + snprintf(debug_buffer, 16, text); \ + debug_text_event(pci_debug_err_id, 0, debug_buffer); \ + } while (0) + +static inline void zpci_err_hex(void *addr, int len) +{ + while (len > 0) { + debug_event(pci_debug_err_id, 0, (void *) addr, len); + len -= pci_debug_err_id->buf_size; + addr += pci_debug_err_id->buf_size; + } +} + +#endif diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h new file mode 100644 index 00000000000..30b4c179c38 --- /dev/null +++ b/arch/s390/include/asm/pci_dma.h @@ -0,0 +1,196 @@ +#ifndef _ASM_S390_PCI_DMA_H +#define _ASM_S390_PCI_DMA_H + +/* I/O Translation Anchor (IOTA) */ +enum zpci_ioat_dtype { + ZPCI_IOTA_STO = 0, + ZPCI_IOTA_RTTO = 1, + ZPCI_IOTA_RSTO = 2, + ZPCI_IOTA_RFTO = 3, + ZPCI_IOTA_PFAA = 4, + ZPCI_IOTA_IOPFAA = 5, + ZPCI_IOTA_IOPTO = 7 +}; + +#define ZPCI_IOTA_IOT_ENABLED 0x800UL +#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2) +#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2) +#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2) +#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2) +#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2) +#define ZPCI_IOTA_FS_4K 0 +#define ZPCI_IOTA_FS_1M 1 +#define ZPCI_IOTA_FS_2G 2 +#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) + +#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) +#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) +#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS) +#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF) +#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G) + +/* I/O Region and segment tables */ +#define ZPCI_INDEX_MASK 0x7ffUL + +#define ZPCI_TABLE_TYPE_MASK 0xc +#define ZPCI_TABLE_TYPE_RFX 0xc +#define ZPCI_TABLE_TYPE_RSX 0x8 +#define ZPCI_TABLE_TYPE_RTX 0x4 +#define ZPCI_TABLE_TYPE_SX 0x0 + +#define ZPCI_TABLE_LEN_RFX 0x3 +#define ZPCI_TABLE_LEN_RSX 0x3 +#define ZPCI_TABLE_LEN_RTX 0x3 + +#define ZPCI_TABLE_OFFSET_MASK 0xc0 +#define ZPCI_TABLE_SIZE 0x4000 +#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE +#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long)) +#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE) + +#define ZPCI_TABLE_BITS 11 +#define ZPCI_PT_BITS 8 +#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) +#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) + +#define ZPCI_RTE_FLAG_MASK 0x3fffUL +#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) +#define ZPCI_STE_FLAG_MASK 0x7ffUL +#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK) + +/* I/O Page tables */ +#define ZPCI_PTE_VALID_MASK 0x400 +#define ZPCI_PTE_INVALID 0x400 +#define ZPCI_PTE_VALID 0x000 +#define ZPCI_PT_SIZE 0x800 +#define ZPCI_PT_ALIGN ZPCI_PT_SIZE +#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE) +#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1) + +#define ZPCI_PTE_FLAG_MASK 0xfffUL +#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK) + +/* Shared bits */ +#define ZPCI_TABLE_VALID 0x00 +#define ZPCI_TABLE_INVALID 0x20 +#define ZPCI_TABLE_PROTECTED 0x200 +#define ZPCI_TABLE_UNPROTECTED 0x000 + +#define ZPCI_TABLE_VALID_MASK 0x20 +#define ZPCI_TABLE_PROT_MASK 0x200 + +static inline unsigned int calc_rtx(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_sx(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_px(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; +} + +static inline void set_pt_pfaa(unsigned long *entry, void *pfaa) +{ + *entry &= ZPCI_PTE_FLAG_MASK; + *entry |= ((unsigned long) pfaa & ZPCI_PTE_ADDR_MASK); +} + +static inline void set_rt_sto(unsigned long *entry, void *sto) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= ((unsigned long) sto & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RTX; +} + +static inline void set_st_pto(unsigned long *entry, void *pto) +{ + *entry &= ZPCI_STE_FLAG_MASK; + *entry |= ((unsigned long) pto & ZPCI_STE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_SX; +} + +static inline void validate_rt_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RTX; +} + +static inline void validate_st_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_VALID; +} + +static inline void invalidate_table_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_INVALID; +} + +static inline void invalidate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_INVALID; +} + +static inline void validate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_VALID; +} + +static inline void entry_set_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_PROTECTED; +} + +static inline void entry_clr_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_UNPROTECTED; +} + +static inline int reg_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; +} + +static inline int pt_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; +} + +static inline int entry_isprotected(unsigned long entry) +{ + return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED; +} + +static inline unsigned long *get_rt_sto(unsigned long entry) +{ + return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) + ? (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK) + : NULL; +} + +static inline unsigned long *get_st_pto(unsigned long entry) +{ + return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) + ? (unsigned long *) (entry & ZPCI_STE_ADDR_MASK) + : NULL; +} + +/* Prototypes */ +int zpci_dma_init_device(struct zpci_dev *); +void zpci_dma_exit_device(struct zpci_dev *); + +#endif diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h new file mode 100644 index 00000000000..649eb62c52b --- /dev/null +++ b/arch/s390/include/asm/pci_insn.h @@ -0,0 +1,86 @@ +#ifndef _ASM_S390_PCI_INSN_H +#define _ASM_S390_PCI_INSN_H + +/* Load/Store status codes */ +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 +#define ZPCI_PCI_ST_BLOCKED 12 +#define ZPCI_PCI_ST_INSUF_RES 16 +#define ZPCI_PCI_ST_INVAL_AS 20 +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 + +/* Load/Store return codes */ +#define ZPCI_PCI_LS_OK 0 +#define ZPCI_PCI_LS_ERR 1 +#define ZPCI_PCI_LS_BUSY 2 +#define ZPCI_PCI_LS_INVAL_HANDLE 3 + +/* Load/Store address space identifiers */ +#define ZPCI_PCIAS_MEMIO_0 0 +#define ZPCI_PCIAS_MEMIO_1 1 +#define ZPCI_PCIAS_MEMIO_2 2 +#define ZPCI_PCIAS_MEMIO_3 3 +#define ZPCI_PCIAS_MEMIO_4 4 +#define ZPCI_PCIAS_MEMIO_5 5 +#define ZPCI_PCIAS_CFGSPC 15 + +/* Modify PCI Function Controls */ +#define ZPCI_MOD_FC_REG_INT 2 +#define ZPCI_MOD_FC_DEREG_INT 3 +#define ZPCI_MOD_FC_REG_IOAT 4 +#define ZPCI_MOD_FC_DEREG_IOAT 5 +#define ZPCI_MOD_FC_REREG_IOAT 6 +#define ZPCI_MOD_FC_RESET_ERROR 7 +#define ZPCI_MOD_FC_RESET_BLOCK 9 +#define ZPCI_MOD_FC_SET_MEASURE 10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* Function Information Block */ +struct zpci_fib { + u32 fmt : 8; /* format */ + u32 : 24; + u32 : 32; + u8 fc; /* function controls */ + u64 : 56; + u64 pba; /* PCI base address */ + u64 pal; /* PCI address limit */ + u64 iota; /* I/O Translation Anchor */ + u32 : 1; + u32 isc : 3; /* Interrupt subclass */ + u32 noi : 12; /* Number of interrupts */ + u32 : 2; + u32 aibvo : 6; /* Adapter interrupt bit vector offset */ + u32 sum : 1; /* Adapter int summary bit enabled */ + u32 : 1; + u32 aisbo : 6; /* Adapter int summary bit offset */ + u32 : 32; + u64 aibv; /* Adapter int bit vector address */ + u64 aisb; /* Adapter int summary bit address */ + u64 fmb_addr; /* Function measurement block address and key */ + u32 : 32; + u32 gd; +} __packed __aligned(8); + +int zpci_mod_fc(u64 req, struct zpci_fib *fib); +int zpci_refresh_trans(u64 fn, u64 addr, u64 range); +int zpci_load(u64 *data, u64 req, u64 offset); +int zpci_store(u64 data, u64 req, u64 offset); +int zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); + +#endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h new file mode 100644 index 00000000000..d194d544d69 --- /dev/null +++ b/arch/s390/include/asm/pci_io.h @@ -0,0 +1,198 @@ +#ifndef _ASM_S390_PCI_IO_H +#define _ASM_S390_PCI_IO_H + +#ifdef CONFIG_PCI + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <asm/pci_insn.h> + +/* I/O Map */ +#define ZPCI_IOMAP_MAX_ENTRIES 0x7fff +#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000ULL +#define ZPCI_IOMAP_ADDR_IDX_MASK 0x7fff000000000000ULL +#define ZPCI_IOMAP_ADDR_OFF_MASK 0x0000ffffffffffffULL + +struct zpci_iomap_entry { + u32 fh; + u8 bar; +}; + +extern struct zpci_iomap_entry *zpci_iomap_start; + +#define ZPCI_IDX(addr) \ + (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48) +#define ZPCI_OFFSET(addr) \ + ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK) + +#define ZPCI_CREATE_REQ(handle, space, len) \ + ((u64) handle << 32 | space << 16 | len) + +#define zpci_read(LENGTH, RETTYPE) \ +static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ +{ \ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \ + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ + u64 data; \ + int rc; \ + \ + rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ + if (rc) \ + data = -1ULL; \ + return (RETTYPE) data; \ +} + +#define zpci_write(LENGTH, VALTYPE) \ +static inline void zpci_write_##VALTYPE(VALTYPE val, \ + const volatile void __iomem *addr) \ +{ \ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \ + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ + u64 data = (VALTYPE) val; \ + \ + zpci_store(data, req, ZPCI_OFFSET(addr)); \ +} + +zpci_read(8, u64) +zpci_read(4, u32) +zpci_read(2, u16) +zpci_read(1, u8) +zpci_write(8, u64) +zpci_write(4, u32) +zpci_write(2, u16) +zpci_write(1, u8) + +static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len) +{ + u64 val; + + switch (len) { + case 1: + val = (u64) *((u8 *) data); + break; + case 2: + val = (u64) *((u16 *) data); + break; + case 4: + val = (u64) *((u32 *) data); + break; + case 8: + val = (u64) *((u64 *) data); + break; + default: + val = 0; /* let FW report error */ + break; + } + return zpci_store(val, req, offset); +} + +static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) +{ + u64 data; + int cc; + + cc = zpci_load(&data, req, offset); + if (cc) + goto out; + + switch (len) { + case 1: + *((u8 *) dst) = (u8) data; + break; + case 2: + *((u16 *) dst) = (u16) data; + break; + case 4: + *((u32 *) dst) = (u32) data; + break; + case 8: + *((u64 *) dst) = (u64) data; + break; + } +out: + return cc; +} + +static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) +{ + return zpci_store_block(data, req, offset); +} + +static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) +{ + int count = len > max ? max : len, size = 1; + + while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) { + dst = dst >> 1; + src = src >> 1; + size = size << 1; + } + return size; +} + +static inline int zpci_memcpy_fromio(void *dst, + const volatile void __iomem *src, + unsigned long n) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)]; + u64 req, offset = ZPCI_OFFSET(src); + int size, rc = 0; + + while (n > 0) { + size = zpci_get_max_write_size((u64) src, (u64) dst, n, 8); + req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); + rc = zpci_read_single(req, dst, offset, size); + if (rc) + break; + offset += size; + dst += size; + n -= size; + } + return rc; +} + +static inline int zpci_memcpy_toio(volatile void __iomem *dst, + const void *src, unsigned long n) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)]; + u64 req, offset = ZPCI_OFFSET(dst); + int size, rc = 0; + + if (!src) + return -EINVAL; + + while (n > 0) { + size = zpci_get_max_write_size((u64) dst, (u64) src, n, 128); + req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); + + if (size > 8) /* main path */ + rc = zpci_write_block(req, src, offset); + else + rc = zpci_write_single(req, src, offset, size); + if (rc) + break; + offset += size; + src += size; + n -= size; + } + return rc; +} + +static inline int zpci_memset_io(volatile void __iomem *dst, + unsigned char val, size_t count) +{ + u8 *src = kmalloc(count, GFP_KERNEL); + int rc; + + if (src == NULL) + return -ENOMEM; + memset(src, val, count); + + rc = zpci_memcpy_toio(dst, src, count); + kfree(src); + return rc; +} + +#endif /* CONFIG_PCI */ + +#endif /* _ASM_S390_PCI_IO_H */ diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index f7ad8719d02..fa91e009745 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -1,21 +1,190 @@ #ifndef __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__ +#include <linux/preempt.h> +#include <asm/cmpxchg.h> + /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. */ #define __my_cpu_offset S390_lowcore.percpu_offset +#ifdef CONFIG_64BIT + /* * For 64 bit module code, the module may be more than 4G above the * per cpu area, use weak definitions to force the compiler to * generate external references. */ -#if defined(CONFIG_SMP) && defined(__s390x__) && defined(MODULE) +#if defined(CONFIG_SMP) && defined(MODULE) #define ARCH_NEEDS_WEAK_PER_CPU #endif +/* + * We use a compare-and-swap loop since that uses less cpu cycles than + * disabling and enabling interrupts like the generic variant would do. + */ +#define arch_this_cpu_to_op_simple(pcp, val, op) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } while (prev__ != old__); \ + preempt_enable(); \ + new__; \ +}) + +#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) +#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) + +#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \ +{ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ val__ = (val); \ + pcp_op_T__ old__, *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + if (__builtin_constant_p(val__) && \ + ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ + asm volatile( \ + op2 " %[ptr__],%[val__]\n" \ + : [ptr__] "+Q" (*ptr__) \ + : [val__] "i" ((szcast)val__) \ + : "cc"); \ + } else { \ + asm volatile( \ + op1 " %[old__],%[val__],%[ptr__]\n" \ + : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ + : [val__] "d" (val__) \ + : "cc"); \ + } \ + preempt_enable(); \ +} + +#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int) +#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long) + +#define arch_this_cpu_add_return(pcp, val, op) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ val__ = (val); \ + pcp_op_T__ old__, *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + asm volatile( \ + op " %[old__],%[val__],%[ptr__]\n" \ + : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ + : [val__] "d" (val__) \ + : "cc"); \ + preempt_enable(); \ + old__ + val__; \ +}) + +#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa") +#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag") + +#define arch_this_cpu_to_op(pcp, val, op) \ +{ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ val__ = (val); \ + pcp_op_T__ old__, *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + asm volatile( \ + op " %[old__],%[val__],%[ptr__]\n" \ + : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ + : [val__] "d" (val__) \ + : "cc"); \ + preempt_enable(); \ +} + +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan") +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang") +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao") +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog") + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg(ptr__, oval, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) + +#define arch_this_cpu_xchg(pcp, nval) \ +({ \ + typeof(pcp) *ptr__; \ + typeof(pcp) ret__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + ret__ = xchg(ptr__, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) + +#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ +({ \ + typeof(pcp1) o1__ = (o1), n1__ = (n1); \ + typeof(pcp2) o2__ = (o2), n2__ = (n2); \ + typeof(pcp1) *p1__; \ + typeof(pcp2) *p2__; \ + int ret__; \ + preempt_disable(); \ + p1__ = __this_cpu_ptr(&(pcp1)); \ + p2__ = __this_cpu_ptr(&(pcp2)); \ + ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double +#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double + +#endif /* CONFIG_64BIT */ + #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168d271..159a8ec6da9 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,9 +1,96 @@ /* * Performance event support - s390 specific definitions. * - * Copyright 2009 Martin Schwidefsky, IBM Corporation. + * Copyright IBM Corp. 2009, 2013 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ -/* Empty, just to avoid compiling error */ +#ifndef _ASM_S390_PERF_EVENT_H +#define _ASM_S390_PERF_EVENT_H -#define PERF_EVENT_INDEX_OFFSET 0 +#ifdef CONFIG_64BIT + +#include <linux/perf_event.h> +#include <linux/device.h> +#include <asm/cpu_mf.h> + +/* Per-CPU flags for PMU states */ +#define PMU_F_RESERVED 0x1000 +#define PMU_F_ENABLED 0x2000 +#define PMU_F_IN_USE 0x4000 +#define PMU_F_ERR_IBE 0x0100 +#define PMU_F_ERR_LSDA 0x0200 +#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) + +/* Perf defintions for PMU event attributes in sysfs */ +extern __init const struct attribute_group **cpumf_cf_event_group(void); +extern ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); +#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name +#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr) + +#define CPUMF_EVENT_ATTR(cat, name, id) \ + PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show) +#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name) + + +/* Perf callbacks */ +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +/* Perf pt_regs extension for sample-data-entry indicators */ +struct perf_sf_sde_regs { + unsigned char in_guest:1; /* guest sample */ + unsigned long reserved:63; /* reserved */ +}; + +/* Perf PMU definitions for the counter facility */ +#define PERF_CPUM_CF_MAX_CTR 256 + +/* Perf PMU definitions for the sampling facility */ +#define PERF_CPUM_SF_MAX_CTR 2 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ +#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ +#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ +#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ + PERF_CPUM_SF_DIAG_MODE) +#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */ + +#define REG_NONE 0 +#define REG_OVERFLOW 1 +#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) +#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) +#define RAWSAMPLE_REG(hwc) ((hwc)->config) +#define TEAR_REG(hwc) ((hwc)->last_tag) +#define SAMPL_RATE(hwc) ((hwc)->event_base) +#define SAMPL_FLAGS(hwc) ((hwc)->config_base) +#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) +#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) + +/* Structure for sampling data entries to be passed as perf raw sample data + * to user space. Note that raw sample data must be aligned and, thus, might + * be padded with zeros. + */ +struct sf_raw_sample { +#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE +#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE + u64 format; + u32 size; /* Size of sf_raw_sample */ + u16 bsdes; /* Basic-sampling data entry size */ + u16 dsdes; /* Diagnostic-sampling data entry size */ + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ + u8 padding[]; /* Padding to next multiple of 8 */ +} __packed; + +/* Perf hardware reserve and release functions */ +int perf_reserve_sampling(void); +void perf_release_sampling(void); + +#endif /* CONFIG_64BIT */ +#endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 082eb4e50e8..9e18a61d3df 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/pgalloc.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Hartmut Penner (hp@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -17,16 +15,17 @@ #include <linux/gfp.h> #include <linux/mm.h> -#define check_pgt_cache() do {} while (0) - -unsigned long *crst_table_alloc(struct mm_struct *, int); +unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); -void crst_table_free_rcu(struct mm_struct *, unsigned long *); -unsigned long *page_table_alloc(struct mm_struct *); +unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mm_struct *, unsigned long *); -void disable_noexec(struct mm_struct *, struct task_struct *); +void page_table_free_rcu(struct mmu_gather *, unsigned long *); + +void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, + bool init_skey); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { @@ -50,12 +49,9 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n) static inline void crst_table_init(unsigned long *crst, unsigned long entry) { clear_table(crst, entry, sizeof(unsigned long)*2048); - crst = get_shadow_table(crst); - if (crst) - clear_table(crst, entry, sizeof(unsigned long)*2048); } -#ifndef __s390x__ +#ifndef CONFIG_64BIT static inline unsigned long pgd_entry_type(struct mm_struct *mm) { @@ -69,12 +65,9 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) #define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pgd, pud) BUG() -#define pgd_populate_kernel(mm, pgd, pud) BUG() - #define pud_populate(mm, pud, pmd) BUG() -#define pud_populate_kernel(mm, pud, pmd) BUG() -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ static inline unsigned long pgd_entry_type(struct mm_struct *mm) { @@ -90,7 +83,7 @@ void crst_table_downgrade(struct mm_struct *, unsigned long limit); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; @@ -99,81 +92,61 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); - if (table) - crst_table_init(table, _SEGMENT_ENTRY_EMPTY); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + crst_table_init(table, _SEGMENT_ENTRY_EMPTY); + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } return (pmd_t *) table; } -#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) -static inline void pgd_populate_kernel(struct mm_struct *mm, - pgd_t *pgd, pud_t *pud) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); + pgtable_pmd_page_dtor(virt_to_page(pmd)); + crst_table_free(mm, (unsigned long *) pmd); } static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_populate_kernel(mm, pgd, pud); - if (mm->context.noexec) { - pgd = get_shadow_table(pgd); - pud = get_shadow_table(pud); - pgd_populate_kernel(mm, pgd, pud); - } -} - -static inline void pud_populate_kernel(struct mm_struct *mm, - pud_t *pud, pmd_t *pmd) -{ - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_populate_kernel(mm, pud, pmd); - if (mm->context.noexec) { - pud = get_shadow_table(pud); - pmd = get_shadow_table(pmd); - pud_populate_kernel(mm, pud, pmd); - } + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); } -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) - crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); + INIT_LIST_HEAD(&mm->context.gmap_list); + return (pgd_t *) crst_table_alloc(mm); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) -static inline void pmd_populate_kernel(struct mm_struct *mm, - pmd_t *pmd, pte_t *pte) -{ - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); -} - static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_populate_kernel(mm, pmd, pte); - if (mm->context.noexec) { - pmd = get_shadow_table(pmd); - pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE); - } + pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); } +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) + #define pmd_pgtable(pmd) \ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) /* * page table entry allocation/free routines. */ -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) +#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) +#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02ace3491c5..fcba5e03839 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/pgtable.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Hartmut Penner (hp@de.ibm.com) * Ulrich Weigand (weigand@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) @@ -31,20 +29,20 @@ #ifndef __ASSEMBLY__ #include <linux/sched.h> #include <linux/mm_types.h> -#include <asm/bitops.h> +#include <linux/page-flags.h> #include <asm/bug.h> -#include <asm/processor.h> +#include <asm/page.h> extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); extern void vmem_map_init(void); -extern void fault_init(void); /* * The S390 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. */ #define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0) /* * ZERO_PAGE is a global shared page that is always zero; used @@ -57,17 +55,9 @@ extern unsigned long zero_page_mask; #define ZERO_PAGE(vaddr) \ (virt_to_page((void *)(empty_zero_page + \ (((unsigned long)(vaddr)) &zero_page_mask)))) +#define __HAVE_COLOR_ZERO_PAGE -#define is_zero_pfn is_zero_pfn -static inline int is_zero_pfn(unsigned long pfn) -{ - extern unsigned long zero_pfn; - unsigned long offset_from_zero_pfn = pfn - zero_pfn; - return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); -} - -#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) - +/* TODO: s390 cannot support io_remap_pfn_range... */ #endif /* !__ASSEMBLY__ */ /* @@ -75,15 +65,15 @@ static inline int is_zero_pfn(unsigned long pfn) * table can map * PGDIR_SHIFT determines what a third-level page table entry can map */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT # define PMD_SHIFT 20 # define PUD_SHIFT 20 # define PGDIR_SHIFT 20 -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ # define PMD_SHIFT 20 # define PUD_SHIFT 31 # define PGDIR_SHIFT 42 -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) @@ -99,13 +89,13 @@ static inline int is_zero_pfn(unsigned long pfn) * that leads to 1024 pte per pgd */ #define PTRS_PER_PTE 256 -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define PTRS_PER_PMD 1 #define PTRS_PER_PUD 1 -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define PTRS_PER_PMD 2048 #define PTRS_PER_PUD 2048 -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define PTRS_PER_PGD 2048 #define FIRST_USER_ADDRESS 0 @@ -121,36 +111,26 @@ static inline int is_zero_pfn(unsigned long pfn) #ifndef __ASSEMBLY__ /* - * The vmalloc area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc, - * which should be enough for any sane case. - * By putting vmalloc at the top, we maximise the gap between physical - * memory and vmalloc to catch misplaced memory accesses. As a side - * effect, this also makes sure that 64 bit module code cannot be used - * as system call address. + * The vmalloc and module area will always be on the topmost area of the kernel + * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where + * modules will reside. That makes sure that inter module branches always + * happen without trampolines and in addition the placement within a 2GB frame + * is branch prediction unit friendly. */ - extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; +extern struct page *vmemmap; -#ifndef __s390x__ -#define VMALLOC_SIZE (96UL << 20) -#define VMALLOC_END 0x7e000000UL -#define VMEM_MAP_END 0x80000000UL -#else /* __s390x__ */ -#define VMALLOC_SIZE (128UL << 30) -#define VMALLOC_END 0x3e000000000UL -#define VMEM_MAP_END 0x40000000000UL -#endif /* __s390x__ */ +#define VMEM_MAX_PHYS ((unsigned long) vmemmap) -/* - * VMEM_MAX_PHYS is the highest physical address that can be added to the 1:1 - * mapping. This needs to be calculated at compile time since the size of the - * VMEM_MAP is static but the size of struct page can change. - */ -#define VMEM_MAX_PAGES ((VMEM_MAP_END - VMALLOC_END) / sizeof(struct page)) -#define VMEM_MAX_PFN min(VMALLOC_START >> PAGE_SHIFT, VMEM_MAX_PAGES) -#define VMEM_MAX_PHYS ((VMEM_MAX_PFN << PAGE_SHIFT) & ~((16 << 20) - 1)) -#define vmemmap ((struct page *) VMALLOC_END) +#ifdef CONFIG_64BIT +extern unsigned long MODULES_VADDR; +extern unsigned long MODULES_END; +#define MODULES_VADDR MODULES_VADDR +#define MODULES_END MODULES_END +#define MODULES_LEN (1UL << 31) +#endif /* * A 31 bit pagetable entry of S390 has following format: @@ -237,76 +217,61 @@ extern unsigned long VMALLOC_START; /* Hardware bits in the page table entry */ #define _PAGE_CO 0x100 /* HW Change-bit override */ -#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ /* Software bits in the page table entry */ -#define _PAGE_SWT 0x001 /* SW pte type bit t */ -#define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ +#define _PAGE_PRESENT 0x001 /* SW pte present bit */ +#define _PAGE_TYPE 0x002 /* SW pte type bit */ +#define _PAGE_YOUNG 0x004 /* SW pte young bit */ +#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ +#define _PAGE_READ 0x010 /* SW pte read bit */ +#define _PAGE_WRITE 0x020 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x040 /* SW associated with special page */ +#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) - -/* Six different types of pages. */ -#define _PAGE_TYPE_EMPTY 0x400 -#define _PAGE_TYPE_NONE 0x401 -#define _PAGE_TYPE_SWAP 0x403 -#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ -#define _PAGE_TYPE_RO 0x200 -#define _PAGE_TYPE_RW 0x000 -#define _PAGE_TYPE_EX_RO 0x202 -#define _PAGE_TYPE_EX_RW 0x002 - -/* - * Only four types for huge pages, using the invalid bit and protection bit - * of a segment table entry. - */ -#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ -#define _HPAGE_TYPE_NONE 0x220 -#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ -#define _HPAGE_TYPE_RW 0x000 +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ + _PAGE_DIRTY | _PAGE_YOUNG) /* - * PTE type bits are rather complicated. handle_pte_fault uses pte_present, - * pte_none and pte_file to find out the pte type WITHOUT holding the page - * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to - * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs - * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. - * This change is done while holding the lock, but the intermediate step - * of a previously valid pte with the hw invalid bit set can be observed by - * handle_pte_fault. That makes it necessary that all valid pte types with - * the hw invalid bit set must be distinguishable from the four pte types - * empty, none, swap and file. + * handle_pte_fault uses pte_present, pte_none and pte_file to find out the + * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit + * is used to distinguish present from not-present ptes. It is changed only + * with the page table lock held. * - * irxt ipte irxt - * _PAGE_TYPE_EMPTY 1000 -> 1000 - * _PAGE_TYPE_NONE 1001 -> 1001 - * _PAGE_TYPE_SWAP 1011 -> 1011 - * _PAGE_TYPE_FILE 11?1 -> 11?1 - * _PAGE_TYPE_RO 0100 -> 1100 - * _PAGE_TYPE_RW 0000 -> 1000 - * _PAGE_TYPE_EX_RO 0110 -> 1110 - * _PAGE_TYPE_EX_RW 0010 -> 1010 + * The following table gives the different possible bit combinations for + * the pte hardware and software bits in the last 12 bits of a pte: * - * pte_none is true for bits combinations 1000, 1010, 1100, 1110 - * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 - * pte_file is true for bits combinations 1101, 1111 - * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. + * 842100000000 + * 000084210000 + * 000000008421 + * .IR...wrdytp + * empty .10...000000 + * swap .10...xxxx10 + * file .11...xxxxx0 + * prot-none, clean, old .11...000001 + * prot-none, clean, young .11...000101 + * prot-none, dirty, old .10...001001 + * prot-none, dirty, young .10...001101 + * read-only, clean, old .11...010001 + * read-only, clean, young .01...010101 + * read-only, dirty, old .11...011001 + * read-only, dirty, young .01...011101 + * read-write, clean, old .11...110001 + * read-write, clean, young .01...110101 + * read-write, dirty, old .10...111001 + * read-write, dirty, young .00...111101 + * + * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 + * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 + * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 + * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 */ -/* Page status table bits for virtualization */ -#define RCP_PCL_BIT 55 -#define RCP_HR_BIT 54 -#define RCP_HC_BIT 53 -#define RCP_GR_BIT 50 -#define RCP_GC_BIT 49 - -/* User dirty bit for KVM's migration feature */ -#define KVM_UD_BIT 47 - -#ifndef __s390x__ +#ifndef CONFIG_64BIT /* Bits in the segment table address-space-control-element */ #define _ASCE_SPACE_SWITCH 0x80000000UL /* space switch event */ @@ -316,16 +281,38 @@ extern unsigned long VMALLOC_START; #define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) + +/* + * Segment table entry encoding (I = invalid, R = read-only bit): + * ..R...I..... + * prot-none ..1...1..... + * read-only ..1...0..... + * read-write ..0...0..... + * empty ..0...1..... + */ -#else /* __s390x__ */ +/* Page status table bits for virtualization */ +#define PGSTE_ACC_BITS 0xf0000000UL +#define PGSTE_FP_BIT 0x08000000UL +#define PGSTE_PCL_BIT 0x00800000UL +#define PGSTE_HR_BIT 0x00400000UL +#define PGSTE_HC_BIT 0x00200000UL +#define PGSTE_GR_BIT 0x00040000UL +#define PGSTE_GC_BIT 0x00020000UL +#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */ + +#else /* CONFIG_64BIT */ /* Bits in the segment/region table address-space-control-element */ #define _ASCE_ORIGIN ~0xfffUL/* segment table origin */ @@ -342,7 +329,8 @@ extern unsigned long VMALLOC_START; /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ -#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ #define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ @@ -350,24 +338,70 @@ extern unsigned long VMALLOC_START; #define _REGION_ENTRY_LENGTH 0x03 /* region third length */ #define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) -#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) -#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) -#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) + +#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ +#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ +#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL +#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY (0) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ +#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG + +/* + * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * ..R...I...y. + * prot-none, old ..0...1...1. + * prot-none, young ..1...1...1. + * read-only, old ..1...1...0. + * read-only, young ..1...0...1. + * read-write, old ..0...1...0. + * read-write, young ..0...0...1. + * The segment table origin is used to distinguish empty (origin==0) from + * read-write, old segment table entries (origin!=0) + */ -#endif /* __s390x__ */ +#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ + +/* Set of bits not changed in pmd_modify */ +#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ + | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) + +/* Page status table bits for virtualization */ +#define PGSTE_ACC_BITS 0xf000000000000000UL +#define PGSTE_FP_BIT 0x0800000000000000UL +#define PGSTE_PCL_BIT 0x0080000000000000UL +#define PGSTE_HR_BIT 0x0040000000000000UL +#define PGSTE_HC_BIT 0x0020000000000000UL +#define PGSTE_GR_BIT 0x0004000000000000UL +#define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ + +#endif /* CONFIG_64BIT */ + +/* Guest Page State used for virtualization */ +#define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL /* * A user page table pointer has the space-switch-event bit, the @@ -377,89 +411,77 @@ extern unsigned long VMALLOC_START; #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ _ASCE_ALT_EVENT) -/* Bits int the storage key */ -#define _PAGE_CHANGED 0x02 /* HW changed bit */ -#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ - /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) -#define PAGE_RO __pgprot(_PAGE_TYPE_RO) -#define PAGE_RW __pgprot(_PAGE_TYPE_RW) -#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO) -#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW) - -#define PAGE_KERNEL PAGE_RW -#define PAGE_COPY PAGE_RO +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_INVALID | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_INVALID | _PAGE_PROTECT) + +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ + _PAGE_PROTECT) /* - * Dependent on the EXEC_PROTECT option s390 can do execute protection. - * Write permission always implies read permission. In theory with a - * primary/secondary page table execute only can be implemented but - * it would cost an additional bit in the pte to distinguish all the - * different pte types. To avoid that execute permission currently - * implies read permission as well. + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. */ /*xwr*/ #define __P000 PAGE_NONE -#define __P001 PAGE_RO -#define __P010 PAGE_RO -#define __P011 PAGE_RO -#define __P100 PAGE_EX_RO -#define __P101 PAGE_EX_RO -#define __P110 PAGE_EX_RO -#define __P111 PAGE_EX_RO +#define __P001 PAGE_READ +#define __P010 PAGE_READ +#define __P011 PAGE_READ +#define __P100 PAGE_READ +#define __P101 PAGE_READ +#define __P110 PAGE_READ +#define __P111 PAGE_READ #define __S000 PAGE_NONE -#define __S001 PAGE_RO -#define __S010 PAGE_RW -#define __S011 PAGE_RW -#define __S100 PAGE_EX_RO -#define __S101 PAGE_EX_RO -#define __S110 PAGE_EX_RW -#define __S111 PAGE_EX_RW +#define __S001 PAGE_READ +#define __S010 PAGE_WRITE +#define __S011 PAGE_WRITE +#define __S100 PAGE_READ +#define __S101 PAGE_READ +#define __S110 PAGE_WRITE +#define __S111 PAGE_WRITE -#ifndef __s390x__ -# define PxD_SHADOW_SHIFT 1 -#else /* __s390x__ */ -# define PxD_SHADOW_SHIFT 2 -#endif /* __s390x__ */ +/* + * Segment entry (large page) protection definitions. + */ +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_NONE) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_PROTECT) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) -static inline void *get_shadow_table(void *table) +static inline int mm_has_pgste(struct mm_struct *mm) { - unsigned long addr, offset; - struct page *page; - - addr = (unsigned long) table; - offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1); - page = virt_to_page((void *)(addr ^ offset)); - return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL); +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; } -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) +static inline int mm_use_skey(struct mm_struct *mm) { - *ptep = entry; - if (mm->context.noexec) { - if (!(pte_val(entry) & _PAGE_INVALID) && - (pte_val(entry) & _PAGE_SWX)) - pte_val(entry) |= _PAGE_RO; - else - pte_val(entry) = _PAGE_TYPE_EMPTY; - ptep[PTRS_PER_PTE] = entry; - } +#ifdef CONFIG_PGSTE + if (mm->context.use_skey) + return 1; +#endif + return 0; } /* * pgd/pmd/pte query functions */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT static inline int pgd_present(pgd_t pgd) { return 1; } static inline int pgd_none(pgd_t pgd) { return 0; } @@ -467,9 +489,10 @@ static inline int pgd_bad(pgd_t pgd) { return 0; } static inline int pud_present(pud_t pud) { return 1; } static inline int pud_none(pud_t pud) { return 0; } +static inline int pud_large(pud_t pud) { return 0; } static inline int pud_bad(pud_t pud) { return 0; } -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ static inline int pgd_present(pgd_t pgd) { @@ -482,7 +505,7 @@ static inline int pgd_none(pgd_t pgd) { if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) return 0; - return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; + return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pgd_bad(pgd_t pgd) @@ -493,7 +516,7 @@ static inline int pgd_bad(pgd_t pgd) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pgd_val(pgd) & mask) != 0; } @@ -509,7 +532,14 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; + return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; +} + +static inline int pud_large(pud_t pud) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) + return 0; + return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } static inline int pud_bad(pud_t pud) @@ -520,46 +550,105 @@ static inline int pud_bad(pud_t pud) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pud_val(pud) & mask) != 0; } -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ static inline int pmd_present(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) != 0UL; + return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; } static inline int pmd_none(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) != 0UL; + return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; +} + +static inline int pmd_large(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; +#else + return 0; +#endif +} + +static inline int pmd_prot_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && + (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); } static inline int pmd_bad(pmd_t pmd) { - unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; - return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; +#ifdef CONFIG_64BIT + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; +#endif + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -static inline int pte_none(pte_t pte) +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PMD_WRITE +static inline int pmd_write(pmd_t pmd) { - return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); + if (pmd_prot_none(pmd)) + return 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; +} + +static inline int pmd_young(pmd_t pmd) +{ + int young = 0; +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) + young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; + else + young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; +#endif + return young; } static inline int pte_present(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; - return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || - (!(pte_val(pte) & _PAGE_INVALID) && - !(pte_val(pte) & _PAGE_SWT)); + /* Bit pattern: (pte & 0x001) == 0x001 */ + return (pte_val(pte) & _PAGE_PRESENT) != 0; +} + +static inline int pte_none(pte_t pte) +{ + /* Bit pattern: pte == 0x400 */ + return pte_val(pte) == _PAGE_INVALID; +} + +static inline int pte_swap(pte_t pte) +{ + /* Bit pattern: (pte & 0x603) == 0x402 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | + _PAGE_TYPE | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_TYPE); } static inline int pte_file(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; - return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; + /* Bit pattern: (pte & 0x601) == 0x600 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_PROTECT); } static inline int pte_special(pte_t pte) @@ -568,52 +657,228 @@ static inline int pte_special(pte_t pte) } #define __HAVE_ARCH_PTE_SAME -#define pte_same(a,b) (pte_val(a) == pte_val(b)) +static inline int pte_same(pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} -static inline void rcp_lock(pte_t *ptep) +static inline pgste_t pgste_get_lock(pte_t *ptep) { + unsigned long new = 0; #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + unsigned long old; + preempt_disable(); - while (test_and_set_bit(RCP_PCL_BIT, pgste)) - ; + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); #endif + return __pgste(new); } -static inline void rcp_unlock(pte_t *ptep) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - clear_bit(RCP_PCL_BIT, pgste); + asm( + " nihh %1,0xff7f\n" /* clear PCL bit */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); preempt_enable(); #endif } -/* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page, int mapped); -#include <linux/page-flags.h> +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} -static inline void ptep_rcp_copy(pte_t *ptep) +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste, + struct mm_struct *mm) { #ifdef CONFIG_PGSTE - struct page *page = virt_to_page(pte_val(*ptep)); - unsigned int skey; - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); + unsigned long address, bits, skey; + + if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID) + return pgste; + address = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; +#endif + return pgste; + +} + +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, + struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long nkey; + + if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) + return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); + address = pte_val(entry) & PAGE_MASK; + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + page_set_storage_key(address, nkey, 0); +#endif +} + +static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) +{ + if ((pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE) && + !(pte_val(entry) & _PAGE_INVALID)) { + if (!MACHINE_HAS_ESOP) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; + } + if (!(pte_val(entry) & _PAGE_PROTECT)) + /* This pte allows write access, set user-dirty */ + pgste_val(pgste) |= PGSTE_UC_BIT; } - if (skey & _PAGE_REFERENCED) - set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); + *ptep = entry; + return pgste; +} + +/** + * struct gmap_struct - guest address space + * @mm: pointer to the parent mm_struct + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @crst_list: list of all crst tables used in the guest address space + * @pfault_enabled: defines if pfaults are applicable for the guest + */ +struct gmap { + struct list_head list; + struct mm_struct *mm; + unsigned long *table; + unsigned long asce; + void *private; + struct list_head crst_list; + bool pfault_enabled; +}; + +/** + * struct gmap_rmap - reverse mapping for segment table entries + * @gmap: pointer to the gmap_struct + * @entry: pointer to a segment table entry + * @vmaddr: virtual address in the guest address space + */ +struct gmap_rmap { + struct list_head list; + struct gmap *gmap; + unsigned long *entry; + unsigned long vmaddr; +}; + +/** + * struct gmap_pgtable - gmap information attached to a page table + * @vmaddr: address of the 1MB segment in the process virtual memory + * @mapper: list of segment table entries mapping a page table + */ +struct gmap_pgtable { + unsigned long vmaddr; + struct list_head mapper; +}; + +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + +struct gmap *gmap_alloc(struct mm_struct *mm); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len); +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(unsigned long address, struct gmap *); +unsigned long gmap_translate(unsigned long address, struct gmap *); +unsigned long __gmap_fault(unsigned long address, struct gmap *); +unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void __gmap_zap(unsigned long address, struct gmap *); +bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); + + +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; + gmap_do_ipte_notify(mm, ptep); } - if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) - SetPageReferenced(page); #endif + return pgste; +} + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + } else { + if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) + pte_val(entry) |= _PAGE_CO; + *ptep = entry; + } } /* @@ -622,89 +887,53 @@ static inline void ptep_rcp_copy(pte_t *ptep) */ static inline int pte_write(pte_t pte) { - return (pte_val(pte) & _PAGE_RO) == 0; + return (pte_val(pte) & _PAGE_WRITE) != 0; } static inline int pte_dirty(pte_t pte) { - /* A pte is neither clean nor dirty on s/390. The dirty bit - * is in the storage key. See page_test_and_clear_dirty for - * details. - */ - return 0; + return (pte_val(pte) & _PAGE_DIRTY) != 0; } static inline int pte_young(pte_t pte) { - /* A pte is neither young nor old on s/390. The young bit - * is in the storage key. See page_test_and_clear_young for - * details. - */ - return 0; + return (pte_val(pte) & _PAGE_YOUNG) != 0; +} + +#define __HAVE_ARCH_PTE_UNUSED +static inline int pte_unused(pte_t pte) +{ + return pte_val(pte) & _PAGE_UNUSED; } /* * pgd/pmd/pte modification functions */ -#ifndef __s390x__ - -#define pgd_clear(pgd) do { } while (0) -#define pud_clear(pud) do { } while (0) - -#else /* __s390x__ */ - -static inline void pgd_clear_kernel(pgd_t * pgd) +static inline void pgd_clear(pgd_t *pgd) { +#ifdef CONFIG_64BIT if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; +#endif } -static inline void pgd_clear(pgd_t * pgd) -{ - pgd_t *shadow = get_shadow_table(pgd); - - pgd_clear_kernel(pgd); - if (shadow) - pgd_clear_kernel(shadow); -} - -static inline void pud_clear_kernel(pud_t *pud) +static inline void pud_clear(pud_t *pud) { +#ifdef CONFIG_64BIT if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif } -static inline void pud_clear(pud_t *pud) -{ - pud_t *shadow = get_shadow_table(pud); - - pud_clear_kernel(pud); - if (shadow) - pud_clear_kernel(shadow); -} - -#endif /* __s390x__ */ - -static inline void pmd_clear_kernel(pmd_t * pmdp) -{ - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; -} - -static inline void pmd_clear(pmd_t *pmd) +static inline void pmd_clear(pmd_t *pmdp) { - pmd_t *shadow = get_shadow_table(pmd); - - pmd_clear_kernel(pmd); - if (shadow) - pmd_clear_kernel(shadow); + pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID; } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) - pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; } /* @@ -715,54 +944,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); + /* + * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the + * invalid bit set, clear it again for readable, young pages + */ + if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) + pte_val(pte) &= ~_PAGE_INVALID; + /* + * newprot for PAGE_READ and PAGE_WRITE has the page protection + * bit set, clear it again for writable, dirty pages + */ + if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_WRITE; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkclean(pte_t pte) { - /* The only user of pte_mkclean is the fork() code. - We must *not* clear the *physical* page dirty bit - just because fork() wants to clear the dirty bit in - *one* of the page's mappings. So we just do nothing. */ + pte_val(pte) &= ~_PAGE_DIRTY; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - /* We do not explicitly set the dirty bit because the - * sske instruction is slow. It is faster to let the - * next instruction set the dirty bit. - */ + pte_val(pte) |= _PAGE_DIRTY; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkold(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in clearing the real referenced bit. - */ + pte_val(pte) &= ~_PAGE_YOUNG; + pte_val(pte) |= _PAGE_INVALID; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in setting the real referenced bit. - */ + pte_val(pte) |= _PAGE_YOUNG; + if (pte_val(pte) & _PAGE_READ) + pte_val(pte) &= ~_PAGE_INVALID; return pte; } @@ -775,138 +1013,135 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - /* - * PROT_NONE needs to be remapped from the pte type to the ste type. - * The HW invalid bit is also different for pte and ste. The pte - * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE - * bit, so we don't have to clear it. - */ - if (pte_val(pte) & _PAGE_INVALID) { - if (pte_val(pte) & _PAGE_SWT) - pte_val(pte) |= _HPAGE_TYPE_NONE; - pte_val(pte) |= _SEGMENT_ENTRY_INV; - } - /* - * Clear SW pte bits SWT and SWX, there are no SW bits in a segment - * table entry. - */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); - /* - * Also set the change-override bit because we don't need dirty bit - * tracking for hugetlbfs pages. - */ - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + pte_val(pte) |= _PAGE_LARGE; return pte; } #endif -#ifdef CONFIG_PGSTE +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidation + global TLB flush for the pte */ + asm volatile( + " ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); +} + +static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidation + local TLB flush for the pte */ + asm volatile( + " .insn rrf,0xb2210000,%2,%3,0,1" + : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); +} + +static inline void ptep_flush_direct(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active, count; + + if (pte_val(*ptep) & _PAGE_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(address, ptep); + else + __ptep_ipte(address, ptep); + atomic_sub(0x10000, &mm->context.attach_count); +} + +static inline void ptep_flush_lazy(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active, count; + + if (pte_val(*ptep) & _PAGE_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; + mm->context.flush_mm = 1; + } else + __ptep_ipte(address, ptep); + atomic_sub(0x10000, &mm->context.attach_count); +} + /* - * Get (and clear) the user dirty bit for a PTE. + * Get (and clear) the user dirty bit for a pte. */ -static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, - pte_t *ptep) +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) { + pgste_t pgste; + pte_t pte; int dirty; - unsigned long *pgste; - struct page *page; - unsigned int skey; - - if (!mm->context.has_pgste) - return -EINVAL; - rcp_lock(ptep); - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - page = virt_to_page(pte_val(*ptep)); - skey = page_get_storage_key(page_to_phys(page)); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); - } - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); + + if (!mm_has_pgste(mm)) + return 0; + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; } - dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); - if (skey & _PAGE_CHANGED) - page_clear_dirty(page, 1); - rcp_unlock(ptep); + pgste_set_unlock(ptep, pgste); return dirty; } -#endif #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PGSTE - unsigned long physpage; + pgste_t pgste; + pte_t pte; int young; - unsigned long *pgste; - if (!vma->vm_mm->context.has_pgste) - return 0; - physpage = pte_val(*ptep) & PAGE_MASK; - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_direct(vma->vm_mm, addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; - young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); - rcp_lock(ptep); - if (young) - set_bit_simple(RCP_GR_BIT, pgste); - young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); - rcp_unlock(ptep); return young; -#endif - return 0; } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH static inline int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB - * With virtualization we handle the reference bit, without we - * we can simply return */ -#ifdef CONFIG_PGSTE return ptep_test_and_clear_young(vma, address, ptep); -#endif - return 0; -} - -static inline void __ptep_ipte(unsigned long address, pte_t *ptep) -{ - if (!(pte_val(*ptep) & _PAGE_INVALID)) { -#ifndef __s390x__ - /* pto must point to the start of the segment table */ - pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); -#else - /* ipte in zarch mode can do the math */ - pte_t *pto = ptep; -#endif - asm volatile( - " ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (pto), "a" (address)); - } -} - -static inline void ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - if (mm->context.has_pgste) { - rcp_lock(ptep); - __ptep_ipte(address, ptep); - ptep_rcp_copy(ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - rcp_unlock(ptep); - return; - } - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) { - __ptep_ipte(address, ptep + PTRS_PER_PTE); - pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; - } } /* @@ -923,24 +1158,89 @@ static inline void ptep_invalidate(struct mm_struct *mm, * is a nop. */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(__mm, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __address, __ptep); \ - else \ - pte_clear((__mm), (__address), (__ptep)); \ - __pte; \ -}) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste, mm); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_lazy(mm, address, ptep); + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste, mm); + pgste_set(ptep, pgste); + } + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get(ptep); + pgste_set_key(ptep, pgste, pte, mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; +} #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; - ptep_invalidate(vma->vm_mm, address, ptep); + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_direct(vma->vm_mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; + + if (mm_has_pgste(vma->vm_mm)) { + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(pte) |= _PAGE_UNUSED; + pgste = pgste_update_all(&pte, pgste, vma->vm_mm); + pgste_set_unlock(ptep, pgste); + } return pte; } @@ -953,76 +1253,76 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long addr, + unsigned long address, pte_t *ptep, int full) { - pte_t pte = *ptep; + pgste_t pgste; + pte_t pte; - if (full) - pte_clear(mm, addr, ptep); - else - ptep_invalidate(mm, addr, ptep); + if (!full && mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, ptep, pgste); + } + + pte = *ptep; + if (!full) + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; + + if (!full && mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste, mm); + pgste_set_unlock(ptep, pgste); + } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __addr, __ptep); \ - set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ - } \ -}) +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed) { \ - ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) + if (pte_write(pte)) { + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, ptep, pgste); + } -/* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - */ -#define __HAVE_ARCH_PAGE_TEST_DIRTY -static inline int page_test_dirty(struct page *page) -{ - return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; -} + ptep_flush_lazy(mm, address, ptep); + pte = pte_wrprotect(pte); -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page, int mapped) -{ - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); + if (mm_has_pgste(mm)) { + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + } + return pte; } -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(struct page *page) +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) { - unsigned long physpage = page_to_phys(page); - int ccode; + pgste_t pgste; - asm volatile( - " rrbe 0,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) : "a" (physpage) : "cc" ); - return ccode & 2; + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + + ptep_flush_direct(vma->vm_mm, address, ptep); + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; + return 1; } /* @@ -1033,14 +1333,17 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); - return __pte; + return pte_mkyoung(__pte); } static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) { unsigned long physpage = page_to_phys(page); + pte_t __pte = mk_pte_phys(physpage, pgprot); - return mk_pte_phys(physpage, pgprot); + if (pte_write(__pte) && PageDirty(page)) + __pte = pte_mkdirty(__pte); + return __pte; } #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) @@ -1051,7 +1354,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) #define pgd_offset_k(address) pgd_offset(&init_mm, address) -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pmd) ({ BUG(); 0UL; }) @@ -1060,7 +1363,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pud_offset(pgd, address) ((pud_t *) pgd) #define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address)) -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) @@ -1082,7 +1385,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return pmd + pmd_index(address); } -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) @@ -1096,6 +1399,261 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + /* + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) + * Convert to segment table entry format. + */ + if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) + return pgprot_val(SEGMENT_NONE); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + int young; + + young = pmd_young(pmd); + pmd_val(pmd) &= _SEGMENT_CHG_MASK; + pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (young) + pmd = pmd_mkyoung(pmd); + return pmd; +} + +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) +{ + pmd_t __pmd; + pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); + return pmd_mkyoung(__pmd); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + return pmd; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ + +static inline void __pmdp_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + +static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto; + + sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + : "cc" ); +} + +static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto; + + sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + : "cc" ); +} + +static inline void pmdp_flush_direct(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active, count; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(address, pmdp); + else + __pmdp_idte(address, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); +} + +static inline void pmdp_flush_lazy(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active, count; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + mm->context.flush_mm = 1; + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(address, pmdp); + else + __pmdp_csp(pmdp); + atomic_sub(0x10000, &mm->context.attach_count); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t entry) +{ + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) + pmd_val(entry) |= _SEGMENT_ENTRY_CO; + *pmdp = entry; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + return pmd; +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + /* No dirty bit in the segment table entry. */ + return pmd; +} + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd; + + pmd = *pmdp; + pmdp_flush_direct(vma->vm_mm, address, pmdp); + *pmdp = pmd_mkold(pmd); + return pmd_young(pmd); +} + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; + + pmdp_flush_direct(mm, address, pmdp); + pmd_clear(pmdp); + return pmd; +} + +#define __HAVE_ARCH_PMDP_CLEAR_FLUSH +static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + return pmdp_get_and_clear(vma->vm_mm, address, pmdp); +} + +#define __HAVE_ARCH_PMDP_INVALIDATE +static inline void pmdp_invalidate(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + pmdp_flush_direct(vma->vm_mm, address, pmdp); +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; + + if (pmd_write(pmd)) { + pmdp_flush_direct(mm, address, pmdp); + set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); + } +} + +#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; +} + +static inline int has_transparent_hugepage(void) +{ + return MACHINE_HAS_HPAGE ? 1 : 0; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return pmd_val(pmd) >> PAGE_SHIFT; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * 31 bit swap entry format: * A page-table entry has some bits we have to treat in a special way. @@ -1103,10 +1661,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 21 and bit 22 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 30 and 31 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 21, 22, 30 and 31 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 1-19 and bits 24-29 to store type and offset. * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 * plus 24 for the offset. @@ -1120,10 +1676,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 53 and bit 54 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 62 and 63 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 53, 54, 62 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 0-51 and bits 56-61 to store type and offset. * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 * plus 56 for the offset. @@ -1131,7 +1685,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define __SWP_OFFSET_MASK (~0UL >> 12) #else #define __SWP_OFFSET_MASK (~0UL >> 11) @@ -1140,7 +1694,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | + pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); return pte; } @@ -1152,18 +1706,18 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifndef __s390x__ +#ifndef CONFIG_64BIT # define PTE_FILE_MAX_BITS 26 -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ # define PTE_FILE_MAX_BITS 59 -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define pte_to_pgoff(__pte) \ ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f)) #define pgoff_to_pte(__off) \ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_TYPE_FILE }) + | _PAGE_INVALID | _PAGE_PROTECT }) #endif /* !__ASSEMBLY__ */ @@ -1172,11 +1726,13 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); +extern void s390_enable_skey(void); /* * No page table caches to initialise */ -#define pgtable_cache_init() do { } while (0) +static inline void pgtable_cache_init(void) { } +static inline void check_pgt_cache(void) { } #include <asm-generic/pgtable.h> diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h deleted file mode 100644 index 8cc113f9252..00000000000 --- a/arch/s390/include/asm/posix_types.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * include/asm-s390/posix_types.h - * - * S390 version - * - * Derived from "include/asm-i386/posix_types.h" - */ - -#ifndef __ARCH_S390_POSIX_TYPES_H -#define __ARCH_S390_POSIX_TYPES_H - -/* - * This file is generally used by user-level software, so you need to - * be a little careful about namespace pollution etc. Also, we cannot - * assume GCC is being used. - */ - -typedef long __kernel_off_t; -typedef int __kernel_pid_t; -typedef unsigned long __kernel_size_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; - -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -#ifndef __s390x__ - -typedef unsigned long __kernel_ino_t; -typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_nlink_t; -typedef unsigned short __kernel_ipc_pid_t; -typedef unsigned short __kernel_uid_t; -typedef unsigned short __kernel_gid_t; -typedef int __kernel_ssize_t; -typedef int __kernel_ptrdiff_t; -typedef unsigned int __kernel_uid32_t; -typedef unsigned int __kernel_gid32_t; -typedef unsigned short __kernel_old_uid_t; -typedef unsigned short __kernel_old_gid_t; -typedef unsigned short __kernel_old_dev_t; - -#else /* __s390x__ */ - -typedef unsigned int __kernel_ino_t; -typedef unsigned int __kernel_mode_t; -typedef unsigned int __kernel_nlink_t; -typedef int __kernel_ipc_pid_t; -typedef unsigned int __kernel_uid_t; -typedef unsigned int __kernel_gid_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ -typedef __kernel_uid_t __kernel_old_uid_t; -typedef __kernel_gid_t __kernel_old_gid_t; -typedef __kernel_uid_t __kernel_uid32_t; -typedef __kernel_gid_t __kernel_gid32_t; -typedef unsigned short __kernel_old_dev_t; - -#endif /* __s390x__ */ - -typedef struct { - int val[2]; -} __kernel_fsid_t; - - -#ifdef __KERNEL__ - -#undef __FD_SET -static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] |= (1UL<<_rem); -} - -#undef __FD_CLR -static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); -} - -#undef __FD_ISSET -static inline int __FD_ISSET(unsigned long fd, const __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - return (fdsetp->fds_bits[_tmp] & (1UL<<_rem)) != 0; -} - -#undef __FD_ZERO -#define __FD_ZERO(fdsetp) \ - ((void) memset ((void *) (fdsetp), 0, sizeof (__kernel_fd_set))) - -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2c79b641627..6f02d452bbe 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/processor.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -13,13 +11,38 @@ #ifndef __ASM_S390_PROCESSOR_H #define __ASM_S390_PROCESSOR_H +#define CIF_MCCK_PENDING 0 /* machine check handling is pending */ +#define CIF_ASCE 1 /* user asce needs fixup / uaccess */ + +#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING) +#define _CIF_ASCE (1<<CIF_ASCE) + + +#ifndef __ASSEMBLY__ + #include <linux/linkage.h> +#include <linux/irqflags.h> #include <asm/cpu.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/setup.h> +#include <asm/runtime_instr.h> + +static inline void set_cpu_flag(int flag) +{ + S390_lowcore.cpu_flags |= (1U << flag); +} + +static inline void clear_cpu_flag(int flag) +{ + S390_lowcore.cpu_flags &= ~(1U << flag); +} + +static inline int test_cpu_flag(int flag) +{ + return !!(S390_lowcore.cpu_flags & (1U << flag)); +} -#ifdef __KERNEL__ /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -32,37 +55,36 @@ static inline void get_cpu_id(struct cpuid *ptr) } extern void s390_adjust_jiffies(void); -extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; +extern void execve_tail(void); /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define TASK_SIZE (1UL << 31) +#define TASK_MAX_SIZE (1UL << 31) #define TASK_UNMAPPED_BASE (1UL << 30) -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_MAX_SIZE (1UL << 53) -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ -#ifdef __KERNEL__ - -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define STACK_TOP (1UL << 31) #define STACK_TOP_MAX (1UL << 31) -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) #define STACK_TOP_MAX (1UL << 42) -#endif /* __s390x__ */ - - -#endif +#endif /* CONFIG_64BIT */ #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -78,14 +100,32 @@ struct thread_struct { unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; - unsigned long prot_addr; /* address of protection-excep. */ - unsigned int trap_no; + unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; + struct list_head list; + /* cpu runtime instrumentation */ + struct runtime_instr_cb *ri_cb; + int ri_signum; +#ifdef CONFIG_64BIT + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ +#endif }; +/* Flag to disable transactions. */ +#define PER_FLAG_NO_TE 1UL +/* Flag to enable random transaction aborts. */ +#define PER_FLAG_TE_ABORT_RAND 2UL +/* Flag to specify random transaction abort mode: + * - abort each transaction at a random instruction before TEND if set. + * - abort random transactions at a random instruction if cleared. + */ +#define PER_FLAG_TE_ABORT_RAND_TEND 4UL + typedef struct thread_struct thread_struct; /* @@ -116,19 +156,19 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#define start_thread(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + execve_tail(); \ } while (0) -#define start_thread31(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ + execve_tail(); \ } while (0) /* Forward declaration, a strange C thing */ @@ -136,26 +176,37 @@ struct task_struct; struct mm_struct; struct seq_file; +#ifdef CONFIG_64BIT +extern void show_cacheinfo(struct seq_file *m); +#else +static inline void show_cacheinfo(struct seq_file *m) { } +#endif + /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); - -/* Prepare to copy thread state - unlazy all lazy status */ -#define prepare_to_copy(tsk) do { } while (0) /* * Return saved PC of a blocked thread. */ extern unsigned long thread_saved_pc(struct task_struct *t); -extern void show_code(struct pt_regs *regs); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ (task_stack_page(tsk) + THREAD_SIZE) - 1) #define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15]) +/* Has task runtime instrumentation enabled ? */ +#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) + +static inline unsigned short stap(void) +{ + unsigned short cpu_address; + + asm volatile("stap %0" : "=m" (cpu_address)); + return cpu_address; +} + /* * Give up the time slice of the virtual PU. */ @@ -166,6 +217,8 @@ static inline void cpu_relax(void) barrier(); } +#define arch_mutex_cpu_relax() barrier() + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); @@ -176,7 +229,7 @@ static inline void psw_set_key(unsigned int key) */ static inline void __load_psw(psw_t psw) { -#ifndef __s390x__ +#ifndef CONFIG_64BIT asm volatile("lpsw %0" : : "Q" (psw) : "cc"); #else asm volatile("lpswe %0" : : "Q" (psw) : "cc"); @@ -187,7 +240,6 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ - static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; @@ -195,7 +247,7 @@ static inline void __load_psw_mask (unsigned long mask) psw.mask = mask; -#ifndef __s390x__ +#ifndef CONFIG_64BIT asm volatile( " basr %0,0\n" "0: ahi %0,1f-0b\n" @@ -203,41 +255,52 @@ static inline void __load_psw_mask (unsigned long mask) " lpsw %1\n" "1:" : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ asm volatile( " larl %0,1f\n" " stg %0,%O1+8(%R1)\n" " lpswe %1\n" "1:" : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ } - + /* - * Function to stop a processor until an interruption occurred + * Rewind PSW instruction address by specified number of bytes. */ -static inline void enabled_wait(void) +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) { - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); -} +#ifndef CONFIG_64BIT + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} + /* * Function to drop a processor into disabled wait state */ - -static inline void ATTRIB_NORET disabled_wait(unsigned long code) +static inline void __noreturn disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; - dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; dw_psw.addr = code; /* * Store status and then load disabled wait psw, * the processor is dead afterwards */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT asm volatile( " stctl 0,0,0(%2)\n" " ni 0(%2),0xef\n" /* switch off protection */ @@ -256,7 +319,7 @@ static inline void ATTRIB_NORET disabled_wait(unsigned long code) " lpsw 0(%1)" : "=m" (ctl_buf) : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc"); -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ asm volatile( " stctg 0,0,0(%2)\n" " ni 4(%2),0xef\n" /* switch off protection */ @@ -289,11 +352,26 @@ static inline void ATTRIB_NORET disabled_wait(unsigned long code) " lpswe 0(%1)" : "=m" (ctl_buf) : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1"); -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ while (1); } /* + * Use to set psw mask except for the first byte which + * won't be changed by this function. + */ +static inline void +__set_psw_mask(unsigned long mask) +{ + __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); +} + +#define local_mcck_enable() \ + __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT) + +/* * Basic Machine Check/Program Check Handler. */ @@ -307,23 +385,35 @@ extern void (*s390_base_ext_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -#endif +extern int memcpy_real(void *, void *, size_t); +extern void memcpy_absolute(void *, void *, size_t); + +#define mem_assign_absolute(dest, val) { \ + __typeof__(dest) __tmp = (val); \ + \ + BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \ + memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ +} /* * Helper macro for exception table entries */ -#ifndef __s390x__ -#define EX_TABLE(_fault,_target) \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long " #_fault "," #_target "\n" \ - ".previous\n" -#else -#define EX_TABLE(_fault,_target) \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad " #_fault "," #_target "\n" \ +#define EX_TABLE(_fault, _target) \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".long (" #_fault ") - .\n" \ + ".long (" #_target ") - .\n" \ ".previous\n" -#endif -#endif /* __ASM_S390_PROCESSOR_H */ +#else /* __ASSEMBLY__ */ + +#define EX_TABLE(_fault, _target) \ + .section __ex_table,"a" ; \ + .align 4 ; \ + .long (_fault) - . ; \ + .long (_target) - . ; \ + .previous + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 9ad628a8574..55d69dd7473 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -1,322 +1,70 @@ /* - * include/asm-s390/ptrace.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) */ - #ifndef _S390_PTRACE_H #define _S390_PTRACE_H -/* - * Offsets in the user_regs_struct. They are used for the ptrace - * system call and in entry.S - */ -#ifndef __s390x__ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x04 -#define PT_GPR0 0x08 -#define PT_GPR1 0x0C -#define PT_GPR2 0x10 -#define PT_GPR3 0x14 -#define PT_GPR4 0x18 -#define PT_GPR5 0x1C -#define PT_GPR6 0x20 -#define PT_GPR7 0x24 -#define PT_GPR8 0x28 -#define PT_GPR9 0x2C -#define PT_GPR10 0x30 -#define PT_GPR11 0x34 -#define PT_GPR12 0x38 -#define PT_GPR13 0x3C -#define PT_GPR14 0x40 -#define PT_GPR15 0x44 -#define PT_ACR0 0x48 -#define PT_ACR1 0x4C -#define PT_ACR2 0x50 -#define PT_ACR3 0x54 -#define PT_ACR4 0x58 -#define PT_ACR5 0x5C -#define PT_ACR6 0x60 -#define PT_ACR7 0x64 -#define PT_ACR8 0x68 -#define PT_ACR9 0x6C -#define PT_ACR10 0x70 -#define PT_ACR11 0x74 -#define PT_ACR12 0x78 -#define PT_ACR13 0x7C -#define PT_ACR14 0x80 -#define PT_ACR15 0x84 -#define PT_ORIGGPR2 0x88 -#define PT_FPC 0x90 -/* - * A nasty fact of life that the ptrace api - * only supports passing of longs. - */ -#define PT_FPR0_HI 0x98 -#define PT_FPR0_LO 0x9C -#define PT_FPR1_HI 0xA0 -#define PT_FPR1_LO 0xA4 -#define PT_FPR2_HI 0xA8 -#define PT_FPR2_LO 0xAC -#define PT_FPR3_HI 0xB0 -#define PT_FPR3_LO 0xB4 -#define PT_FPR4_HI 0xB8 -#define PT_FPR4_LO 0xBC -#define PT_FPR5_HI 0xC0 -#define PT_FPR5_LO 0xC4 -#define PT_FPR6_HI 0xC8 -#define PT_FPR6_LO 0xCC -#define PT_FPR7_HI 0xD0 -#define PT_FPR7_LO 0xD4 -#define PT_FPR8_HI 0xD8 -#define PT_FPR8_LO 0XDC -#define PT_FPR9_HI 0xE0 -#define PT_FPR9_LO 0xE4 -#define PT_FPR10_HI 0xE8 -#define PT_FPR10_LO 0xEC -#define PT_FPR11_HI 0xF0 -#define PT_FPR11_LO 0xF4 -#define PT_FPR12_HI 0xF8 -#define PT_FPR12_LO 0xFC -#define PT_FPR13_HI 0x100 -#define PT_FPR13_LO 0x104 -#define PT_FPR14_HI 0x108 -#define PT_FPR14_LO 0x10C -#define PT_FPR15_HI 0x110 -#define PT_FPR15_LO 0x114 -#define PT_CR_9 0x118 -#define PT_CR_10 0x11C -#define PT_CR_11 0x120 -#define PT_IEEE_IP 0x13C -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x140-1 - -#define GPR_SIZE 4 -#define CR_SIZE 4 - -#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ - -#else /* __s390x__ */ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x08 -#define PT_GPR0 0x10 -#define PT_GPR1 0x18 -#define PT_GPR2 0x20 -#define PT_GPR3 0x28 -#define PT_GPR4 0x30 -#define PT_GPR5 0x38 -#define PT_GPR6 0x40 -#define PT_GPR7 0x48 -#define PT_GPR8 0x50 -#define PT_GPR9 0x58 -#define PT_GPR10 0x60 -#define PT_GPR11 0x68 -#define PT_GPR12 0x70 -#define PT_GPR13 0x78 -#define PT_GPR14 0x80 -#define PT_GPR15 0x88 -#define PT_ACR0 0x90 -#define PT_ACR1 0x94 -#define PT_ACR2 0x98 -#define PT_ACR3 0x9C -#define PT_ACR4 0xA0 -#define PT_ACR5 0xA4 -#define PT_ACR6 0xA8 -#define PT_ACR7 0xAC -#define PT_ACR8 0xB0 -#define PT_ACR9 0xB4 -#define PT_ACR10 0xB8 -#define PT_ACR11 0xBC -#define PT_ACR12 0xC0 -#define PT_ACR13 0xC4 -#define PT_ACR14 0xC8 -#define PT_ACR15 0xCC -#define PT_ORIGGPR2 0xD0 -#define PT_FPC 0xD8 -#define PT_FPR0 0xE0 -#define PT_FPR1 0xE8 -#define PT_FPR2 0xF0 -#define PT_FPR3 0xF8 -#define PT_FPR4 0x100 -#define PT_FPR5 0x108 -#define PT_FPR6 0x110 -#define PT_FPR7 0x118 -#define PT_FPR8 0x120 -#define PT_FPR9 0x128 -#define PT_FPR10 0x130 -#define PT_FPR11 0x138 -#define PT_FPR12 0x140 -#define PT_FPR13 0x148 -#define PT_FPR14 0x150 -#define PT_FPR15 0x158 -#define PT_CR_9 0x160 -#define PT_CR_10 0x168 -#define PT_CR_11 0x170 -#define PT_IEEE_IP 0x1A8 -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x1B0-1 - -#define GPR_SIZE 8 -#define CR_SIZE 8 +#include <uapi/asm/ptrace.h> -#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ +#define PIF_SYSCALL 0 /* inside a system call */ +#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ -#endif /* __s390x__ */ - -#define NUM_GPRS 16 -#define NUM_FPRS 16 -#define NUM_CRS 16 -#define NUM_ACRS 16 - -#define NUM_CR_WORDS 3 - -#define FPR_SIZE 8 -#define FPC_SIZE 4 -#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ -#define ACR_SIZE 4 - - -#define PTRACE_OLDSETOPTIONS 21 +#define _PIF_SYSCALL (1<<PIF_SYSCALL) +#define _PIF_PER_TRAP (1<<PIF_PER_TRAP) #ifndef __ASSEMBLY__ -#include <linux/stddef.h> -#include <linux/types.h> - -typedef union -{ - float f; - double d; - __u64 ui; - struct - { - __u32 hi; - __u32 lo; - } fp; -} freg_t; - -typedef struct -{ - __u32 fpc; - freg_t fprs[NUM_FPRS]; -} s390_fp_regs; - -#define FPC_EXCEPTION_MASK 0xF8000000 -#define FPC_FLAGS_MASK 0x00F80000 -#define FPC_DXC_MASK 0x0000FF00 -#define FPC_RM_MASK 0x00000003 -#define FPC_VALID_MASK 0xF8F8FF03 - -/* this typedef defines how a Program Status Word looks like */ -typedef struct -{ - unsigned long mask; - unsigned long addr; -} __attribute__ ((aligned(8))) psw_t; - -typedef struct -{ - __u32 mask; - __u32 addr; -} __attribute__ ((aligned(8))) psw_compat_t; - -#ifndef __s390x__ - -#define PSW_MASK_PER 0x40000000UL -#define PSW_MASK_DAT 0x04000000UL -#define PSW_MASK_IO 0x02000000UL -#define PSW_MASK_EXT 0x01000000UL -#define PSW_MASK_KEY 0x00F00000UL -#define PSW_MASK_MCHECK 0x00040000UL -#define PSW_MASK_WAIT 0x00020000UL -#define PSW_MASK_PSTATE 0x00010000UL -#define PSW_MASK_ASC 0x0000C000UL -#define PSW_MASK_CC 0x00003000UL -#define PSW_MASK_PM 0x00000F00UL - -#define PSW_ADDR_AMODE 0x80000000UL -#define PSW_ADDR_INSN 0x7FFFFFFFUL - -#define PSW_BASE_BITS 0x00080000UL -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) - -#define PSW_ASC_PRIMARY 0x00000000UL -#define PSW_ASC_ACCREG 0x00004000UL -#define PSW_ASC_SECONDARY 0x00008000UL -#define PSW_ASC_HOME 0x0000C000UL - -#else /* __s390x__ */ - -#define PSW_MASK_PER 0x4000000000000000UL -#define PSW_MASK_DAT 0x0400000000000000UL -#define PSW_MASK_IO 0x0200000000000000UL -#define PSW_MASK_EXT 0x0100000000000000UL -#define PSW_MASK_KEY 0x00F0000000000000UL -#define PSW_MASK_MCHECK 0x0004000000000000UL -#define PSW_MASK_WAIT 0x0002000000000000UL -#define PSW_MASK_PSTATE 0x0001000000000000UL -#define PSW_MASK_ASC 0x0000C00000000000UL -#define PSW_MASK_CC 0x0000300000000000UL -#define PSW_MASK_PM 0x00000F0000000000UL - -#define PSW_ADDR_AMODE 0x0000000000000000UL -#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL -#define PSW_BASE_BITS 0x0000000180000000UL -#define PSW_BASE32_BITS 0x0000000080000000UL -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) - -#define PSW_ASC_PRIMARY 0x0000000000000000UL -#define PSW_ASC_ACCREG 0x0000400000000000UL -#define PSW_ASC_SECONDARY 0x0000800000000000UL -#define PSW_ASC_HOME 0x0000C00000000000UL - -#endif /* __s390x__ */ - -#ifdef __KERNEL__ -extern long psw_kernel_bits; -extern long psw_user_bits; +#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \ + PSW_MASK_EA | PSW_MASK_BA) +#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \ + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \ + PSW_MASK_PSTATE | PSW_ASC_PRIMARY) + +struct psw_bits { + unsigned long long : 1; + unsigned long long r : 1; /* PER-Mask */ + unsigned long long : 3; + unsigned long long t : 1; /* DAT Mode */ + unsigned long long i : 1; /* Input/Output Mask */ + unsigned long long e : 1; /* External Mask */ + unsigned long long key : 4; /* PSW Key */ + unsigned long long : 1; + unsigned long long m : 1; /* Machine-Check Mask */ + unsigned long long w : 1; /* Wait State */ + unsigned long long p : 1; /* Problem State */ + unsigned long long as : 2; /* Address Space Control */ + unsigned long long cc : 2; /* Condition Code */ + unsigned long long pm : 4; /* Program Mask */ + unsigned long long ri : 1; /* Runtime Instrumentation */ + unsigned long long : 6; + unsigned long long eaba : 2; /* Addressing Mode */ #ifdef CONFIG_64BIT -extern long psw_user32_bits; + unsigned long long : 31; + unsigned long long ia : 64;/* Instruction Address */ +#else + unsigned long long ia : 31;/* Instruction Address */ #endif -#endif - -/* This macro merges a NEW PSW mask specified by the user into - the currently active PSW mask CURRENT, modifying only those - bits in CURRENT that the user may be allowed to change: this - is the condition code and the program mask bits. */ -#define PSW_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \ - ((NEW) & (PSW_MASK_CC|PSW_MASK_PM))) - -/* - * The s390_regs structure is used to define the elf_gregset_t. - */ -typedef struct -{ - psw_t psw; - unsigned long gprs[NUM_GPRS]; - unsigned int acrs[NUM_ACRS]; - unsigned long orig_gpr2; -} s390_regs; +}; -typedef struct -{ - psw_compat_t psw; - __u32 gprs[NUM_GPRS]; - __u32 acrs[NUM_ACRS]; - __u32 orig_gpr2; -} s390_compat_regs; +enum { + PSW_AMODE_24BIT = 0, + PSW_AMODE_31BIT = 1, + PSW_AMODE_64BIT = 3 +}; -typedef struct -{ - __u32 gprs_high[NUM_GPRS]; -} s390_compat_regs_high; +enum { + PSW_AS_PRIMARY = 0, + PSW_AS_ACCREG = 1, + PSW_AS_SECONDARY = 2, + PSW_AS_HOME = 3 +}; -#ifdef __KERNEL__ +#define psw_bits(__psw) (*({ \ + typecheck(psw_t, __psw); \ + &(*(struct psw_bits *)(&(__psw))); \ +})) /* * The pt_regs struct defines the way the registers are stored on @@ -328,8 +76,10 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int int_code; + unsigned int int_parm; + unsigned long int_parm_long; + unsigned long flags; }; /* @@ -365,188 +115,52 @@ struct per_struct_kernel { unsigned char access_id; /* PER trap access identification */ }; -#define PER_EVENT_MASK 0xE9000000UL +#define PER_EVENT_MASK 0xEB000000UL #define PER_EVENT_BRANCH 0x80000000UL #define PER_EVENT_IFETCH 0x40000000UL #define PER_EVENT_STORE 0x20000000UL #define PER_EVENT_STORE_REAL 0x08000000UL +#define PER_EVENT_TRANSACTION_END 0x02000000UL #define PER_EVENT_NULLIFICATION 0x01000000UL -#define PER_CONTROL_MASK 0x00a00000UL +#define PER_CONTROL_MASK 0x00e00000UL #define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL +#define PER_CONTROL_SUSPENSION 0x00400000UL #define PER_CONTROL_ALTERATION 0x00200000UL -#endif - -/* - * Now for the user space program event recording (trace) definitions. - * The following structures are used only for the ptrace interface, don't - * touch or even look at it if you don't want to modify the user-space - * ptrace interface. In particular stay away from it for in-kernel PER. - */ -typedef struct -{ - unsigned long cr[NUM_CR_WORDS]; -} per_cr_words; - -#define PER_EM_MASK 0xE8000000UL - -typedef struct -{ -#ifdef __s390x__ - unsigned : 32; -#endif /* __s390x__ */ - unsigned em_branching : 1; - unsigned em_instruction_fetch : 1; - /* - * Switching on storage alteration automatically fixes - * the storage alteration event bit in the users std. - */ - unsigned em_storage_alteration : 1; - unsigned em_gpr_alt_unused : 1; - unsigned em_store_real_address : 1; - unsigned : 3; - unsigned branch_addr_ctl : 1; - unsigned : 1; - unsigned storage_alt_space_ctl : 1; - unsigned : 21; - unsigned long starting_addr; - unsigned long ending_addr; -} per_cr_bits; - -typedef struct -{ - unsigned short perc_atmid; - unsigned long address; - unsigned char access_id; -} per_lowcore_words; - -typedef struct +static inline void set_pt_regs_flag(struct pt_regs *regs, int flag) { - unsigned perc_branching : 1; - unsigned perc_instruction_fetch : 1; - unsigned perc_storage_alteration : 1; - unsigned perc_gpr_alt_unused : 1; - unsigned perc_store_real_address : 1; - unsigned : 3; - unsigned atmid_psw_bit_31 : 1; - unsigned atmid_validity_bit : 1; - unsigned atmid_psw_bit_32 : 1; - unsigned atmid_psw_bit_5 : 1; - unsigned atmid_psw_bit_16 : 1; - unsigned atmid_psw_bit_17 : 1; - unsigned si : 2; - unsigned long address; - unsigned : 4; - unsigned access_id : 4; -} per_lowcore_bits; - -typedef struct -{ - union { - per_cr_words words; - per_cr_bits bits; - } control_regs; - /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing - */ - unsigned single_step : 1; - unsigned instruction_fetch : 1; - unsigned : 30; - /* - * These addresses are copied into cr10 & cr11 if single - * stepping is switched off - */ - unsigned long starting_addr; - unsigned long ending_addr; - union { - per_lowcore_words words; - per_lowcore_bits bits; - } lowcore; -} per_struct; - -typedef struct -{ - unsigned int len; - unsigned long kernel_addr; - unsigned long process_addr; -} ptrace_area; - -/* - * S/390 specific non posix ptrace requests. I chose unusual values so - * they are unlikely to clash with future ptrace definitions. - */ -#define PTRACE_PEEKUSR_AREA 0x5000 -#define PTRACE_POKEUSR_AREA 0x5001 -#define PTRACE_PEEKTEXT_AREA 0x5002 -#define PTRACE_PEEKDATA_AREA 0x5003 -#define PTRACE_POKETEXT_AREA 0x5004 -#define PTRACE_POKEDATA_AREA 0x5005 -#define PTRACE_GET_LAST_BREAK 0x5006 - -/* - * PT_PROT definition is loosely based on hppa bsd definition in - * gdb/hppab-nat.c - */ -#define PTRACE_PROT 21 + regs->flags |= (1U << flag); +} -typedef enum +static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag) { - ptprot_set_access_watchpoint, - ptprot_set_write_watchpoint, - ptprot_disable_watchpoint -} ptprot_flags; + regs->flags &= ~(1U << flag); +} -typedef struct +static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) { - unsigned long lowaddr; - unsigned long hiaddr; - ptprot_flags prot; -} ptprot_area; - -/* Sequence of bytes for breakpoint illegal instruction. */ -#define S390_BREAKPOINT {0x0,0x1} -#define S390_BREAKPOINT_U16 ((__u16)0x0001) -#define S390_SYSCALL_OPCODE ((__u16)0x0a00) -#define S390_SYSCALL_SIZE 2 - -/* - * The user_regs_struct defines the way the user registers are - * store on the stack for signal handling. - */ -struct user_regs_struct -{ - psw_t psw; - unsigned long gprs[NUM_GPRS]; - unsigned int acrs[NUM_ACRS]; - unsigned long orig_gpr2; - s390_fp_regs fp_regs; - /* - * These per registers are in here so that gdb can modify them - * itself as there is no "official" ptrace interface for hardware - * watchpoints. This is the way intel does it. - */ - per_struct per_info; - unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ -}; + return !!(regs->flags & (1U << flag)); +} -#ifdef __KERNEL__ /* * These are defined as per linux/ptrace.h, which see. */ #define arch_has_single_step() (1) -extern void show_regs(struct pt_regs * regs); +#define arch_has_block_step() (1) #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) #define user_stack_pointer(regs)((regs)->gprs[15]) -#define regs_return_value(regs)((regs)->gprs[2]) #define profile_pc(regs) instruction_pointer(regs) +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->gprs[2]; +} + int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); @@ -557,7 +171,5 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) return regs->gprs[15] & PSW_ADDR_INSN; } -#endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ - #endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 350e7ee5952..d786c634e05 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -1,7 +1,5 @@ /* - * linux/include/asm-s390/qdio.h - * - * Copyright 2000,2008 IBM Corp. + * Copyright IBM Corp. 2000, 2008 * Author(s): Utz Bacher <utz.bacher@de.ibm.com> * Jan Glauber <jang@linux.vnet.ibm.com> * @@ -46,6 +44,8 @@ struct qdesfmt0 { u32 : 16; } __attribute__ ((packed)); +#define QDR_AC_MULTI_BUFFER_ENABLE 0x01 + /** * struct qdr - queue description record (QDR) * @qfmt: queue format @@ -123,6 +123,40 @@ struct slibe { }; /** + * struct qaob - queue asynchronous operation block + * @res0: reserved parameters + * @res1: reserved parameter + * @res2: reserved parameter + * @res3: reserved parameter + * @aorc: asynchronous operation return code + * @flags: internal flags + * @cbtbs: control block type + * @sb_count: number of storage blocks + * @sba: storage block element addresses + * @dcount: size of storage block elements + * @user0: user defineable value + * @res4: reserved paramater + * @user1: user defineable value + * @user2: user defineable value + */ +struct qaob { + u64 res0[6]; + u8 res1; + u8 res2; + u8 res3; + u8 aorc; + u8 flags; + u16 cbtbs; + u8 sb_count; + u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u64 user0; + u64 res4[2]; + u64 user1; + u64 user2; +} __attribute__ ((packed, aligned(256))); + +/** * struct slib - storage list information block (SLIB) * @nsliba: next SLIB address (if any) * @sla: SL address @@ -139,110 +173,47 @@ struct slib { struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(2048))); -/** - * struct sbal_flags - storage block address list flags - * @last: last entry - * @cont: contiguous storage - * @frag: fragmentation - */ -struct sbal_flags { - u8 : 1; - u8 last : 1; - u8 cont : 1; - u8 : 1; - u8 frag : 2; - u8 : 2; -} __attribute__ ((packed)); +#define SBAL_EFLAGS_LAST_ENTRY 0x40 +#define SBAL_EFLAGS_CONTIGUOUS 0x20 +#define SBAL_EFLAGS_FIRST_FRAG 0x04 +#define SBAL_EFLAGS_MIDDLE_FRAG 0x08 +#define SBAL_EFLAGS_LAST_FRAG 0x0c +#define SBAL_EFLAGS_MASK 0x6f -#define SBAL_FLAGS_FIRST_FRAG 0x04000000UL -#define SBAL_FLAGS_MIDDLE_FRAG 0x08000000UL -#define SBAL_FLAGS_LAST_FRAG 0x0c000000UL -#define SBAL_FLAGS_LAST_ENTRY 0x40000000UL -#define SBAL_FLAGS_CONTIGUOUS 0x20000000UL - -#define SBAL_FLAGS0_DATA_CONTINUATION 0x20UL +#define SBAL_SFLAGS0_PCI_REQ 0x40 +#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20 /* Awesome OpenFCP extensions */ -#define SBAL_FLAGS0_TYPE_STATUS 0x00UL -#define SBAL_FLAGS0_TYPE_WRITE 0x08UL -#define SBAL_FLAGS0_TYPE_READ 0x10UL -#define SBAL_FLAGS0_TYPE_WRITE_READ 0x18UL -#define SBAL_FLAGS0_MORE_SBALS 0x04UL -#define SBAL_FLAGS0_COMMAND 0x02UL -#define SBAL_FLAGS0_LAST_SBAL 0x00UL -#define SBAL_FLAGS0_ONLY_SBAL SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_MIDDLE_SBAL SBAL_FLAGS0_MORE_SBALS -#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND -#define SBAL_FLAGS0_PCI 0x40 - -/** - * struct sbal_sbalf_0 - sbal flags for sbale 0 - * @pci: PCI indicator - * @cont: data continuation - * @sbtype: storage-block type (FCP) - */ -struct sbal_sbalf_0 { - u8 : 1; - u8 pci : 1; - u8 cont : 1; - u8 sbtype : 2; - u8 : 3; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_1 - sbal flags for sbale 1 - * @key: storage key - */ -struct sbal_sbalf_1 { - u8 : 4; - u8 key : 4; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_14 - sbal flags for sbale 14 - * @erridx: error index - */ -struct sbal_sbalf_14 { - u8 : 4; - u8 erridx : 4; -} __attribute__ ((packed)); - -/** - * struct sbal_sbalf_15 - sbal flags for sbale 15 - * @reason: reason for error state - */ -struct sbal_sbalf_15 { - u8 reason; -} __attribute__ ((packed)); - -/** - * union sbal_sbalf - storage block address list flags - * @i0: sbalf0 - * @i1: sbalf1 - * @i14: sbalf14 - * @i15: sblaf15 - * @value: raw value - */ -union sbal_sbalf { - struct sbal_sbalf_0 i0; - struct sbal_sbalf_1 i1; - struct sbal_sbalf_14 i14; - struct sbal_sbalf_15 i15; - u8 value; -}; +#define SBAL_SFLAGS0_TYPE_STATUS 0x00 +#define SBAL_SFLAGS0_TYPE_WRITE 0x08 +#define SBAL_SFLAGS0_TYPE_READ 0x10 +#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18 +#define SBAL_SFLAGS0_MORE_SBALS 0x04 +#define SBAL_SFLAGS0_COMMAND 0x02 +#define SBAL_SFLAGS0_LAST_SBAL 0x00 +#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND +#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS +#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND) /** * struct qdio_buffer_element - SBAL entry - * @flags: flags + * @eflags: SBAL entry flags + * @scount: SBAL count + * @sflags: whole SBAL flags * @length: length * @addr: address */ struct qdio_buffer_element { - u32 flags; + u8 eflags; + /* private: */ + u8 res1; + /* public: */ + u8 scount; + u8 sflags; u32 length; #ifdef CONFIG_32BIT /* private: */ - void *reserved; + void *res2; /* public: */ #endif void *addr; @@ -285,9 +256,43 @@ struct slsb { u8 val[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(256))); +/** + * struct qdio_outbuf_state - SBAL related asynchronous operation information + * (for communication with upper layer programs) + * (only required for use with completion queues) + * @flags: flags indicating state of buffer + * @aob: pointer to QAOB used for the particular SBAL + * @user: pointer to upper layer program's state information related to SBAL + * (stored in user1 data of QAOB) + */ +struct qdio_outbuf_state { + u8 flags; + struct qaob *aob; + void *user; +}; + +#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 +#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 + +#define CHSC_AC1_INITIATE_INPUTQ 0x80 + + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ + +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 +#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 + struct qdio_ssqd_desc { u8 flags; u8:8; @@ -306,8 +311,7 @@ struct qdio_ssqd_desc { u64 sch_token; u8 mro; u8 mri; - u8:8; - u8 sbalic; + u16 qdioac3; u16:16; u8:8; u8 mmwc; @@ -319,18 +323,20 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, int, int, unsigned long); /* qdio errors reported to the upper-layer program */ -#define QDIO_ERROR_SIGA_TARGET 0x02 -#define QDIO_ERROR_SIGA_ACCESS_EXCEPTION 0x10 -#define QDIO_ERROR_SIGA_BUSY 0x20 -#define QDIO_ERROR_ACTIVATE_CHECK_CONDITION 0x40 -#define QDIO_ERROR_SLSB_STATE 0x80 +#define QDIO_ERROR_ACTIVATE 0x0001 +#define QDIO_ERROR_GET_BUF_STATE 0x0002 +#define QDIO_ERROR_SET_BUF_STATE 0x0004 +#define QDIO_ERROR_SLSB_STATE 0x0100 + +#define QDIO_ERROR_FATAL 0x00ff +#define QDIO_ERROR_TEMPORARY 0xff00 /* for qdio_cleanup */ #define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 #define QDIO_FLAG_CLEANUP_USING_HALT 0x02 /** - * struct qdio_initialize - qdio initalization data + * struct qdio_initialize - qdio initialization data * @cdev: associated ccw device * @q_format: queue format * @adapter_name: name for the adapter @@ -343,13 +349,16 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues + * @queue_start_poll_array: polling handlers (one per input queue or NULL) * @int_parm: interruption parameter * @input_sbal_addr_array: address of no_input_qs * 128 pointers * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) */ struct qdio_initialize { struct ccw_device *cdev; unsigned char q_format; + unsigned char qdr_ac; unsigned char adapter_name[8]; unsigned int qib_param_field_format; unsigned char *qib_param_field; @@ -360,13 +369,43 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; - void (*queue_start_poll) (struct ccw_device *, int, unsigned long); + void (**queue_start_poll_array) (struct ccw_device *, int, + unsigned long); int scan_threshold; unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; + struct qdio_outbuf_state *output_sbal_state_array; }; +/** + * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() + * @l3_ipv6_addr: entry contains IPv6 address + * @l3_ipv4_addr: entry contains IPv4 address + * @l2_addr_lnid: entry contains MAC address and VLAN ID + */ +enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; + +/** + * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() + * @nit: Network interface token + * @addr: Address of one of the three types + * + * The struct is passed to the callback function by qdio_brinfo_desc() + */ +struct qdio_brinfo_entry_l3_ipv6 { + u64 nit; + struct { unsigned char _s6_addr[16]; } addr; +} __packed; +struct qdio_brinfo_entry_l3_ipv4 { + u64 nit; + struct { uint32_t _s_addr; } addr; +} __packed; +struct qdio_brinfo_entry_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ #define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ #define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ @@ -379,6 +418,7 @@ struct qdio_initialize { extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); +extern void qdio_release_aob(struct qaob *); extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, unsigned int); extern int qdio_start_irq(struct ccw_device *, int); @@ -387,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); +extern int qdio_pnso_brinfo(struct subchannel_id schid, + int cnc, u16 *response, + void (*cb)(void *priv, enum qdio_brinfo_entry_type type, + void *entry), + void *priv); #endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index f584f4a5258..804578587a7 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/reset.h - * * Copyright IBM Corp. 2006 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ @@ -17,5 +15,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void); +extern void s390_reset_system(void (*func)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h new file mode 100644 index 00000000000..830da737ff8 --- /dev/null +++ b/arch/s390/include/asm/runtime_instr.h @@ -0,0 +1,98 @@ +#ifndef _RUNTIME_INSTR_H +#define _RUNTIME_INSTR_H + +#define S390_RUNTIME_INSTR_START 0x1 +#define S390_RUNTIME_INSTR_STOP 0x2 + +struct runtime_instr_cb { + __u64 buf_current; + __u64 buf_origin; + __u64 buf_limit; + + __u32 valid : 1; + __u32 pstate : 1; + __u32 pstate_set_buf : 1; + __u32 home_space : 1; + __u32 altered : 1; + __u32 : 3; + __u32 pstate_sample : 1; + __u32 sstate_sample : 1; + __u32 pstate_collect : 1; + __u32 sstate_collect : 1; + __u32 : 1; + __u32 halted_int : 1; + __u32 int_requested : 1; + __u32 buffer_full_int : 1; + __u32 key : 4; + __u32 : 9; + __u32 rgs : 3; + + __u32 mode : 4; + __u32 next : 1; + __u32 mae : 1; + __u32 : 2; + __u32 call_type_br : 1; + __u32 return_type_br : 1; + __u32 other_type_br : 1; + __u32 bc_other_type : 1; + __u32 emit : 1; + __u32 tx_abort : 1; + __u32 : 2; + __u32 bp_xn : 1; + __u32 bp_xt : 1; + __u32 bp_ti : 1; + __u32 bp_ni : 1; + __u32 suppr_y : 1; + __u32 suppr_z : 1; + + __u32 dc_miss_extra : 1; + __u32 lat_lev_ignore : 1; + __u32 ic_lat_lev : 4; + __u32 dc_lat_lev : 4; + + __u64 reserved1; + __u64 scaling_factor; + __u64 rsic; + __u64 reserved2; +} __packed __aligned(8); + +extern struct runtime_instr_cb runtime_instr_empty_cb; + +static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */ + : : "Q" (*cb)); +} + +static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */ + : "=Q" (*cb) : : "cc"); +} + +static inline void save_ri_cb(struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_prev) + store_runtime_instr_cb(cb_prev); +#endif +} + +static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, + struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_next) + load_runtime_instr_cb(cb_next); + else if (cb_prev) + load_runtime_instr_cb(&runtime_instr_empty_cb); +#endif +} + +#ifdef CONFIG_64BIT +extern void exit_thread_runtime_instr(void); +#else +static inline void exit_thread_runtime_instr(void) { } +#endif + +#endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 423fdda2322..487f9b64efb 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -2,10 +2,8 @@ #define _S390_RWSEM_H /* - * include/asm-s390/rwsem.h - * * S390 version - * Copyright (C) 2002 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2002 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h @@ -41,81 +39,21 @@ #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" #endif -#ifdef __KERNEL__ - -#include <linux/list.h> -#include <linux/spinlock.h> - -struct rwsem_waiter; - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *); - -/* - * the semaphore definition - */ -struct rw_semaphore { - signed long count; - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 #define RWSEM_ACTIVE_MASK 0x0000ffff #define RWSEM_WAITING_BIAS (-0x00010000) -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define RWSEM_UNLOCKED_VALUE 0x0000000000000000L #define RWSEM_ACTIVE_BIAS 0x0000000000000001L #define RWSEM_ACTIVE_MASK 0x00000000ffffffffL #define RWSEM_WAITING_BIAS (-0x0000000100000000L) -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) /* - * initialisation - */ - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) - - -/* * lock for reading */ static inline void __down_read(struct rw_semaphore *sem) @@ -123,19 +61,19 @@ static inline void __down_read(struct rw_semaphore *sem) signed long old, new; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " ahi %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " aghi %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory"); @@ -151,7 +89,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) signed long old, new; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: ltr %1,%0\n" " jm 1f\n" @@ -159,7 +97,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) " cs %0,%1,%2\n" " jl 0b\n" "1:" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: ltgr %1,%0\n" " jm 1f\n" @@ -167,7 +105,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) " csg %0,%1,%2\n" " jl 0b\n" "1:" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory"); @@ -183,19 +121,19 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) tmp = RWSEM_ACTIVE_WRITE_BIAS; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " a %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " ag %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "m" (tmp) : "cc", "memory"); @@ -216,19 +154,19 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) signed long old; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%1\n" "0: ltr %0,%0\n" " jnz 1f\n" " cs %0,%3,%1\n" " jl 0b\n" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%1\n" "0: ltgr %0,%0\n" " jnz 1f\n" " csg %0,%3,%1\n" " jl 0b\n" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ "1:" : "=&d" (old), "=Q" (sem->count) : "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS) @@ -244,19 +182,19 @@ static inline void __up_read(struct rw_semaphore *sem) signed long old, new; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " ahi %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " aghi %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS) : "cc", "memory"); @@ -274,19 +212,19 @@ static inline void __up_write(struct rw_semaphore *sem) tmp = -RWSEM_ACTIVE_WRITE_BIAS; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " a %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " ag %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "m" (tmp) : "cc", "memory"); @@ -304,19 +242,19 @@ static inline void __downgrade_write(struct rw_semaphore *sem) tmp = -RWSEM_WAITING_BIAS; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " a %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " ag %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "m" (tmp) : "cc", "memory"); @@ -332,19 +270,19 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) signed long old, new; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " ar %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " agr %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "d" (delta) : "cc", "memory"); @@ -358,29 +296,23 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) signed long old, new; asm volatile( -#ifndef __s390x__ +#ifndef CONFIG_64BIT " l %0,%2\n" "0: lr %1,%0\n" " ar %1,%4\n" " cs %0,%1,%2\n" " jl 0b" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ " lg %0,%2\n" "0: lgr %1,%0\n" " agr %1,%4\n" " csg %0,%1,%2\n" " jl 0b" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "d" (delta) : "cc", "memory"); return new; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - -#endif /* __KERNEL__ */ #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h deleted file mode 100644 index 080876d5f19..00000000000 --- a/arch/s390/include/asm/s390_ext.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright IBM Corp. 1999,2010 - * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#ifndef _S390_EXTINT_H -#define _S390_EXTINT_H - -#include <linux/types.h> - -typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); - -int register_external_interrupt(__u16 code, ext_int_handler_t handler); -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler); - -#endif /* _S390_EXTINT_H */ diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h index 3e4d401b4e4..40b47dfa9d6 100644 --- a/arch/s390/include/asm/schid.h +++ b/arch/s390/include/asm/schid.h @@ -1,19 +1,8 @@ #ifndef ASM_SCHID_H #define ASM_SCHID_H -#include <linux/types.h> - -struct subchannel_id { - __u32 cssid : 8; - __u32 : 4; - __u32 m : 1; - __u32 ssid : 2; - __u32 one : 1; - __u32 sch_no : 16; -} __attribute__ ((packed, aligned(4))); - -#ifdef __KERNEL__ #include <linux/string.h> +#include <uapi/asm/schid.h> /* Helper function for sane state of pre-allocated subchannel_id. */ static inline void @@ -29,6 +18,4 @@ schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2) return !memcmp(schid1, schid2, sizeof(struct subchannel_id)); } -#endif /* __KERNEL__ */ - #endif /* ASM_SCHID_H */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index fed7bee650a..1aba89b53cb 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/sclp.h - * * Copyright IBM Corp. 2007 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ @@ -10,6 +8,7 @@ #include <linux/types.h> #include <asm/chpid.h> +#include <asm/cpu.h> #define SCLP_CHP_INFO_MASK_SIZE 32 @@ -29,7 +28,11 @@ struct sclp_ipl_info { struct sclp_cpu_entry { u8 address; - u8 reserved0[13]; + u8 reserved0[2]; + u8 : 3; + u8 siif : 1; + u8 : 4; + u8 reserved2[10]; u8 type; u8 reserved1; } __attribute__((packed)); @@ -39,20 +42,30 @@ struct sclp_cpu_info { unsigned int standby; unsigned int combined; int has_cpu_type; - struct sclp_cpu_entry cpu[255]; + struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1]; }; int sclp_get_cpu_info(struct sclp_cpu_info *info); int sclp_cpu_configure(u8 cpu); int sclp_cpu_deconfigure(u8 cpu); -void sclp_facilities_detect(void); unsigned long long sclp_get_rnmax(void); unsigned long long sclp_get_rzm(void); +unsigned int sclp_get_max_cpu(void); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); +bool __init sclp_has_linemode(void); +bool __init sclp_has_vt220(void); +bool sclp_has_sprp(void); +int sclp_pci_configure(u32 fid); +int sclp_pci_deconfigure(u32 fid); +int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); +unsigned long sclp_get_hsa_size(void); +void sclp_early_detect(void); +int sclp_has_siif(void); +unsigned int sclp_get_ibc(void); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h index de389cb54d2..4af99cdaddf 100644 --- a/arch/s390/include/asm/scsw.h +++ b/arch/s390/include/asm/scsw.h @@ -1,7 +1,7 @@ /* * Helper functions for scsw access. * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2012 * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> */ @@ -9,7 +9,7 @@ #define _ASM_S390_SCSW_H_ #include <linux/types.h> -#include <asm/chsc.h> +#include <asm/css_chars.h> #include <asm/cio.h> /** @@ -100,14 +100,46 @@ struct tm_scsw { } __attribute__ ((packed)); /** + * struct eadm_scsw - subchannel status word for eadm subchannels + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @ectl: extended control + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @aob: AOB address + * @dstat: device status + * @cstat: subchannel status + */ +struct eadm_scsw { + u32 key:4; + u32:1; + u32 eswf:1; + u32 cc:2; + u32:6; + u32 ectl:1; + u32:2; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 aob; + u32 dstat:8; + u32 cstat:8; + u32:16; +} __packed; + +/** * union scsw - subchannel status word * @cmd: command-mode SCSW * @tm: transport-mode SCSW + * @eadm: eadm SCSW */ union scsw { struct cmd_scsw cmd; struct tm_scsw tm; -} __attribute__ ((packed)); + struct eadm_scsw eadm; +} __packed; #define SCSW_FCTL_CLEAR_FUNC 0x1 #define SCSW_FCTL_HALT_FUNC 0x2 diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h new file mode 100644 index 00000000000..5b3e48ef534 --- /dev/null +++ b/arch/s390/include/asm/serial.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SERIAL_H +#define _ASM_S390_SERIAL_H + +#define BASE_BAUD 0 + +#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d5e2ef10537..089a49814c5 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -1,60 +1,40 @@ /* - * include/asm-s390/setup.h - * * S390 version - * Copyright IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2010 */ - #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H -#define COMMAND_LINE_SIZE 4096 - -#define ARCH_COMMAND_LINE_SIZE 896 +#include <uapi/asm/setup.h> -#ifdef __KERNEL__ #define PARMAREA 0x10400 -#define MEMORY_CHUNKS 256 #ifndef __ASSEMBLY__ #include <asm/lowcore.h> #include <asm/types.h> -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define IPL_DEVICE (*(unsigned long *) (0x10404)) #define INITRD_START (*(unsigned long *) (0x1040C)) #define INITRD_SIZE (*(unsigned long *) (0x10414)) -#else /* __s390x__ */ +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) +#else /* CONFIG_64BIT */ #define IPL_DEVICE (*(unsigned long *) (0x10400)) #define INITRD_START (*(unsigned long *) (0x10408)) #define INITRD_SIZE (*(unsigned long *) (0x10410)) -#endif /* __s390x__ */ +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) +#endif /* CONFIG_64BIT */ #define COMMAND_LINE ((char *) (0x10480)) -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 - -struct mem_chunk { - unsigned long addr; - unsigned long size; - int type; -}; - -extern struct mem_chunk memory_chunk[]; -extern unsigned long real_memory_size; extern int memory_end_set; extern unsigned long memory_end; +extern unsigned long max_physmem_end; -void detect_memory_layout(struct mem_chunk chunk[]); - -#define PRIMARY_SPACE_MODE 0 -#define ACCESS_REGISTER_MODE 1 -#define SECONDARY_SPACE_MODE 2 -#define HOME_SPACE_MODE 3 - -extern unsigned int user_mode; +extern void detect_memory_memblock(void); /* * Machine features detected in head.S @@ -62,50 +42,58 @@ extern unsigned int user_mode; #define MACHINE_FLAG_VM (1UL << 0) #define MACHINE_FLAG_IEEE (1UL << 1) -#define MACHINE_FLAG_CSP (1UL << 3) -#define MACHINE_FLAG_MVPG (1UL << 4) -#define MACHINE_FLAG_DIAG44 (1UL << 5) -#define MACHINE_FLAG_IDTE (1UL << 6) -#define MACHINE_FLAG_DIAG9C (1UL << 7) -#define MACHINE_FLAG_MVCOS (1UL << 8) -#define MACHINE_FLAG_KVM (1UL << 9) -#define MACHINE_FLAG_HPAGE (1UL << 10) -#define MACHINE_FLAG_PFMF (1UL << 11) +#define MACHINE_FLAG_CSP (1UL << 2) +#define MACHINE_FLAG_MVPG (1UL << 3) +#define MACHINE_FLAG_DIAG44 (1UL << 4) +#define MACHINE_FLAG_IDTE (1UL << 5) +#define MACHINE_FLAG_DIAG9C (1UL << 6) +#define MACHINE_FLAG_KVM (1UL << 8) +#define MACHINE_FLAG_ESOP (1UL << 9) +#define MACHINE_FLAG_EDAT1 (1UL << 10) +#define MACHINE_FLAG_EDAT2 (1UL << 11) #define MACHINE_FLAG_LPAR (1UL << 12) -#define MACHINE_FLAG_SPP (1UL << 13) +#define MACHINE_FLAG_LPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_TE (1UL << 15) +#define MACHINE_FLAG_RRBM (1UL << 16) +#define MACHINE_FLAG_TLB_LC (1UL << 17) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) +#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) +#define MACHINE_HAS_PFMF MACHINE_HAS_EDAT1 +#define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1 -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE) #define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP) #define MACHINE_HAS_IDTE (0) #define MACHINE_HAS_DIAG44 (1) #define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) -#define MACHINE_HAS_MVCOS (0) -#define MACHINE_HAS_HPAGE (0) -#define MACHINE_HAS_PFMF (0) -#define MACHINE_HAS_SPP (0) +#define MACHINE_HAS_EDAT1 (0) +#define MACHINE_HAS_EDAT2 (0) +#define MACHINE_HAS_LPP (0) #define MACHINE_HAS_TOPOLOGY (0) -#else /* __s390x__ */ +#define MACHINE_HAS_TE (0) +#define MACHINE_HAS_RRBM (0) +#define MACHINE_HAS_TLB_LC (0) +#else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) #define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_MVPG (1) -#define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) -#define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) -#define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) -#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) +#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) +#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) +#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) -#endif /* __s390x__ */ - -#define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) +#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) +#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) +#endif /* CONFIG_64BIT */ /* * Console mode. Override with conmode= @@ -128,19 +116,36 @@ extern char vmpoff_cmd[]; #define NSS_NAME_SIZE 8 extern char kernel_nss_name[]; +#ifdef CONFIG_PFAULT +extern int pfault_init(void); +extern void pfault_fini(void); +#else /* CONFIG_PFAULT */ +#define pfault_init() ({-1;}) +#define pfault_fini() do { } while (0) +#endif /* CONFIG_PFAULT */ + +extern void cmma_init(void); + +extern void (*_machine_restart)(char *command); +extern void (*_machine_halt)(void); +extern void (*_machine_power_off)(void); + #else /* __ASSEMBLY__ */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define IPL_DEVICE 0x10404 #define INITRD_START 0x1040C #define INITRD_SIZE 0x10414 -#else /* __s390x__ */ +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 +#else /* CONFIG_64BIT */ #define IPL_DEVICE 0x10400 #define INITRD_START 0x10408 #define INITRD_SIZE 0x10410 -#endif /* __s390x__ */ +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 +#endif /* CONFIG_64BIT */ #define COMMAND_LINE 0x10480 #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _ASM_S390_SETUP_H */ diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h index 0addc6466d9..5959bfb3b69 100644 --- a/arch/s390/include/asm/sfp-util.h +++ b/arch/s390/include/asm/sfp-util.h @@ -51,7 +51,7 @@ wl = __wl; \ }) -#ifdef __s390x__ +#ifdef CONFIG_64BIT #define udiv_qrnnd(q, r, n1, n0, d) \ do { unsigned long __n; \ unsigned int __r, __d; \ @@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, #define UDIV_NEEDS_NORMALIZATION 0 -#define abort() return 0 +#define abort() BUG() #define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h index c2e0c0508e7..e985182738f 100644 --- a/arch/s390/include/asm/shmparam.h +++ b/arch/s390/include/asm/shmparam.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/shmparam.h - * * S390 version * * Derived from "include/asm-i386/shmparam.h" diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h index cdf5cb2fe03..abf9e573594 100644 --- a/arch/s390/include/asm/signal.h +++ b/arch/s390/include/asm/signal.h @@ -1,22 +1,13 @@ /* - * include/asm-s390/signal.h - * * S390 version * * Derived from "include/asm-i386/signal.h" */ - #ifndef _ASMS390_SIGNAL_H #define _ASMS390_SIGNAL_H -#include <linux/types.h> -#include <linux/time.h> +#include <uapi/asm/signal.h> -/* Avoid too many header ordering problems. */ -struct siginfo; -struct pt_regs; - -#ifdef __KERNEL__ /* Most things should be clean enough to redefine this at will, if care is taken to make libc match. */ #include <asm/sigcontext.h> @@ -30,143 +21,5 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -#define NSIG 32 -typedef unsigned long sigset_t; - -#endif /* __KERNEL__ */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX _NSIG - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ -#define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 -#define SA_SIGINFO 0x00000004 -#define SA_ONSTACK 0x08000000 -#define SA_RESTART 0x10000000 -#define SA_NODEFER 0x40000000 -#define SA_RESETHAND 0x80000000 - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - -#include <asm-generic/signal-defs.h> - -#ifdef __KERNEL__ -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -}; - -struct sigaction { - __sighandler_t sa_handler; - unsigned long sa_flags; - void (*sa_restorer)(void); - sigset_t sa_mask; /* mask last for extensibility */ -}; - -struct k_sigaction { - struct sigaction sa; -}; - -#define ptrace_signal_deliver(regs, cookie) do { } while (0) - -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -struct sigaction { - union { - __sighandler_t _sa_handler; - void (*_sa_sigaction)(int, struct siginfo *, void *); - } _u; -#ifndef __s390x__ /* lovely */ - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -#else /* __s390x__ */ - unsigned long sa_flags; - void (*sa_restorer)(void); - sigset_t sa_mask; -#endif /* __s390x__ */ -}; - -#define sa_handler _u._sa_handler -#define sa_sigaction _u._sa_sigaction - -#endif /* __KERNEL__ */ - -typedef struct sigaltstack { - void __user *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - - +#define __ARCH_HAS_SA_RESTORER #endif diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index e3bffd4e2d6..bf9c823d402 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -1,131 +1,53 @@ -/* - * Routines and structures for signalling other processors. - * - * Copyright IBM Corp. 1999,2010 - * Author(s): Denis Joseph Barrow, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - * Heiko Carstens <heiko.carstens@de.ibm.com>, - */ - -#ifndef __ASM_SIGP_H -#define __ASM_SIGP_H - -#include <asm/system.h> - -/* Get real cpu address from logical cpu number. */ -extern unsigned short __cpu_logical_map[]; - -static inline int cpu_logical_map(int cpu) -{ -#ifdef CONFIG_SMP - return __cpu_logical_map[cpu]; -#else - return stap(); -#endif -} - -enum { - sigp_sense = 1, - sigp_external_call = 2, - sigp_emergency_signal = 3, - sigp_start = 4, - sigp_stop = 5, - sigp_restart = 6, - sigp_stop_and_store_status = 9, - sigp_initial_cpu_reset = 11, - sigp_cpu_reset = 12, - sigp_set_prefix = 13, - sigp_store_status_at_address = 14, - sigp_store_extended_status_at_address = 15, - sigp_set_architecture = 18, - sigp_conditional_emergency_signal = 19, - sigp_sense_running = 21, -}; - -enum { - sigp_order_code_accepted = 0, - sigp_status_stored = 1, - sigp_busy = 2, - sigp_not_operational = 3, -}; - -/* - * Definitions for external call. - */ -enum { - ec_schedule = 0, - ec_call_function, - ec_call_function_single, -}; - -/* - * Signal processor. - */ -static inline int raw_sigp(u16 cpu, int order) -{ - register unsigned long reg1 asm ("1") = 0; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) - : "d" (reg1), "d" (cpu), - "a" (order) : "cc" , "memory"); - return ccode; -} - -/* - * Signal processor with parameter. - */ -static inline int raw_sigp_p(u32 parameter, u16 cpu, int order) -{ - register unsigned int reg1 asm ("1") = parameter; - int ccode; - - asm volatile( - " sigp %1,%2,0(%3)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) - : "d" (reg1), "d" (cpu), - "a" (order) : "cc" , "memory"); - return ccode; -} - -/* - * Signal processor with parameter and return status. - */ -static inline int raw_sigp_ps(u32 *status, u32 parm, u16 cpu, int order) +#ifndef __S390_ASM_SIGP_H +#define __S390_ASM_SIGP_H + +/* SIGP order codes */ +#define SIGP_SENSE 1 +#define SIGP_EXTERNAL_CALL 2 +#define SIGP_EMERGENCY_SIGNAL 3 +#define SIGP_START 4 +#define SIGP_STOP 5 +#define SIGP_RESTART 6 +#define SIGP_STOP_AND_STORE_STATUS 9 +#define SIGP_INITIAL_CPU_RESET 11 +#define SIGP_SET_PREFIX 13 +#define SIGP_STORE_STATUS_AT_ADDRESS 14 +#define SIGP_SET_ARCHITECTURE 18 +#define SIGP_COND_EMERGENCY_SIGNAL 19 +#define SIGP_SENSE_RUNNING 21 + +/* SIGP condition codes */ +#define SIGP_CC_ORDER_CODE_ACCEPTED 0 +#define SIGP_CC_STATUS_STORED 1 +#define SIGP_CC_BUSY 2 +#define SIGP_CC_NOT_OPERATIONAL 3 + +/* SIGP cpu status bits */ + +#define SIGP_STATUS_CHECK_STOP 0x00000010UL +#define SIGP_STATUS_STOPPED 0x00000040UL +#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL +#define SIGP_STATUS_INVALID_PARAMETER 0x00000100UL +#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL +#define SIGP_STATUS_NOT_RUNNING 0x00000400UL + +#ifndef __ASSEMBLY__ + +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) { register unsigned int reg1 asm ("1") = parm; - int ccode; + int cc; asm volatile( " sigp %1,%2,0(%3)\n" " ipm %0\n" " srl %0,28\n" - : "=d" (ccode), "+d" (reg1) - : "d" (cpu), "a" (order) - : "cc" , "memory"); - *status = reg1; - return ccode; + : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + if (status && cc == 1) + *status = reg1; + return cc; } -static inline int sigp(int cpu, int order) -{ - return raw_sigp(cpu_logical_map(cpu), order); -} - -static inline int sigp_p(u32 parameter, int cpu, int order) -{ - return raw_sigp_p(parameter, cpu_logical_map(cpu), order); -} - -static inline int sigp_ps(u32 *status, u32 parm, int cpu, int order) -{ - return raw_sigp_ps(status, parm, cpu_logical_map(cpu), order); -} +#endif /* __ASSEMBLY__ */ -#endif /* __ASM_SIGP_H */ +#endif /* __S390_ASM_SIGP_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 045e009fc16..4f1307962a9 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2009 + * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com>, @@ -7,70 +7,69 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H -#ifdef CONFIG_SMP - -#include <asm/system.h> #include <asm/sigp.h> -extern void machine_restart_smp(char *); -extern void machine_halt_smp(void); -extern void machine_power_off_smp(void); +#ifdef CONFIG_SMP -#define raw_smp_processor_id() (S390_lowcore.cpu_nr) +#include <asm/lowcore.h> -extern int __cpu_disable (void); -extern void __cpu_die (unsigned int cpu); -extern int __cpu_up (unsigned int cpu); +#define raw_smp_processor_id() (S390_lowcore.cpu_nr) extern struct mutex smp_cpu_state_mutex; -extern int smp_cpu_polarization[]; + +extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; +extern void smp_call_online_cpu(void (*func)(void *), void *); +extern void smp_call_ipl_cpu(void (*func)(void *), void *); -extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); -extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, - int from, int to); -extern void smp_restart_cpu(void); +extern int smp_find_processor_id(u16 address); +extern int smp_store_status(int cpu); +extern int smp_vcpu_scheduled(int cpu); +extern void smp_yield_cpu(int cpu); +extern void smp_yield(void); +extern void smp_cpu_set_polarization(int cpu, int val); +extern int smp_cpu_get_polarization(int cpu); +extern void smp_fill_possible_mask(void); -/* - * returns 1 if (virtual) cpu is scheduled - * returns 0 otherwise - */ -static inline int smp_vcpu_scheduled(int cpu) +#else /* CONFIG_SMP */ + +static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) { - u32 status; - - switch (sigp_ps(&status, 0, cpu, sigp_sense_running)) { - case sigp_status_stored: - /* Check for running status */ - if (status & 0x400) - return 0; - break; - case sigp_not_operational: - return 0; - default: - break; - } - return 1; + func(data); } -#else /* CONFIG_SMP */ - -static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static inline void smp_call_online_cpu(void (*func)(void *), void *data) { func(data); } -#define smp_vcpu_scheduled (1) +static inline int smp_find_processor_id(u16 address) { return 0; } +static inline int smp_store_status(int cpu) { return 0; } +static inline int smp_vcpu_scheduled(int cpu) { return 1; } +static inline void smp_yield_cpu(int cpu) { } +static inline void smp_yield(void) { } +static inline void smp_fill_possible_mask(void) { } #endif /* CONFIG_SMP */ +static inline void smp_stop_cpu(void) +{ + u16 pcpu = stap(); + + for (;;) { + __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL); + cpu_relax(); + } +} + #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); +extern void __cpu_die(unsigned int cpu); +extern int __cpu_disable(void); #else static inline int smp_rescan_cpus(void) { return 0; } static inline void cpu_die(void) { } diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h index 545d219e6a2..a60d085ddb4 100644 --- a/arch/s390/include/asm/sparsemem.h +++ b/arch/s390/include/asm/sparsemem.h @@ -4,13 +4,11 @@ #ifdef CONFIG_64BIT #define SECTION_SIZE_BITS 28 -#define MAX_PHYSADDR_BITS 42 -#define MAX_PHYSMEM_BITS 42 +#define MAX_PHYSMEM_BITS 46 #else #define SECTION_SIZE_BITS 25 -#define MAX_PHYSADDR_BITS 31 #define MAX_PHYSMEM_BITS 31 #endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 56612fc8186..96879f7ad6d 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/spinlock.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) * * Derived from "include/asm-i386/spinlock.h" @@ -13,16 +11,21 @@ #include <linux/smp.h> +#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) + +extern int spin_retry; + static inline int -_raw_compare_and_swap(volatile unsigned int *lock, - unsigned int old, unsigned int new) +_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) { + unsigned int old_expected = old; + asm volatile( " cs %0,%3,%1" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) : "cc", "memory" ); - return old; + return old == old_expected; } /* @@ -34,52 +37,69 @@ _raw_compare_and_swap(volatile unsigned int *lock, * (the type definitions are in asm/spinlock_types.h) */ -#define arch_spin_is_locked(x) ((x)->owner_cpu != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) \ - arch_spin_relax(lock); } while (0) +void arch_spin_lock_wait(arch_spinlock_t *); +int arch_spin_trylock_retry(arch_spinlock_t *); +void arch_spin_relax(arch_spinlock_t *); +void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); -extern void arch_spin_lock_wait(arch_spinlock_t *); -extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); -extern int arch_spin_trylock_retry(arch_spinlock_t *); -extern void arch_spin_relax(arch_spinlock_t *lock); +static inline u32 arch_spin_lockval(int cpu) +{ + return ~cpu; +} -static inline void arch_spin_lock(arch_spinlock_t *lp) +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { - int old; + return lock.lock == 0; +} - old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); - if (likely(old == 0)) - return; - arch_spin_lock_wait(lp); +static inline int arch_spin_is_locked(arch_spinlock_t *lp) +{ + return ACCESS_ONCE(lp->lock) != 0; } -static inline void arch_spin_lock_flags(arch_spinlock_t *lp, - unsigned long flags) +static inline int arch_spin_trylock_once(arch_spinlock_t *lp) { - int old; + barrier(); + return likely(arch_spin_value_unlocked(*lp) && + _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); +} - old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); - if (likely(old == 0)) - return; - arch_spin_lock_wait_flags(lp, flags); +static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) +{ + return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); } -static inline int arch_spin_trylock(arch_spinlock_t *lp) +static inline void arch_spin_lock(arch_spinlock_t *lp) { - int old; + if (!arch_spin_trylock_once(lp)) + arch_spin_lock_wait(lp); +} - old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); - if (likely(old == 0)) - return 1; - return arch_spin_trylock_retry(lp); +static inline void arch_spin_lock_flags(arch_spinlock_t *lp, + unsigned long flags) +{ + if (!arch_spin_trylock_once(lp)) + arch_spin_lock_wait_flags(lp, flags); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lp) +{ + if (!arch_spin_trylock_once(lp)) + return arch_spin_trylock_retry(lp); + return 1; } static inline void arch_spin_unlock(arch_spinlock_t *lp) { - _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0); + arch_spin_tryrelease_once(lp); } - + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + while (arch_spin_is_locked(lock)) + arch_spin_relax(lock); +} + /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -110,42 +130,50 @@ extern void _raw_write_lock_wait(arch_rwlock_t *lp); extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); +static inline int arch_read_trylock_once(arch_rwlock_t *rw) +{ + unsigned int old = ACCESS_ONCE(rw->lock); + return likely((int) old >= 0 && + _raw_compare_and_swap(&rw->lock, old, old + 1)); +} + +static inline int arch_write_trylock_once(arch_rwlock_t *rw) +{ + unsigned int old = ACCESS_ONCE(rw->lock); + return likely(old == 0 && + _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); +} + static inline void arch_read_lock(arch_rwlock_t *rw) { - unsigned int old; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + if (!arch_read_trylock_once(rw)) _raw_read_lock_wait(rw); } static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) { - unsigned int old; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + if (!arch_read_trylock_once(rw)) _raw_read_lock_wait_flags(rw, flags); } static inline void arch_read_unlock(arch_rwlock_t *rw) { - unsigned int old, cmp; + unsigned int old; - old = rw->lock; do { - cmp = old; - old = _raw_compare_and_swap(&rw->lock, old, old - 1); - } while (cmp != old); + old = ACCESS_ONCE(rw->lock); + } while (!_raw_compare_and_swap(&rw->lock, old, old - 1)); } static inline void arch_write_lock(arch_rwlock_t *rw) { - if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + if (!arch_write_trylock_once(rw)) _raw_write_lock_wait(rw); } static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) { - if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + if (!arch_write_trylock_once(rw)) _raw_write_lock_wait_flags(rw, flags); } @@ -156,18 +184,16 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw) { - unsigned int old; - old = rw->lock & 0x7fffffffU; - if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1) == old)) - return 1; - return _raw_read_trylock_retry(rw); + if (!arch_read_trylock_once(rw)) + return _raw_read_trylock_retry(rw); + return 1; } static inline int arch_write_trylock(arch_rwlock_t *rw) { - if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)) - return 1; - return _raw_write_trylock_retry(rw); + if (!arch_write_trylock_once(rw)) + return _raw_write_trylock_retry(rw); + return 1; } #define arch_read_relax(lock) cpu_relax() diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index 9c76656a0af..b2cd6ff7c2c 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -6,13 +6,13 @@ #endif typedef struct { - volatile unsigned int owner_cpu; + unsigned int lock; } __attribute__ ((aligned (4))) arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } typedef struct { - volatile unsigned int lock; + unsigned int lock; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/statfs.h b/arch/s390/include/asm/statfs.h deleted file mode 100644 index 3be7fbd406c..00000000000 --- a/arch/s390/include/asm/statfs.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * include/asm-s390/statfs.h - * - * S390 version - * - * Derived from "include/asm-i386/statfs.h" - */ - -#ifndef _S390_STATFS_H -#define _S390_STATFS_H - -#ifndef __s390x__ -#include <asm-generic/statfs.h> -#else -/* - * We can't use <asm-generic/statfs.h> because in 64-bit mode - * we mix ints of different sizes in our struct statfs. - */ - -#ifndef __KERNEL_STRICT_NAMES -#include <linux/types.h> -typedef __kernel_fsid_t fsid_t; -#endif - -struct statfs { - int f_type; - int f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; - __kernel_fsid_t f_fsid; - int f_namelen; - int f_frsize; - int f_flags; - int f_spare[4]; -}; - -struct statfs64 { - int f_type; - int f_bsize; - long f_blocks; - long f_bfree; - long f_bavail; - long f_files; - long f_ffree; - __kernel_fsid_t f_fsid; - int f_namelen; - int f_frsize; - int f_flags; - int f_spare[4]; -}; - -struct compat_statfs64 { - __u32 f_type; - __u32 f_bsize; - __u64 f_blocks; - __u64 f_bfree; - __u64 f_bavail; - __u64 f_files; - __u64 f_ffree; - __kernel_fsid_t f_fsid; - __u32 f_namelen; - __u32 f_frsize; - __u32 f_flags; - __u32 f_spare[4]; -}; - -#endif /* __s390x__ */ -#endif diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index cd0241db5a4..7e2dcd7c57e 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -1,16 +1,12 @@ /* - * include/asm-s390/string.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), */ #ifndef _S390_STRING_H_ #define _S390_STRING_H_ -#ifdef __KERNEL__ - #ifndef _LINUX_TYPES_H #include <linux/types.h> #endif @@ -100,7 +96,6 @@ static inline char *strcat(char *dst, const char *src) static inline char *strcpy(char *dst, const char *src) { -#if __GNUC__ < 4 register int r0 asm("0") = 0; char *ret = dst; @@ -110,14 +105,10 @@ static inline char *strcpy(char *dst, const char *src) : "+&a" (dst), "+&a" (src) : "d" (r0) : "cc", "memory"); return ret; -#else - return __builtin_strcpy(dst, src); -#endif } static inline size_t strlen(const char *s) { -#if __GNUC__ < 4 register unsigned long r0 asm("0") = 0; const char *tmp = s; @@ -126,9 +117,6 @@ static inline size_t strlen(const char *s) " jo 0b" : "+d" (r0), "+a" (tmp) : : "cc"); return r0 - (unsigned long) s; -#else - return __builtin_strlen(s); -#endif } static inline size_t strnlen(const char * s, size_t n) @@ -152,6 +140,4 @@ size_t strlen(const char *s); size_t strnlen(const char * s, size_t n); #endif /* !IN_ARCH_STRING_C */ -#endif /* __KERNEL__ */ - #endif /* __S390_STRING_H_ */ diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h deleted file mode 100644 index dc75c616eaf..00000000000 --- a/arch/s390/include/asm/suspend.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_S390_SUSPEND_H -#define __ASM_S390_SUSPEND_H - -static inline int arch_prepare_suspend(void) -{ - return 0; -} - -#endif - diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h new file mode 100644 index 00000000000..18ea9e3f814 --- /dev/null +++ b/arch/s390/include/asm/switch_to.h @@ -0,0 +1,137 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_SWITCH_TO_H +#define __ASM_SWITCH_TO_H + +#include <linux/thread_info.h> +#include <asm/ptrace.h> + +extern struct task_struct *__switch_to(void *, void *); +extern void update_cr_regs(struct task_struct *task); + +static inline int test_fp_ctl(u32 fpc) +{ + u32 orig_fpc; + int rc; + + if (!MACHINE_HAS_IEEE) + return 0; + + asm volatile( + " efpc %1\n" + " sfpc %2\n" + "0: sfpc %1\n" + " la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (orig_fpc) + : "d" (fpc), "0" (-EINVAL)); + return rc; +} + +static inline void save_fp_ctl(u32 *fpc) +{ + if (!MACHINE_HAS_IEEE) + return; + + asm volatile( + " stfpc %0\n" + : "+Q" (*fpc)); +} + +static inline int restore_fp_ctl(u32 *fpc) +{ + int rc; + + if (!MACHINE_HAS_IEEE) + return 0; + + asm volatile( + " lfpc %1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); + return rc; +} + +static inline void save_fp_regs(freg_t *fprs) +{ + asm volatile("std 0,%0" : "=Q" (fprs[0])); + asm volatile("std 2,%0" : "=Q" (fprs[2])); + asm volatile("std 4,%0" : "=Q" (fprs[4])); + asm volatile("std 6,%0" : "=Q" (fprs[6])); + if (!MACHINE_HAS_IEEE) + return; + asm volatile("std 1,%0" : "=Q" (fprs[1])); + asm volatile("std 3,%0" : "=Q" (fprs[3])); + asm volatile("std 5,%0" : "=Q" (fprs[5])); + asm volatile("std 7,%0" : "=Q" (fprs[7])); + asm volatile("std 8,%0" : "=Q" (fprs[8])); + asm volatile("std 9,%0" : "=Q" (fprs[9])); + asm volatile("std 10,%0" : "=Q" (fprs[10])); + asm volatile("std 11,%0" : "=Q" (fprs[11])); + asm volatile("std 12,%0" : "=Q" (fprs[12])); + asm volatile("std 13,%0" : "=Q" (fprs[13])); + asm volatile("std 14,%0" : "=Q" (fprs[14])); + asm volatile("std 15,%0" : "=Q" (fprs[15])); +} + +static inline void restore_fp_regs(freg_t *fprs) +{ + asm volatile("ld 0,%0" : : "Q" (fprs[0])); + asm volatile("ld 2,%0" : : "Q" (fprs[2])); + asm volatile("ld 4,%0" : : "Q" (fprs[4])); + asm volatile("ld 6,%0" : : "Q" (fprs[6])); + if (!MACHINE_HAS_IEEE) + return; + asm volatile("ld 1,%0" : : "Q" (fprs[1])); + asm volatile("ld 3,%0" : : "Q" (fprs[3])); + asm volatile("ld 5,%0" : : "Q" (fprs[5])); + asm volatile("ld 7,%0" : : "Q" (fprs[7])); + asm volatile("ld 8,%0" : : "Q" (fprs[8])); + asm volatile("ld 9,%0" : : "Q" (fprs[9])); + asm volatile("ld 10,%0" : : "Q" (fprs[10])); + asm volatile("ld 11,%0" : : "Q" (fprs[11])); + asm volatile("ld 12,%0" : : "Q" (fprs[12])); + asm volatile("ld 13,%0" : : "Q" (fprs[13])); + asm volatile("ld 14,%0" : : "Q" (fprs[14])); + asm volatile("ld 15,%0" : : "Q" (fprs[15])); +} + +static inline void save_access_regs(unsigned int *acrs) +{ + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); +} + +static inline void restore_access_regs(unsigned int *acrs) +{ + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); +} + +#define switch_to(prev,next,last) do { \ + if (prev->mm) { \ + save_fp_ctl(&prev->thread.fp_regs.fpc); \ + save_fp_regs(prev->thread.fp_regs.fprs); \ + save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ + } \ + if (next->mm) { \ + restore_fp_ctl(&next->thread.fp_regs.fpc); \ + restore_fp_regs(next->thread.fp_regs.fprs); \ + restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + update_cr_regs(next); \ + } \ + prev = __switch_to(prev,next); \ +} while (0) + +#endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b955d..abad78d5b10 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -12,7 +12,9 @@ #ifndef _ASM_SYSCALL_H #define _ASM_SYSCALL_H 1 +#include <uapi/linux/audit.h> #include <linux/sched.h> +#include <linux/err.h> #include <asm/ptrace.h> /* @@ -21,11 +23,13 @@ * type here is what we want [need] for both 32 bit and 64 bit systems. */ extern const unsigned int sys_call_table[]; +extern const unsigned int sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return test_pt_regs_flag(regs, PIF_SYSCALL) ? + (regs->int_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +41,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, @@ -85,4 +89,12 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr2 = args[0]; } +static inline int syscall_get_arch(void) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(current, TIF_31BIT)) + return AUDIT_ARCH_S390; +#endif + return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390; +} #endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 79d3d6e2e9c..f92428e459f 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -1,7 +1,7 @@ /* * definition for store system information stsi * - * Copyright IBM Corp. 2001,2008 + * Copyright IBM Corp. 2001, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -17,7 +17,10 @@ #include <asm/bitsperlong.h> struct sysinfo_1_1_1 { - unsigned short :16; + unsigned char p:1; + unsigned char :6; + unsigned char t:1; + unsigned char :8; unsigned char ccr; unsigned char cai; char reserved_0[28]; @@ -30,9 +33,14 @@ struct sysinfo_1_1_1 { char model[16]; char model_perm_cap[16]; char model_temp_cap[16]; - char model_cap_rating[4]; - char model_perm_cap_rating[4]; - char model_temp_cap_rating[4]; + unsigned int model_cap_rating; + unsigned int model_perm_cap_rating; + unsigned int model_temp_cap_rating; + unsigned char typepct[5]; + unsigned char reserved_2[3]; + unsigned int ncr; + unsigned int npr; + unsigned int ntr; }; struct sysinfo_1_2_1 { @@ -47,8 +55,9 @@ struct sysinfo_1_2_2 { char format; char reserved_0[1]; unsigned short acc_offset; - char reserved_1[24]; - unsigned int secondary_capability; + char reserved_1[20]; + unsigned int nominal_cap; + unsigned int secondary_cap; unsigned int capability; unsigned short cpus_total; unsigned short cpus_configured; @@ -109,6 +118,8 @@ struct sysinfo_3_2_2 { char reserved_544[3552]; }; +extern int topology_max_mnest; + #define TOPOLOGY_CPU_BITS 64 #define TOPOLOGY_NR_MAG 6 @@ -142,21 +153,7 @@ struct sysinfo_15_1_x { union topology_entry tle[0]; }; -static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - register int r0 asm("0") = (fc << 28) | sel1; - register int r1 asm("1") = sel2; - - asm volatile( - " stsi 0(%2)\n" - "0: jz 2f\n" - "1: lhi %0,%3\n" - "2:\n" - EX_TABLE(0b, 1b) - : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS) - : "cc", "memory"); - return r0; -} +int stsi(void *sysinfo, int fc, int sel1, int sel2); /* * Service level reporting interface. diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h deleted file mode 100644 index 8f8d759f6a7..00000000000 --- a/arch/s390/include/asm/system.h +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright IBM Corp. 1999, 2009 - * - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ - -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H - -#include <linux/kernel.h> -#include <linux/errno.h> -#include <asm/types.h> -#include <asm/ptrace.h> -#include <asm/setup.h> -#include <asm/processor.h> -#include <asm/lowcore.h> - -#ifdef __KERNEL__ - -struct task_struct; - -extern struct task_struct *__switch_to(void *, void *); -extern void update_per_regs(struct task_struct *task); - -static inline void save_fp_regs(s390_fp_regs *fpregs) -{ - asm volatile( - " std 0,%O0+8(%R0)\n" - " std 2,%O0+24(%R0)\n" - " std 4,%O0+40(%R0)\n" - " std 6,%O0+56(%R0)" - : "=Q" (*fpregs) : "Q" (*fpregs)); - if (!MACHINE_HAS_IEEE) - return; - asm volatile( - " stfpc %0\n" - " std 1,%O0+16(%R0)\n" - " std 3,%O0+32(%R0)\n" - " std 5,%O0+48(%R0)\n" - " std 7,%O0+64(%R0)\n" - " std 8,%O0+72(%R0)\n" - " std 9,%O0+80(%R0)\n" - " std 10,%O0+88(%R0)\n" - " std 11,%O0+96(%R0)\n" - " std 12,%O0+104(%R0)\n" - " std 13,%O0+112(%R0)\n" - " std 14,%O0+120(%R0)\n" - " std 15,%O0+128(%R0)\n" - : "=Q" (*fpregs) : "Q" (*fpregs)); -} - -static inline void restore_fp_regs(s390_fp_regs *fpregs) -{ - asm volatile( - " ld 0,%O0+8(%R0)\n" - " ld 2,%O0+24(%R0)\n" - " ld 4,%O0+40(%R0)\n" - " ld 6,%O0+56(%R0)" - : : "Q" (*fpregs)); - if (!MACHINE_HAS_IEEE) - return; - asm volatile( - " lfpc %0\n" - " ld 1,%O0+16(%R0)\n" - " ld 3,%O0+32(%R0)\n" - " ld 5,%O0+48(%R0)\n" - " ld 7,%O0+64(%R0)\n" - " ld 8,%O0+72(%R0)\n" - " ld 9,%O0+80(%R0)\n" - " ld 10,%O0+88(%R0)\n" - " ld 11,%O0+96(%R0)\n" - " ld 12,%O0+104(%R0)\n" - " ld 13,%O0+112(%R0)\n" - " ld 14,%O0+120(%R0)\n" - " ld 15,%O0+128(%R0)\n" - : : "Q" (*fpregs)); -} - -static inline void save_access_regs(unsigned int *acrs) -{ - asm volatile("stam 0,15,%0" : "=Q" (*acrs)); -} - -static inline void restore_access_regs(unsigned int *acrs) -{ - asm volatile("lam 0,15,%0" : : "Q" (*acrs)); -} - -#define switch_to(prev,next,last) do { \ - if (prev->mm) { \ - save_fp_regs(&prev->thread.fp_regs); \ - save_access_regs(&prev->thread.acrs[0]); \ - } \ - if (next->mm) { \ - restore_fp_regs(&next->thread.fp_regs); \ - restore_access_regs(&next->thread.acrs[0]); \ - update_per_regs(next); \ - } \ - prev = __switch_to(prev,next); \ -} while (0) - -extern void account_vtime(struct task_struct *, struct task_struct *); -extern void account_tick_vtime(struct task_struct *); - -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -#else /* CONFIG_PFAULT */ -#define pfault_init() ({-1;}) -#define pfault_fini() do { } while (0) -#endif /* CONFIG_PFAULT */ - -extern void cmma_init(void); -extern int memcpy_real(void *, void *, size_t); - -#define finish_arch_switch(prev) do { \ - set_fs(current->thread.mm_segment); \ - account_vtime(prev, current); \ -} while (0) - -#define nop() asm volatile("nop") - -#define xchg(ptr,x) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __xchg((unsigned long)(x), (void *)(ptr),sizeof(*(ptr))); \ - __ret; \ -}) - -extern void __xchg_called_with_bad_pointer(void); - -static inline unsigned long __xchg(unsigned long x, void * ptr, int size) -{ - unsigned long addr, old; - int shift; - - switch (size) { - case 1: - addr = (unsigned long) ptr; - shift = (3 ^ (addr & 3)) << 3; - addr ^= addr & 3; - asm volatile( - " l %0,%4\n" - "0: lr 0,%0\n" - " nr 0,%3\n" - " or 0,%2\n" - " cs %0,0,%4\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) addr) - : "d" (x << shift), "d" (~(255 << shift)), - "Q" (*(int *) addr) : "memory", "cc", "0"); - return old >> shift; - case 2: - addr = (unsigned long) ptr; - shift = (2 ^ (addr & 2)) << 3; - addr ^= addr & 2; - asm volatile( - " l %0,%4\n" - "0: lr 0,%0\n" - " nr 0,%3\n" - " or 0,%2\n" - " cs %0,0,%4\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) addr) - : "d" (x << shift), "d" (~(65535 << shift)), - "Q" (*(int *) addr) : "memory", "cc", "0"); - return old >> shift; - case 4: - asm volatile( - " l %0,%3\n" - "0: cs %0,%2,%3\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) ptr) - : "d" (x), "Q" (*(int *) ptr) - : "memory", "cc"); - return old; -#ifdef __s390x__ - case 8: - asm volatile( - " lg %0,%3\n" - "0: csg %0,%2,%3\n" - " jl 0b\n" - : "=&d" (old), "=m" (*(long *) ptr) - : "d" (x), "Q" (*(long *) ptr) - : "memory", "cc"); - return old; -#endif /* __s390x__ */ - } - __xchg_called_with_bad_pointer(); - return x; -} - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -#define __HAVE_ARCH_CMPXCHG 1 - -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) - -extern void __cmpxchg_called_with_bad_pointer(void); - -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) -{ - unsigned long addr, prev, tmp; - int shift; - - switch (size) { - case 1: - addr = (unsigned long) ptr; - shift = (3 ^ (addr & 3)) << 3; - addr ^= addr & 3; - asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" - "1:" - : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) - : "d" (old << shift), "d" (new << shift), - "d" (~(255 << shift)), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev >> shift; - case 2: - addr = (unsigned long) ptr; - shift = (2 ^ (addr & 2)) << 3; - addr ^= addr & 2; - asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" - "1:" - : "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr) - : "d" (old << shift), "d" (new << shift), - "d" (~(65535 << shift)), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev >> shift; - case 4: - asm volatile( - " cs %0,%3,%1\n" - : "=&d" (prev), "=Q" (*(int *) ptr) - : "0" (old), "d" (new), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev; -#ifdef __s390x__ - case 8: - asm volatile( - " csg %0,%3,%1\n" - : "=&d" (prev), "=Q" (*(long *) ptr) - : "0" (old), "d" (new), "Q" (*(long *) ptr) - : "memory", "cc"); - return prev; -#endif /* __s390x__ */ - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - * - * This is very similar to the ppc eieio/sync instruction in that is - * does a checkpoint syncronisation & makes sure that - * all memory ops have completed wrt other CPU's ( see 7-15 POP DJB ). - */ - -#define eieio() asm volatile("bcr 15,0" : : : "memory") -#define SYNC_OTHER_CORES(x) eieio() -#define mb() eieio() -#define rmb() eieio() -#define wmb() eieio() -#define read_barrier_depends() do { } while(0) -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() - - -#define set_mb(var, value) do { var = value; mb(); } while (0) - -#ifdef __s390x__ - -#define __ctl_load(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " lctlg %1,%2,%0\n" \ - : : "Q" (*(addrtype *)(&array)), \ - "i" (low), "i" (high)); \ - }) - -#define __ctl_store(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " stctg %1,%2,%0\n" \ - : "=Q" (*(addrtype *)(&array)) \ - : "i" (low), "i" (high)); \ - }) - -#else /* __s390x__ */ - -#define __ctl_load(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " lctl %1,%2,%0\n" \ - : : "Q" (*(addrtype *)(&array)), \ - "i" (low), "i" (high)); \ -}) - -#define __ctl_store(array, low, high) ({ \ - typedef struct { char _[sizeof(array)]; } addrtype; \ - asm volatile( \ - " stctl %1,%2,%0\n" \ - : "=Q" (*(addrtype *)(&array)) \ - : "i" (low), "i" (high)); \ - }) - -#endif /* __s390x__ */ - -#define __ctl_set_bit(cr, bit) ({ \ - unsigned long __dummy; \ - __ctl_store(__dummy, cr, cr); \ - __dummy |= 1UL << (bit); \ - __ctl_load(__dummy, cr, cr); \ -}) - -#define __ctl_clear_bit(cr, bit) ({ \ - unsigned long __dummy; \ - __ctl_store(__dummy, cr, cr); \ - __dummy &= ~(1UL << (bit)); \ - __ctl_load(__dummy, cr, cr); \ -}) - -#include <linux/irqflags.h> - -#include <asm-generic/cmpxchg-local.h> - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - case 2: - case 4: -#ifdef __s390x__ - case 8: -#endif - return __cmpxchg(ptr, old, new, size); - default: - return __cmpxchg_local_generic(ptr, old, new, size); - } - - return old; -} - -/* - * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make - * them available. - */ -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) -#ifdef __s390x__ -#define cmpxchg64_local(ptr, o, n) \ - ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ - }) -#else -#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) -#endif - -/* - * Use to set psw mask except for the first byte which - * won't be changed by this function. - */ -static inline void -__set_psw_mask(unsigned long mask) -{ - __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); -} - -#define local_mcck_enable() __set_psw_mask(psw_kernel_bits) -#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK) - -#ifdef CONFIG_SMP - -extern void smp_ctl_set_bit(int cr, int bit); -extern void smp_ctl_clear_bit(int cr, int bit); -#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) -#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) - -#else - -#define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) -#define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) - -#endif /* CONFIG_SMP */ - -#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ - -/* - * The test_facility function uses the bit odering where the MSB is bit 0. - * That makes it easier to query facility bits with the bit number as - * documented in the Principles of Operation. - */ -static inline int test_facility(unsigned long nr) -{ - unsigned char *ptr; - - if (nr >= MAX_FACILITY_BIT) - return 0; - ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); - return (*ptr & (0x80 >> (nr & 7))) != 0; -} - -static inline unsigned short stap(void) -{ - unsigned short cpu_address; - - asm volatile("stap %0" : "=m" (cpu_address)); - return cpu_address; -} - -extern void (*_machine_restart)(char *command); -extern void (*_machine_halt)(void); -extern void (*_machine_power_off)(void); - -extern unsigned long arch_align_stack(unsigned long sp); - -static inline int tprot(unsigned long addr) -{ - int rc = -EFAULT; - - asm volatile( - " tprot 0(%1),0\n" - "0: ipm %0\n" - " srl %0,28\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "a" (addr) : "cc"); - return rc; -} - -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h index bc3a35cefc9..db028d17f06 100644 --- a/arch/s390/include/asm/termios.h +++ b/arch/s390/include/asm/termios.h @@ -1,53 +1,13 @@ /* - * include/asm-s390/termios.h - * * S390 version * * Derived from "include/asm-i386/termios.h" */ - #ifndef _S390_TERMIOS_H #define _S390_TERMIOS_H -#include <asm/termbits.h> -#include <asm/ioctls.h> - -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; +#include <uapi/asm/termios.h> -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ /* intr=^C quit=^\ erase=del kill=^U eof=^D vtime=\0 vmin=\1 sxtc=\0 @@ -62,6 +22,4 @@ struct termio { #include <asm-generic/termios-base.h> -#endif /* __KERNEL__ */ - #endif /* _S390_TERMIOS_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ad1382f7932..b833e9c0bfb 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -1,31 +1,22 @@ /* - * include/asm-s390/thread_info.h - * * S390 version - * Copyright (C) IBM Corp. 2002,2006 + * Copyright IBM Corp. 2002, 2006 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ #ifndef _ASM_THREAD_INFO_H #define _ASM_THREAD_INFO_H -#ifdef __KERNEL__ - /* * Size of kernel stack for each process */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define THREAD_ORDER 1 #define ASYNC_ORDER 1 -#else /* __s390x__ */ -#ifndef __SMALL_STACK +#else /* CONFIG_64BIT */ #define THREAD_ORDER 2 #define ASYNC_ORDER 2 -#else -#define THREAD_ORDER 1 -#define ASYNC_ORDER 1 -#endif -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) #define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER) @@ -45,9 +36,11 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ + unsigned long sys_call_table; /* System call table address */ unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. */ @@ -84,39 +77,28 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ -#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_RESTART_SVC 4 /* restart svc with new svc number */ -#define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ -#define TIF_MCCK_PENDING 7 /* machine check handling is pending */ -#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ -#define TIF_SECCOMP 10 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ -#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ -#define TIF_31BIT 17 /* 32bit process */ -#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ -#define TIF_SINGLE_STEP 20 /* This task is single stepped */ -#define TIF_FREEZE 21 /* thread is freezing for suspend */ +#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ +#define TIF_SECCOMP 5 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_31BIT 16 /* 32bit process */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ +#define TIF_SINGLE_STEP 19 /* This task is single stepped */ +#define TIF_BLOCK_STEP 20 /* This task is block stepped */ #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) -#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_RESTART_SVC (1<<TIF_RESTART_SVC) -#define _TIF_PER_TRAP (1<<TIF_PER_TRAP) -#define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) -#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) -#define _TIF_SINGLE_STEP (1<<TIF_FREEZE) -#define _TIF_FREEZE (1<<TIF_FREEZE) +#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) #ifdef CONFIG_64BIT #define is_32bit_task() (test_thread_flag(TIF_31BIT)) @@ -124,8 +106,4 @@ static inline struct thread_info *current_thread_info(void) #define is_32bit_task() (1) #endif -#endif /* __KERNEL__ */ - -#define PREEMPT_ACTIVE 0x4000000 - #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h deleted file mode 100644 index 814243cafdf..00000000000 --- a/arch/s390/include/asm/timer.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * include/asm-s390/timer.h - * - * (C) Copyright IBM Corp. 2003,2006 - * Virtual CPU timer - * - * Author: Jan Glauber (jang@de.ibm.com) - */ - -#ifndef _ASM_S390_TIMER_H -#define _ASM_S390_TIMER_H - -#ifdef __KERNEL__ - -#include <linux/timer.h> - -#define VTIMER_MAX_SLICE (0x7ffffffffffff000LL) - -struct vtimer_list { - struct list_head entry; - - int cpu; - __u64 expires; - __u64 interval; - - void (*function)(unsigned long); - unsigned long data; -}; - -/* the vtimer value will wrap after ca. 71 years */ -struct vtimer_queue { - struct list_head list; - spinlock_t lock; - __u64 timer; /* last programmed timer */ - __u64 elapsed; /* elapsed time of timer expire values */ - __u64 idle; /* temp var for idle */ - int do_spt; /* =1: reprogram cpu timer in idle */ -}; - -extern void init_virt_timer(struct vtimer_list *timer); -extern void add_virt_timer(void *new); -extern void add_virt_timer_periodic(void *new); -extern int mod_virt_timer(struct vtimer_list *timer, __u64 expires); -extern int mod_virt_timer_periodic(struct vtimer_list *timer, __u64 expires); -extern int del_virt_timer(struct vtimer_list *timer); - -extern void init_cpu_vtimer(void); -extern void vtime_init(void); - -extern void vtime_stop_cpu(void); -extern void vtime_start_leave(void); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a40af6..8beee1cceba 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/timex.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * * Derived from "include/asm-i386/timex.h" * Copyright (C) 1992, Linus Torvalds @@ -17,7 +15,7 @@ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL /* Inline functions for clock register access. */ -static inline int set_clock(__u64 time) +static inline int set_tod_clock(__u64 time) { int cc; @@ -29,7 +27,7 @@ static inline int set_clock(__u64 time) return cc; } -static inline int store_clock(__u64 *time) +static inline int store_tod_clock(__u64 *time) { int cc; @@ -73,29 +71,35 @@ static inline void local_tick_enable(unsigned long long comp) typedef unsigned long long cycles_t; -static inline unsigned long long get_clock (void) +static inline void get_tod_clock_ext(char clk[16]) { - unsigned long long clk; + typedef struct { char _[sizeof(clk)]; } addrtype; - asm volatile("stck %0" : "=Q" (clk) : : "cc"); - return clk; + asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); } -static inline void get_clock_ext(char *clk) +static inline unsigned long long get_tod_clock(void) { - asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); + unsigned char clk[16]; + get_tod_clock_ext(clk); + return *((unsigned long long *)&clk[1]); } -static inline unsigned long long get_clock_xt(void) +static inline unsigned long long get_tod_clock_fast(void) { - unsigned char clk[16]; - get_clock_ext(clk); - return *((unsigned long long *)&clk[1]); +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + unsigned long long clk; + + asm volatile("stckf %0" : "=Q" (clk) : : "cc"); + return clk; +#else + return get_tod_clock(); +#endif } static inline cycles_t get_cycles(void) { - return (cycles_t) get_clock() >> 2; + return (cycles_t) get_tod_clock() >> 2; } int get_sync_clock(unsigned long long *clock); @@ -121,9 +125,37 @@ extern u64 sched_clock_base_cc; * function, otherwise the returned value is not guaranteed to * be monotonic. */ -static inline unsigned long long get_clock_monotonic(void) +static inline unsigned long long get_tod_clock_monotonic(void) { - return get_clock_xt() - sched_clock_base_cc; + return get_tod_clock() - sched_clock_base_cc; +} + +/** + * tod_to_ns - convert a TOD format value to nanoseconds + * @todval: to be converted TOD format value + * Returns: number of nanoseconds that correspond to the TOD format value + * + * Converting a 64 Bit TOD format value to nanoseconds means that the value + * must be divided by 4.096. In order to achieve that we multiply with 125 + * and divide by 512: + * + * ns = (todval * 125) >> 9; + * + * In order to avoid an overflow with the multiplication we can rewrite this. + * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * we end up with + * + * ns = ((2^32 * th + tl) * 125 ) >> 9; + * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * + */ +static inline unsigned long long tod_to_ns(unsigned long long todval) +{ + unsigned long long ns; + + ns = ((todval >> 32) << 23) * 125; + ns += ((todval & 0xffffffff) * 125) >> 9; + return ns; } #endif diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 9074a54c4d1..a25f09fbaf3 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -26,68 +26,73 @@ #include <linux/swap.h> #include <asm/processor.h> #include <asm/pgalloc.h> -#include <asm/smp.h> #include <asm/tlbflush.h> -#ifndef CONFIG_SMP -#define TLB_NR_PTRS 1 -#else -#define TLB_NR_PTRS 508 -#endif - struct mmu_gather { struct mm_struct *mm; + struct mmu_table_batch *batch; unsigned int fullmm; - unsigned int nr_ptes; - unsigned int nr_pxds; - void *array[TLB_NR_PTRS]; + unsigned long start, end; }; -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); +struct mmu_table_batch { + struct rcu_head rcu; + unsigned int nr; + void *tables[0]; +}; -static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, - unsigned int full_mm_flush) -{ - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); +#define MAX_TABLE_BATCH \ + ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) + +extern void tlb_table_flush(struct mmu_gather *tlb); +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +static inline void tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ tlb->mm = mm; - tlb->fullmm = full_mm_flush; - tlb->nr_ptes = 0; - tlb->nr_pxds = TLB_NR_PTRS; - if (tlb->fullmm) - __tlb_flush_mm(mm); - return tlb; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); + tlb->batch = NULL; } -static inline void tlb_flush_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { - if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS)) - __tlb_flush_mm(tlb->mm); - while (tlb->nr_ptes > 0) - page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); - while (tlb->nr_pxds < TLB_NR_PTRS) - crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); + __tlb_flush_mm_lazy(tlb->mm); } -static inline void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void tlb_flush_mmu_free(struct mmu_gather *tlb) { - tlb_flush_mmu(tlb, start, end); + tlb_table_flush(tlb); +} - rcu_table_freelist_finish(); - /* keep the page table cache within bounds */ - check_pgt_cache(); +static inline void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} - put_cpu_var(mmu_gathers); +static inline void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb); } /* * Release the page cache reference for a pte removed by - * tlb_ptep_clear_flush. In both flush modes the tlb fo a page cache page + * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. */ +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); + return 1; /* avoid calling tlb_flush_mmu */ +} + static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { free_page_and_swap_cache(page); @@ -100,12 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - if (!tlb->fullmm) { - tlb->array[tlb->nr_ptes++] = pte; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb, 0, 0); - } else - page_table_free(tlb->mm, (unsigned long *) pte); + page_table_free_rcu(tlb, (unsigned long *) pte); } /* @@ -118,15 +118,10 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT if (tlb->mm->context.asce_limit <= (1UL << 31)) return; - if (!tlb->fullmm) { - tlb->array[--tlb->nr_pxds] = pmd; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb, 0, 0); - } else - crst_table_free(tlb->mm, (unsigned long *) pmd); + tlb_remove_table(tlb, pmd); #endif } @@ -140,21 +135,17 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT if (tlb->mm->context.asce_limit <= (1UL << 42)) return; - if (!tlb->fullmm) { - tlb->array[--tlb->nr_pxds] = pud; - if (tlb->nr_ptes >= tlb->nr_pxds) - tlb_flush_mmu(tlb, 0, 0); - } else - crst_table_free(tlb->mm, (unsigned long *) pud); + tlb_remove_table(tlb, pud); #endif } #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) +#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0) #define tlb_migrate_finish(mm) do { } while (0) #endif /* _S390_TLB_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 29d5d6d4bec..16c9c88658c 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -7,19 +7,41 @@ #include <asm/pgalloc.h> /* - * Flush all tlb entries on the local cpu. + * Flush all TLB entries on the local CPU. */ static inline void __tlb_flush_local(void) { asm volatile("ptlb" : : : "memory"); } -#ifdef CONFIG_SMP /* - * Flush all tlb entries on all cpus. + * Flush TLB entries for a specific ASCE on all CPUs + */ +static inline void __tlb_flush_idte(unsigned long asce) +{ + /* Global TLB flush for the mm */ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,0" + : : "a" (2048), "a" (asce) : "cc"); +} + +/* + * Flush TLB entries for a specific ASCE on the local CPU */ +static inline void __tlb_flush_idte_local(unsigned long asce) +{ + /* Local TLB flush for the mm */ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,1" + : : "a" (2048), "a" (asce) : "cc"); +} + +#ifdef CONFIG_SMP void smp_ptlb_all(void); +/* + * Flush all TLB entries on all CPUs. + */ static inline void __tlb_flush_global(void) { register unsigned long reg2 asm("2"); @@ -27,12 +49,12 @@ static inline void __tlb_flush_global(void) register unsigned long reg4 asm("4"); long dummy; -#ifndef __s390x__ +#ifndef CONFIG_64BIT if (!MACHINE_HAS_CSP) { smp_ptlb_all(); return; } -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ dummy = 0; reg2 = reg3 = 0; @@ -42,64 +64,109 @@ static inline void __tlb_flush_global(void) : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); } +/* + * Flush TLB entries for a specific mm on all CPUs (in case gmap is used + * this implicates multiple ASCEs!). + */ static inline void __tlb_flush_full(struct mm_struct *mm) { - cpumask_t local_cpumask; - preempt_disable(); - /* - * If the process only ran on the local cpu, do a local flush. - */ - local_cpumask = cpumask_of_cpu(smp_processor_id()); - if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) + atomic_add(0x10000, &mm->context.attach_count); + if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + /* Local TLB flush */ __tlb_flush_local(); - else + } else { + /* Global TLB flush */ __tlb_flush_global(); + /* Reset TLB flush mask */ + if (MACHINE_HAS_TLB_LC) + cpumask_copy(mm_cpumask(mm), + &mm->context.cpu_attach_mask); + } + atomic_sub(0x10000, &mm->context.attach_count); preempt_enable(); } + +/* + * Flush TLB entries for a specific ASCE on all CPUs. + */ +static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) +{ + int active, count; + + preempt_disable(); + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __tlb_flush_idte_local(asce); + } else { + if (MACHINE_HAS_IDTE) + __tlb_flush_idte(asce); + else + __tlb_flush_global(); + /* Reset TLB flush mask */ + if (MACHINE_HAS_TLB_LC) + cpumask_copy(mm_cpumask(mm), + &mm->context.cpu_attach_mask); + } + atomic_sub(0x10000, &mm->context.attach_count); + preempt_enable(); +} + +static inline void __tlb_flush_kernel(void) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) init_mm.pgd | + init_mm.context.asce_bits); + else + __tlb_flush_global(); +} #else +#define __tlb_flush_global() __tlb_flush_local() #define __tlb_flush_full(mm) __tlb_flush_local() -#endif /* - * Flush all tlb entries of a page table on all cpus. + * Flush TLB entries for a specific ASCE on all CPUs. */ -static inline void __tlb_flush_idte(unsigned long asce) +static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (asce) : "cc" ); + if (MACHINE_HAS_TLB_LC) + __tlb_flush_idte_local(asce); + else + __tlb_flush_local(); } +static inline void __tlb_flush_kernel(void) +{ + if (MACHINE_HAS_TLB_LC) + __tlb_flush_idte_local((unsigned long) init_mm.pgd | + init_mm.context.asce_bits); + else + __tlb_flush_local(); +} +#endif + static inline void __tlb_flush_mm(struct mm_struct * mm) { - if (unlikely(cpumask_empty(mm_cpumask(mm)))) - return; /* * If the machine has IDTE we prefer to do a per mm flush * on all cpus instead of doing a local flush if the mm * only ran on the local cpu. */ - if (MACHINE_HAS_IDTE) { - if (mm->context.noexec) - __tlb_flush_idte((unsigned long) - get_shadow_table(mm->pgd) | - mm->context.asce_bits); - __tlb_flush_idte((unsigned long) mm->pgd | + if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) + __tlb_flush_asce(mm, (unsigned long) mm->pgd | mm->context.asce_bits); - return; - } - __tlb_flush_full(mm); + else + __tlb_flush_full(mm); } -static inline void __tlb_flush_mm_cond(struct mm_struct * mm) +static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { - spin_lock(&mm->page_table_lock); if (mm->context.flush_mm) { __tlb_flush_mm(mm); mm->context.flush_mm = 0; } - spin_unlock(&mm->page_table_lock); } /* @@ -126,19 +193,19 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm) static inline void flush_tlb_mm(struct mm_struct *mm) { - __tlb_flush_mm_cond(mm); + __tlb_flush_mm_lazy(mm); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __tlb_flush_mm_cond(vma->vm_mm); + __tlb_flush_mm_lazy(vma->vm_mm); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __tlb_flush_mm(&init_mm); + __tlb_flush_kernel(); } #endif /* _S390_TLBFLUSH_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index c5338834ddb..56af53093d2 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -2,46 +2,52 @@ #define _ASM_S390_TOPOLOGY_H #include <linux/cpumask.h> -#include <asm/sysinfo.h> -extern unsigned char cpu_core_id[NR_CPUS]; -extern cpumask_t cpu_core_map[NR_CPUS]; - -static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) -{ - return &cpu_core_map[cpu]; -} - -#define topology_core_id(cpu) (cpu_core_id[cpu]) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define mc_capable() (1) +struct sysinfo_15_1_x; +struct cpu; #ifdef CONFIG_SCHED_BOOK -extern unsigned char cpu_book_id[NR_CPUS]; -extern cpumask_t cpu_book_map[NR_CPUS]; +struct cpu_topology_s390 { + unsigned short core_id; + unsigned short socket_id; + unsigned short book_id; + cpumask_t core_mask; + cpumask_t book_mask; +}; -static inline const struct cpumask *cpu_book_mask(unsigned int cpu) -{ - return &cpu_book_map[cpu]; -} +extern struct cpu_topology_s390 cpu_topology[NR_CPUS]; -#define topology_book_id(cpu) (cpu_book_id[cpu]) -#define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) +#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask) +#define topology_book_id(cpu) (cpu_topology[cpu].book_id) +#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask) -#endif /* CONFIG_SCHED_BOOK */ +#define mc_capable() 1 +int topology_cpu_init(struct cpu *); int topology_set_cpu_management(int fc); void topology_schedule_update(void); void store_topology(struct sysinfo_15_1_x *info); +void topology_expect_change(void); +const struct cpumask *cpu_coregroup_mask(int cpu); -#define POLARIZATION_UNKNWN (-1) +#else /* CONFIG_SCHED_BOOK */ + +static inline void topology_schedule_update(void) { } +static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline void topology_expect_change(void) { } + +#endif /* CONFIG_SCHED_BOOK */ + +#define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) #define POLARIZATION_VM (2) #define POLARIZATION_VH (3) -#ifdef CONFIG_SMP +#ifdef CONFIG_SCHED_BOOK void s390_init_cpu_topology(void); #else static inline void s390_init_cpu_topology(void) @@ -49,8 +55,6 @@ static inline void s390_init_cpu_topology(void) }; #endif -#define SD_BOOK_INIT SD_CPU_INIT - #include <asm-generic/topology.h> #endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index 04d6b95a89c..dccef3ca91f 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -1,44 +1,20 @@ /* - * include/asm-s390/types.h - * * S390 version * * Derived from "include/asm-i386/types.h" */ - #ifndef _S390_TYPES_H #define _S390_TYPES_H -#include <asm-generic/int-ll64.h> - -#ifndef __ASSEMBLY__ - -typedef unsigned short umode_t; - -/* A address type so that arithmetic can be done on it & it can be upgraded to - 64 bit when necessary -*/ -typedef unsigned long addr_t; -typedef __signed__ long saddr_t; - -#endif /* __ASSEMBLY__ */ +#include <uapi/asm/types.h> /* * These aren't exported outside the kernel to avoid name space clashes */ -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ -typedef u64 dma64_addr_t; -#ifdef __s390x__ -/* DMA addresses come in 32-bit and 64-bit flavours. */ -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif - -#ifndef __s390x__ +#ifndef CONFIG_64BIT typedef union { unsigned long long pair; struct { @@ -47,7 +23,6 @@ typedef union { } subreg; } register_pair; -#endif /* ! __s390x__ */ +#endif /* ! CONFIG_64BIT */ #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* _S390_TYPES_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d6b1ed0ec52..cd4c68e0398 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/uaccess.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -16,6 +14,7 @@ */ #include <linux/sched.h> #include <linux/errno.h> +#include <asm/ctl_reg.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -49,12 +48,18 @@ #define segment_eq(a,b) ((a).ar4 == (b).ar4) - -static inline int __access_ok(const void __user *addr, unsigned long size) +static inline int __range_ok(unsigned long addr, unsigned long size) { return 1; } -#define access_ok(type,addr,size) __access_ok(addr,size) + +#define __access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + __range_ok((unsigned long)(addr), (size)); \ +}) + +#define access_ok(type, addr, size) __access_ok(addr, size) /* * The exception table consists of pairs of addresses: the first is the @@ -71,42 +76,104 @@ static inline int __access_ok(const void __user *addr, unsigned long size) struct exception_table_entry { - unsigned long insn, fixup; + int insn, fixup; }; -struct uaccess_ops { - size_t (*copy_from_user)(size_t, const void __user *, void *); - size_t (*copy_from_user_small)(size_t, const void __user *, void *); - size_t (*copy_to_user)(size_t, void __user *, const void *); - size_t (*copy_to_user_small)(size_t, void __user *, const void *); - size_t (*copy_in_user)(size_t, void __user *, const void __user *); - size_t (*clear_user)(size_t, void __user *); - size_t (*strnlen_user)(size_t, const char __user *); - size_t (*strncpy_from_user)(size_t, const char __user *, char *); - int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); - int (*futex_atomic_cmpxchg)(int __user *, int old, int new); -}; +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +unsigned long __must_check __copy_from_user(void *to, const void __user *from, + unsigned long n); + +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long __must_check __copy_to_user(void __user *to, const void *from, + unsigned long n); -extern struct uaccess_ops uaccess; -extern struct uaccess_ops uaccess_std; -extern struct uaccess_ops uaccess_mvcos; -extern struct uaccess_ops uaccess_mvcos_switch; -extern struct uaccess_ops uaccess_pt; +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user -extern int __handle_fault(unsigned long, unsigned long, int); +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + +#define __put_get_user_asm(to, from, size, spec) \ +({ \ + register unsigned long __reg0 asm("0") = spec; \ + int __rc; \ + \ + asm volatile( \ + "0: mvcos %1,%3,%2\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%5\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "=Q" (*(to)) \ + : "d" (size), "Q" (*(from)), \ + "d" (__reg0), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) -static inline int __put_user_fn(size_t size, void __user *ptr, void *x) +#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) +#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) + +#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ + +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - size = uaccess.copy_to_user_small(size, ptr, x); - return size ? -EFAULT : size; + size = __copy_to_user(ptr, x, size); + return size ? -EFAULT : 0; } -static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - size = uaccess.copy_from_user_small(size, ptr, x); - return size ? -EFAULT : size; + size = __copy_from_user(x, ptr, size); + return size ? -EFAULT : 0; } +#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */ + /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. @@ -121,8 +188,8 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) case 2: \ case 4: \ case 8: \ - __pu_err = __put_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __pu_err = __put_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ break; \ default: \ __put_user_bad(); \ @@ -138,7 +205,7 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) }) -extern int __put_user_bad(void) __attribute__((noreturn)); +int __put_user_bad(void) __attribute__((noreturn)); #define __get_user(x, ptr) \ ({ \ @@ -147,29 +214,29 @@ extern int __put_user_bad(void) __attribute__((noreturn)); switch (sizeof(*(ptr))) { \ case 1: { \ unsigned char __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ unsigned short __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ unsigned int __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ unsigned long long __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ @@ -186,38 +253,12 @@ extern int __put_user_bad(void) __attribute__((noreturn)); __get_user(x, ptr); \ }) -extern int __get_user_bad(void) __attribute__((noreturn)); +int __get_user_bad(void) __attribute__((noreturn)); #define __put_user_unaligned __put_user #define __get_user_unaligned __get_user /** - * __copy_to_user: - Copy a block of data into user space, with less checking. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (__builtin_constant_p(n) && (n <= 256)) - return uaccess.copy_to_user_small(n, to, from); - else - return uaccess.copy_to_user(n, to, from); -} - -#define __copy_to_user_inatomic __copy_to_user -#define __copy_from_user_inatomic __copy_from_user - -/** * copy_to_user: - Copy a block of data into user space. * @to: Destination address, in user space. * @from: Source address, in kernel space. @@ -234,38 +275,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); - if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); - return n; + return __copy_to_user(to, from, n); } -/** - * __copy_from_user: - Copy a block of data from user space, with less checking. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from user space to kernel space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - */ -static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (__builtin_constant_p(n) && (n <= 256)) - return uaccess.copy_from_user_small(n, from, to); - else - return uaccess.copy_from_user(n, from, to); -} - -extern void copy_from_user_overflow(void) +void copy_from_user_overflow(void) #ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS __compiletime_warning("copy_from_user() buffer size is not provably correct") #endif @@ -297,46 +310,38 @@ copy_from_user(void *to, const void __user *from, unsigned long n) copy_from_user_overflow(); return n; } - if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; + return __copy_from_user(to, from, n); } -static inline unsigned long __must_check -__copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - return uaccess.copy_in_user(n, to, from); -} +unsigned long __must_check +__copy_in_user(void __user *to, const void __user *from, unsigned long n); static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_fault(); - if (__access_ok(from,n) && __access_ok(to,n)) - n = __copy_in_user(to, from, n); - return n; + return __copy_in_user(to, from, n); } /* * Copy a null terminated string from userspace. */ + +long __strncpy_from_user(char *dst, const char __user *src, long count); + static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) { - long res = -EFAULT; might_fault(); - if (access_ok(VERIFY_READ, src, 1)) - res = uaccess.strncpy_from_user(count, src, dst); - return res; + return __strncpy_from_user(dst, src, count); } -static inline unsigned long -strnlen_user(const char __user * src, unsigned long n) +unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count); + +static inline unsigned long strnlen_user(const char __user *src, unsigned long n) { might_fault(); - return uaccess.strnlen_user(n, src); + return __strnlen_user(src, n); } /** @@ -358,20 +363,14 @@ strnlen_user(const char __user * src, unsigned long n) /* * Zero Userspace */ +unsigned long __must_check __clear_user(void __user *to, unsigned long size); -static inline unsigned long __must_check -__clear_user(void __user *to, unsigned long n) -{ - return uaccess.clear_user(n, to); -} - -static inline unsigned long __must_check -clear_user(void __user *to, unsigned long n) +static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); - if (access_ok(VERIFY_WRITE, to, n)) - n = uaccess.clear_user(n, to); - return n; + return __clear_user(to, n); } +int copy_to_user_real(void __user *dest, void *src, unsigned long count); + #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 1049ef27c15..65188635355 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -1,370 +1,13 @@ /* - * include/asm-s390/unistd.h - * * S390 version * * Derived from "include/asm-i386/unistd.h" */ - #ifndef _ASM_S390_UNISTD_H_ #define _ASM_S390_UNISTD_H_ -/* - * This file contains the system call numbers. - */ - -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_restart_syscall 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_brk 45 -#define __NR_signal 48 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_setpgid 57 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_symlink 83 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_lookup_dcookie 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_getdents 141 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 -#define __NR_putpmsg 189 -#define __NR_vfork 190 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_getdents64 220 -#define __NR_readahead 222 -#define __NR_setxattr 224 -#define __NR_lsetxattr 225 -#define __NR_fsetxattr 226 -#define __NR_getxattr 227 -#define __NR_lgetxattr 228 -#define __NR_fgetxattr 229 -#define __NR_listxattr 230 -#define __NR_llistxattr 231 -#define __NR_flistxattr 232 -#define __NR_removexattr 233 -#define __NR_lremovexattr 234 -#define __NR_fremovexattr 235 -#define __NR_gettid 236 -#define __NR_tkill 237 -#define __NR_futex 238 -#define __NR_sched_setaffinity 239 -#define __NR_sched_getaffinity 240 -#define __NR_tgkill 241 -/* Number 242 is reserved for tux */ -#define __NR_io_setup 243 -#define __NR_io_destroy 244 -#define __NR_io_getevents 245 -#define __NR_io_submit 246 -#define __NR_io_cancel 247 -#define __NR_exit_group 248 -#define __NR_epoll_create 249 -#define __NR_epoll_ctl 250 -#define __NR_epoll_wait 251 -#define __NR_set_tid_address 252 -#define __NR_fadvise64 253 -#define __NR_timer_create 254 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ -#define __NR_statfs64 265 -#define __NR_fstatfs64 266 -#define __NR_remap_file_pages 267 -/* Number 268 is reserved for new sys_mbind */ -/* Number 269 is reserved for new sys_get_mempolicy */ -/* Number 270 is reserved for new sys_set_mempolicy */ -#define __NR_mq_open 271 -#define __NR_mq_unlink 272 -#define __NR_mq_timedsend 273 -#define __NR_mq_timedreceive 274 -#define __NR_mq_notify 275 -#define __NR_mq_getsetattr 276 -#define __NR_kexec_load 277 -#define __NR_add_key 278 -#define __NR_request_key 279 -#define __NR_keyctl 280 -#define __NR_waitid 281 -#define __NR_ioprio_set 282 -#define __NR_ioprio_get 283 -#define __NR_inotify_init 284 -#define __NR_inotify_add_watch 285 -#define __NR_inotify_rm_watch 286 -/* Number 287 is reserved for new sys_migrate_pages */ -#define __NR_openat 288 -#define __NR_mkdirat 289 -#define __NR_mknodat 290 -#define __NR_fchownat 291 -#define __NR_futimesat 292 -#define __NR_unlinkat 294 -#define __NR_renameat 295 -#define __NR_linkat 296 -#define __NR_symlinkat 297 -#define __NR_readlinkat 298 -#define __NR_fchmodat 299 -#define __NR_faccessat 300 -#define __NR_pselect6 301 -#define __NR_ppoll 302 -#define __NR_unshare 303 -#define __NR_set_robust_list 304 -#define __NR_get_robust_list 305 -#define __NR_splice 306 -#define __NR_sync_file_range 307 -#define __NR_tee 308 -#define __NR_vmsplice 309 -/* Number 310 is reserved for new sys_move_pages */ -#define __NR_getcpu 311 -#define __NR_epoll_pwait 312 -#define __NR_utimes 313 -#define __NR_fallocate 314 -#define __NR_utimensat 315 -#define __NR_signalfd 316 -#define __NR_timerfd 317 -#define __NR_eventfd 318 -#define __NR_timerfd_create 319 -#define __NR_timerfd_settime 320 -#define __NR_timerfd_gettime 321 -#define __NR_signalfd4 322 -#define __NR_eventfd2 323 -#define __NR_inotify_init1 324 -#define __NR_pipe2 325 -#define __NR_dup3 326 -#define __NR_epoll_create1 327 -#define __NR_preadv 328 -#define __NR_pwritev 329 -#define __NR_rt_tgsigqueueinfo 330 -#define __NR_perf_event_open 331 -#define __NR_fanotify_init 332 -#define __NR_fanotify_mark 333 -#define __NR_prlimit64 334 -#define NR_syscalls 335 +#include <uapi/asm/unistd.h> -/* - * There are some system calls that are not present on 64 bit, some - * have a different name although they do the same (e.g. __NR_chown32 - * is __NR_chown on 64 bit). - */ -#ifndef __s390x__ - -#define __NR_time 13 -#define __NR_lchown 16 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_getrlimit 76 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_fchown 95 -#define __NR_ioperm 101 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR__newselect 142 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_chown 182 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_fcntl64 221 -#define __NR_sendfile64 223 -#define __NR_fadvise64_64 264 -#define __NR_fstatat64 293 - -#else - -#define __NR_select 142 -#define __NR_getrlimit 191 /* SuS compliant getrlimit */ -#define __NR_lchown 198 -#define __NR_getuid 199 -#define __NR_getgid 200 -#define __NR_geteuid 201 -#define __NR_getegid 202 -#define __NR_setreuid 203 -#define __NR_setregid 204 -#define __NR_getgroups 205 -#define __NR_setgroups 206 -#define __NR_fchown 207 -#define __NR_setresuid 208 -#define __NR_getresuid 209 -#define __NR_setresgid 210 -#define __NR_getresgid 211 -#define __NR_chown 212 -#define __NR_setuid 213 -#define __NR_setgid 214 -#define __NR_setfsuid 215 -#define __NR_setfsgid 216 -#define __NR_newfstatat 293 - -#endif - -#ifdef __KERNEL__ #ifndef CONFIG_64BIT #define __IGNORE_select @@ -381,8 +24,8 @@ /* Ignore system calls that are also reachable via sys_socket */ #define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg -#define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME @@ -390,6 +33,7 @@ #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK @@ -399,24 +43,15 @@ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK -#define __ARCH_WANT_SYS_RT_SIGACTION -#define __ARCH_WANT_SYS_RT_SIGSUSPEND # ifndef CONFIG_64BIT # define __ARCH_WANT_STAT64 # define __ARCH_WANT_SYS_TIME # endif # ifdef CONFIG_COMPAT # define __ARCH_WANT_COMPAT_SYS_TIME -# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND # endif +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_CLONE -/* - * "Conditional" syscalls - * - * What we want is __attribute__((weak,alias("sys_ni_syscall"))), - * but it doesn't work on all toolchains, so we just do it by hand - */ -#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") - -#endif /* __KERNEL__ */ #endif /* _ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h index 1b050e35fdc..6ed1d188633 100644 --- a/arch/s390/include/asm/user.h +++ b/arch/s390/include/asm/user.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/user.h - * * S390 version * * Derived from "include/asm-i386/usr.h" diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 533f35751ae..bc9746a7d47 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -1,8 +1,6 @@ #ifndef __S390_VDSO_H__ #define __S390_VDSO_H__ -#ifdef __KERNEL__ - /* Default link addresses for the vDSOs */ #define VDSO32_LBASE 0 #define VDSO64_LBASE 0 @@ -28,8 +26,9 @@ struct vdso_data { __u64 wtom_clock_nsec; /* 0x28 */ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ __u32 tz_dsttime; /* Type of dst correction 0x34 */ - __u32 ectg_available; - __u32 ntp_mult; /* NTP adjusted multiplier 0x3C */ + __u32 ectg_available; /* ECTG instruction present 0x38 */ + __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */ + __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */ }; struct vdso_per_cpu_data { @@ -40,12 +39,9 @@ struct vdso_per_cpu_data { extern struct vdso_data *vdso_data; #ifdef CONFIG_64BIT -int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +int vdso_alloc_per_cpu(struct _lowcore *lowcore); +void vdso_free_per_cpu(struct _lowcore *lowcore); #endif #endif /* __ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h new file mode 100644 index 00000000000..d375526c261 --- /dev/null +++ b/arch/s390/include/asm/vga.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_VGA_H +#define _ASM_S390_VGA_H + +/* Avoid compile errors due to missing asm/vga.h */ + +#endif /* _ASM_S390_VGA_H */ diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h new file mode 100644 index 00000000000..af9896c53eb --- /dev/null +++ b/arch/s390/include/asm/vtime.h @@ -0,0 +1,7 @@ +#ifndef _S390_VTIME_H +#define _S390_VTIME_H + +#define __ARCH_HAS_VTIME_ACCOUNT +#define __ARCH_HAS_VTIME_TASK_SWITCH + +#endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h new file mode 100644 index 00000000000..bfe25d513ad --- /dev/null +++ b/arch/s390/include/asm/vtimer.h @@ -0,0 +1,33 @@ +/* + * Copyright IBM Corp. 2003, 2012 + * Virtual CPU timer + * + * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + */ + +#ifndef _ASM_S390_TIMER_H +#define _ASM_S390_TIMER_H + +#define VTIMER_MAX_SLICE (0x7fffffffffffffffULL) + +struct vtimer_list { + struct list_head entry; + u64 expires; + u64 interval; + void (*function)(unsigned long); + unsigned long data; +}; + +extern void init_virt_timer(struct vtimer_list *timer); +extern void add_virt_timer(struct vtimer_list *timer); +extern void add_virt_timer_periodic(struct vtimer_list *timer); +extern int mod_virt_timer(struct vtimer_list *timer, u64 expires); +extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires); +extern int del_virt_timer(struct vtimer_list *timer); + +extern void init_cpu_vtimer(void); +extern void vtime_init(void); + +extern void vtime_stop_cpu(void); + +#endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild new file mode 100644 index 00000000000..736637363d3 --- /dev/null +++ b/arch/s390/include/uapi/asm/Kbuild @@ -0,0 +1,51 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += chpid.h +header-y += chsc.h +header-y += cmb.h +header-y += dasd.h +header-y += debug.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += kvm.h +header-y += kvm_para.h +header-y += kvm_virtio.h +header-y += mman.h +header-y += monwriter.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += qeth.h +header-y += resource.h +header-y += schid.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += sclp_ctl.h +header-y += sie.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += tape390.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += ucontext.h +header-y += unistd.h +header-y += virtio-ccw.h +header-y += vtoc.h +header-y += zcrypt.h diff --git a/arch/s390/include/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h index a1f153e8913..a1f153e8913 100644 --- a/arch/s390/include/asm/auxvec.h +++ b/arch/s390/include/uapi/asm/auxvec.h diff --git a/arch/s390/include/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h index 6b235aea9c6..6b235aea9c6 100644 --- a/arch/s390/include/asm/bitsperlong.h +++ b/arch/s390/include/uapi/asm/bitsperlong.h diff --git a/arch/s390/include/asm/byteorder.h b/arch/s390/include/uapi/asm/byteorder.h index a332e59e26f..a332e59e26f 100644 --- a/arch/s390/include/asm/byteorder.h +++ b/arch/s390/include/uapi/asm/byteorder.h diff --git a/arch/s390/include/uapi/asm/chpid.h b/arch/s390/include/uapi/asm/chpid.h new file mode 100644 index 00000000000..6b4fb29cc19 --- /dev/null +++ b/arch/s390/include/uapi/asm/chpid.h @@ -0,0 +1,22 @@ +/* + * Copyright IBM Corp. 2007, 2012 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _UAPI_ASM_S390_CHPID_H +#define _UAPI_ASM_S390_CHPID_H + +#include <linux/string.h> +#include <linux/types.h> + +#define __MAX_CHPID 255 + +struct chp_id { + __u8 reserved1; + __u8 cssid; + __u8 reserved2; + __u8 id; +} __attribute__((packed)); + + +#endif /* _UAPI_ASM_S390_CHPID_H */ diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index 4943654ed7f..65dc694725a 100644 --- a/arch/s390/include/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -1,7 +1,7 @@ /* * ioctl interface for /dev/chsc * - * Copyright 2008 IBM Corp. + * Copyright IBM Corp. 2008, 2012 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> */ @@ -9,9 +9,12 @@ #define _ASM_CHSC_H #include <linux/types.h> +#include <linux/ioctl.h> #include <asm/chpid.h> #include <asm/schid.h> +#define CHSC_SIZE 0x1000 + struct chsc_async_header { __u16 length; __u16 code; @@ -23,15 +26,24 @@ struct chsc_async_header { struct chsc_async_area { struct chsc_async_header header; - __u8 data[PAGE_SIZE - 16 /* size of chsc_async_header */]; + __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; +} __attribute__ ((packed)); + +struct chsc_header { + __u16 length; + __u16 code; } __attribute__ ((packed)); +struct chsc_sync_area { + struct chsc_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; +} __attribute__ ((packed)); struct chsc_response_struct { __u16 length; __u16 code; __u32 parms; - __u8 data[PAGE_SIZE - 8]; + __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)]; } __attribute__ ((packed)); struct chsc_chp_cd { @@ -124,33 +136,8 @@ struct chsc_cpd_info { #define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area) +#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area) +#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b) -#ifdef __KERNEL__ - -struct css_general_char { - u64 : 12; - u32 dynio : 1; /* bit 12 */ - u32 : 28; - u32 aif : 1; /* bit 41 */ - u32 : 3; - u32 mcss : 1; /* bit 45 */ - u32 fcs : 1; /* bit 46 */ - u32 : 1; - u32 ext_mb : 1; /* bit 48 */ - u32 : 7; - u32 aif_tdd : 1; /* bit 56 */ - u32 : 1; - u32 qebsm : 1; /* bit 58 */ - u32 : 8; - u32 aif_osa : 1; /* bit 67 */ - u32 : 14; - u32 cib : 1; /* bit 82 */ - u32 : 5; - u32 fcx : 1; /* bit 88 */ - u32 : 7; -}__attribute__((packed)); - -extern struct css_general_char css_general_characteristics; - -#endif /* __KERNEL__ */ #endif diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h new file mode 100644 index 00000000000..0c086d00d89 --- /dev/null +++ b/arch/s390/include/uapi/asm/cmb.h @@ -0,0 +1,53 @@ +#ifndef _UAPIS390_CMB_H +#define _UAPIS390_CMB_H + +#include <linux/types.h> + +/** + * struct cmbdata - channel measurement block data for user space + * @size: size of the stored data + * @elapsed_time: time since last sampling + * @ssch_rsch_count: number of ssch and rsch + * @sample_count: number of samples + * @device_connect_time: time of device connect + * @function_pending_time: time of function pending + * @device_disconnect_time: time of device disconnect + * @control_unit_queuing_time: time of control unit queuing + * @device_active_only_time: time of device active only + * @device_busy_time: time of device busy (ext. format) + * @initial_command_response_time: initial command response time (ext. format) + * + * All values are stored as 64 bit for simplicity, especially + * in 32 bit emulation mode. All time values are normalized to + * nanoseconds. + * Currently, two formats are known, which differ by the size of + * this structure, i.e. the last two members are only set when + * the extended channel measurement facility (first shipped in + * z990 machines) is activated. + * Potentially, more fields could be added, which would result in a + * new ioctl number. + */ +struct cmbdata { + __u64 size; + __u64 elapsed_time; + /* basic and exended format: */ + __u64 ssch_rsch_count; + __u64 sample_count; + __u64 device_connect_time; + __u64 function_pending_time; + __u64 device_disconnect_time; + __u64 control_unit_queuing_time; + __u64 device_active_only_time; + /* extended format only: */ + __u64 device_busy_time; + __u64 initial_command_response_time; +}; + +/* enable channel measurement */ +#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER, 32) +/* enable channel measurement */ +#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER, 33) +/* read channel measurement data */ +#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata) + +#endif /* _UAPIS390_CMB_H */ diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 0be28efe5b6..5812a3b2df9 100644 --- a/arch/s390/include/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -1,8 +1,7 @@ /* - * File...........: linux/drivers/s390/block/dasd.c * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> * Bugreports.to..: <Linux390@de.ibm.com> - * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + * Copyright IBM Corp. 1999, 2000 * EMC Symmetrix ioctl Copyright EMC Corporation, 2008 * Author.........: Nigel Hislop <hislop_nigel@emc.com> * @@ -262,6 +261,10 @@ struct dasd_snid_ioctl_data { #define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) /* Resume IO on device */ #define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +/* Abort all I/O on a device */ +#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) +/* Allow I/O on a device */ +#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241) /* retrieve API version number */ diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h new file mode 100644 index 00000000000..c59fc79125f --- /dev/null +++ b/arch/s390/include/uapi/asm/debug.h @@ -0,0 +1,34 @@ +/* + * S/390 debug facility + * + * Copyright IBM Corp. 1999, 2000 + */ + +#ifndef _UAPIDEBUG_H +#define _UAPIDEBUG_H + +#include <linux/fs.h> + +/* Note: + * struct __debug_entry must be defined outside of #ifdef __KERNEL__ + * in order to allow a user program to analyze the 'raw'-view. + */ + +struct __debug_entry{ + union { + struct { + unsigned long long clock:52; + unsigned long long exception:1; + unsigned long long level:3; + unsigned long long cpuid:8; + } fields; + + unsigned long long stck; + } id; + void* caller; +} __attribute__((packed)); + + +#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ + +#endif /* _UAPIDEBUG_H */ diff --git a/arch/s390/include/asm/errno.h b/arch/s390/include/uapi/asm/errno.h index e41d5b37c4d..395e97d8005 100644 --- a/arch/s390/include/asm/errno.h +++ b/arch/s390/include/uapi/asm/errno.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/errno.h - * * S390 version * */ diff --git a/arch/s390/include/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h index 46ab12db573..46ab12db573 100644 --- a/arch/s390/include/asm/fcntl.h +++ b/arch/s390/include/uapi/asm/fcntl.h diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h new file mode 100644 index 00000000000..37998b44953 --- /dev/null +++ b/arch/s390/include/uapi/asm/hypfs.h @@ -0,0 +1,25 @@ +/* + * IOCTL interface for hypfs + * + * Copyright IBM Corp. 2013 + * + * Author: Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_HYPFS_CTL_H +#define _ASM_HYPFS_CTL_H + +#include <linux/types.h> + +struct hypfs_diag304 { + __u32 args[2]; + __u64 data; + __u64 rc; +} __attribute__((packed)); + +#define HYPFS_IOCTL_MAGIC 0x10 + +#define HYPFS_DIAG304 \ + _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304) + +#endif diff --git a/arch/s390/include/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h index b279fe06dfe..b279fe06dfe 100644 --- a/arch/s390/include/asm/ioctl.h +++ b/arch/s390/include/uapi/asm/ioctl.h diff --git a/arch/s390/include/asm/ioctls.h b/arch/s390/include/uapi/asm/ioctls.h index 960a4c1ebdf..960a4c1ebdf 100644 --- a/arch/s390/include/asm/ioctls.h +++ b/arch/s390/include/uapi/asm/ioctls.h diff --git a/arch/s390/include/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h index 37f293d12c8..37f293d12c8 100644 --- a/arch/s390/include/asm/ipcbuf.h +++ b/arch/s390/include/uapi/asm/ipcbuf.h diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h new file mode 100644 index 00000000000..0fc26430a1e --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm.h @@ -0,0 +1,131 @@ +#ifndef __LINUX_KVM_S390_H +#define __LINUX_KVM_S390_H +/* + * KVM s390 specific structures and definitions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ +#include <linux/types.h> + +#define __KVM_S390 +#define __KVM_HAVE_GUEST_DEBUG + +/* Device control API: s390-specific devices */ +#define KVM_DEV_FLIC_GET_ALL_IRQS 1 +#define KVM_DEV_FLIC_ENQUEUE 2 +#define KVM_DEV_FLIC_CLEAR_IRQS 3 +#define KVM_DEV_FLIC_APF_ENABLE 4 +#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 +#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 +#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 +/* + * We can have up to 4*64k pending subchannels + 8 adapter interrupts, + * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. + * There are also sclp and machine checks. This gives us + * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000 + * Lets round up to 8192 pages. + */ +#define KVM_S390_MAX_FLOAT_IRQS 266250 +#define KVM_S390_FLIC_MAX_BUFFER 0x2000000 + +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + +#define KVM_S390_IO_ADAPTER_MASK 1 +#define KVM_S390_IO_ADAPTER_MAP 2 +#define KVM_S390_IO_ADAPTER_UNMAP 3 + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + +/* kvm attr_group on vm fd */ +#define KVM_S390_VM_MEM_CTRL 0 + +/* kvm attributes for mem_ctrl */ +#define KVM_S390_VM_MEM_ENABLE_CMMA 0 +#define KVM_S390_VM_MEM_CLR_CMMA 1 + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* general purpose regs for s390 */ + __u64 gprs[16]; +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + __u32 acrs[16]; + __u64 crs[16]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u32 fpc; + __u64 fprs[16]; +}; + +#define KVM_GUESTDBG_USE_HW_BP 0x00010000 + +#define KVM_HW_BP 1 +#define KVM_HW_WP_WRITE 2 +#define KVM_SINGLESTEP 4 + +struct kvm_debug_exit_arch { + __u64 addr; + __u8 type; + __u8 pad[7]; /* Should be set to 0 */ +}; + +struct kvm_hw_breakpoint { + __u64 addr; + __u64 phys_addr; + __u64 len; + __u8 type; + __u8 pad[7]; /* Should be set to 0 */ +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { + __u32 nr_hw_bp; + __u32 pad; /* Should be set to 0 */ + struct kvm_hw_breakpoint __user *hw_bp; +}; + +#define KVM_SYNC_PREFIX (1UL << 0) +#define KVM_SYNC_GPRS (1UL << 1) +#define KVM_SYNC_ACRS (1UL << 2) +#define KVM_SYNC_CRS (1UL << 3) +/* definition of registers in kvm_run */ +struct kvm_sync_regs { + __u64 prefix; /* prefix register */ + __u64 gprs[16]; /* general purpose registers */ + __u32 acrs[16]; /* access registers */ + __u64 crs[16]; /* control registers */ +}; + +#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) +#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) +#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) +#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9) +#endif diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h new file mode 100644 index 00000000000..ff1f4e7b301 --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm_para.h @@ -0,0 +1,11 @@ +/* + * User API definitions for paravirtual devices on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/uapi/asm/kvm_virtio.h index 72f614181ef..44a438ca9e7 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/uapi/asm/kvm_virtio.h @@ -1,5 +1,5 @@ /* - * kvm_virtio.h - definition for virtio for kvm on s390 + * definition for virtio for kvm on s390 * * Copyright IBM Corp. 2008 * diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h new file mode 100644 index 00000000000..de23da1f41b --- /dev/null +++ b/arch/s390/include/uapi/asm/mman.h @@ -0,0 +1,6 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/mman.h" + */ +#include <asm-generic/mman.h> diff --git a/arch/s390/include/asm/monwriter.h b/arch/s390/include/uapi/asm/monwriter.h index f0cbf96c52e..f845c8e2f86 100644 --- a/arch/s390/include/asm/monwriter.h +++ b/arch/s390/include/uapi/asm/monwriter.h @@ -1,7 +1,5 @@ /* - * include/asm-s390/monwriter.h - * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * Character device driver for writing z/VM APPLDATA monitor records * Version 1.0 * Author(s): Melissa Howland <melissah@us.ibm.com> diff --git a/arch/s390/include/asm/msgbuf.h b/arch/s390/include/uapi/asm/msgbuf.h index 1bbdee92792..1bbdee92792 100644 --- a/arch/s390/include/asm/msgbuf.h +++ b/arch/s390/include/uapi/asm/msgbuf.h diff --git a/arch/s390/include/asm/param.h b/arch/s390/include/uapi/asm/param.h index c616821bf2a..c616821bf2a 100644 --- a/arch/s390/include/asm/param.h +++ b/arch/s390/include/uapi/asm/param.h diff --git a/arch/s390/include/asm/poll.h b/arch/s390/include/uapi/asm/poll.h index c98509d3149..c98509d3149 100644 --- a/arch/s390/include/asm/poll.h +++ b/arch/s390/include/uapi/asm/poll.h diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h new file mode 100644 index 00000000000..bf2a2ad2f80 --- /dev/null +++ b/arch/s390/include/uapi/asm/posix_types.h @@ -0,0 +1,51 @@ +/* + * S390 version + * + */ + +#ifndef __ARCH_S390_POSIX_TYPES_H +#define __ARCH_S390_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +#define __kernel_size_t __kernel_size_t + +typedef unsigned short __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t + +#ifndef __s390x__ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef int __kernel_ptrdiff_t; + +#else /* __s390x__ */ + +typedef unsigned int __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef long __kernel_ptrdiff_t; +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#endif /* __s390x__ */ + +#define __kernel_ino_t __kernel_ino_t +#define __kernel_mode_t __kernel_mode_t +#define __kernel_ipc_pid_t __kernel_ipc_pid_t +#define __kernel_uid_t __kernel_uid_t +#define __kernel_gid_t __kernel_gid_t + +#include <asm-generic/posix_types.h> + +#endif diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h new file mode 100644 index 00000000000..a150f4fabe4 --- /dev/null +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -0,0 +1,459 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + */ + +#ifndef _UAPI_S390_PTRACE_H +#define _UAPI_S390_PTRACE_H + +/* + * Offsets in the user_regs_struct. They are used for the ptrace + * system call and in entry.S + */ +#ifndef __s390x__ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x04 +#define PT_GPR0 0x08 +#define PT_GPR1 0x0C +#define PT_GPR2 0x10 +#define PT_GPR3 0x14 +#define PT_GPR4 0x18 +#define PT_GPR5 0x1C +#define PT_GPR6 0x20 +#define PT_GPR7 0x24 +#define PT_GPR8 0x28 +#define PT_GPR9 0x2C +#define PT_GPR10 0x30 +#define PT_GPR11 0x34 +#define PT_GPR12 0x38 +#define PT_GPR13 0x3C +#define PT_GPR14 0x40 +#define PT_GPR15 0x44 +#define PT_ACR0 0x48 +#define PT_ACR1 0x4C +#define PT_ACR2 0x50 +#define PT_ACR3 0x54 +#define PT_ACR4 0x58 +#define PT_ACR5 0x5C +#define PT_ACR6 0x60 +#define PT_ACR7 0x64 +#define PT_ACR8 0x68 +#define PT_ACR9 0x6C +#define PT_ACR10 0x70 +#define PT_ACR11 0x74 +#define PT_ACR12 0x78 +#define PT_ACR13 0x7C +#define PT_ACR14 0x80 +#define PT_ACR15 0x84 +#define PT_ORIGGPR2 0x88 +#define PT_FPC 0x90 +/* + * A nasty fact of life that the ptrace api + * only supports passing of longs. + */ +#define PT_FPR0_HI 0x98 +#define PT_FPR0_LO 0x9C +#define PT_FPR1_HI 0xA0 +#define PT_FPR1_LO 0xA4 +#define PT_FPR2_HI 0xA8 +#define PT_FPR2_LO 0xAC +#define PT_FPR3_HI 0xB0 +#define PT_FPR3_LO 0xB4 +#define PT_FPR4_HI 0xB8 +#define PT_FPR4_LO 0xBC +#define PT_FPR5_HI 0xC0 +#define PT_FPR5_LO 0xC4 +#define PT_FPR6_HI 0xC8 +#define PT_FPR6_LO 0xCC +#define PT_FPR7_HI 0xD0 +#define PT_FPR7_LO 0xD4 +#define PT_FPR8_HI 0xD8 +#define PT_FPR8_LO 0XDC +#define PT_FPR9_HI 0xE0 +#define PT_FPR9_LO 0xE4 +#define PT_FPR10_HI 0xE8 +#define PT_FPR10_LO 0xEC +#define PT_FPR11_HI 0xF0 +#define PT_FPR11_LO 0xF4 +#define PT_FPR12_HI 0xF8 +#define PT_FPR12_LO 0xFC +#define PT_FPR13_HI 0x100 +#define PT_FPR13_LO 0x104 +#define PT_FPR14_HI 0x108 +#define PT_FPR14_LO 0x10C +#define PT_FPR15_HI 0x110 +#define PT_FPR15_LO 0x114 +#define PT_CR_9 0x118 +#define PT_CR_10 0x11C +#define PT_CR_11 0x120 +#define PT_IEEE_IP 0x13C +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x140-1 + +#define GPR_SIZE 4 +#define CR_SIZE 4 + +#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ + +#else /* __s390x__ */ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x08 +#define PT_GPR0 0x10 +#define PT_GPR1 0x18 +#define PT_GPR2 0x20 +#define PT_GPR3 0x28 +#define PT_GPR4 0x30 +#define PT_GPR5 0x38 +#define PT_GPR6 0x40 +#define PT_GPR7 0x48 +#define PT_GPR8 0x50 +#define PT_GPR9 0x58 +#define PT_GPR10 0x60 +#define PT_GPR11 0x68 +#define PT_GPR12 0x70 +#define PT_GPR13 0x78 +#define PT_GPR14 0x80 +#define PT_GPR15 0x88 +#define PT_ACR0 0x90 +#define PT_ACR1 0x94 +#define PT_ACR2 0x98 +#define PT_ACR3 0x9C +#define PT_ACR4 0xA0 +#define PT_ACR5 0xA4 +#define PT_ACR6 0xA8 +#define PT_ACR7 0xAC +#define PT_ACR8 0xB0 +#define PT_ACR9 0xB4 +#define PT_ACR10 0xB8 +#define PT_ACR11 0xBC +#define PT_ACR12 0xC0 +#define PT_ACR13 0xC4 +#define PT_ACR14 0xC8 +#define PT_ACR15 0xCC +#define PT_ORIGGPR2 0xD0 +#define PT_FPC 0xD8 +#define PT_FPR0 0xE0 +#define PT_FPR1 0xE8 +#define PT_FPR2 0xF0 +#define PT_FPR3 0xF8 +#define PT_FPR4 0x100 +#define PT_FPR5 0x108 +#define PT_FPR6 0x110 +#define PT_FPR7 0x118 +#define PT_FPR8 0x120 +#define PT_FPR9 0x128 +#define PT_FPR10 0x130 +#define PT_FPR11 0x138 +#define PT_FPR12 0x140 +#define PT_FPR13 0x148 +#define PT_FPR14 0x150 +#define PT_FPR15 0x158 +#define PT_CR_9 0x160 +#define PT_CR_10 0x168 +#define PT_CR_11 0x170 +#define PT_IEEE_IP 0x1A8 +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x1B0-1 + +#define GPR_SIZE 8 +#define CR_SIZE 8 + +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ + +#endif /* __s390x__ */ + +#define NUM_GPRS 16 +#define NUM_FPRS 16 +#define NUM_CRS 16 +#define NUM_ACRS 16 + +#define NUM_CR_WORDS 3 + +#define FPR_SIZE 8 +#define FPC_SIZE 4 +#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ +#define ACR_SIZE 4 + + +#define PTRACE_OLDSETOPTIONS 21 + +#ifndef __ASSEMBLY__ +#include <linux/stddef.h> +#include <linux/types.h> + +typedef union +{ + float f; + double d; + __u64 ui; + struct + { + __u32 hi; + __u32 lo; + } fp; +} freg_t; + +typedef struct +{ + __u32 fpc; + __u32 pad; + freg_t fprs[NUM_FPRS]; +} s390_fp_regs; + +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 + +/* this typedef defines how a Program Status Word looks like */ +typedef struct +{ + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) psw_t; + +#ifndef __s390x__ + +#define PSW_MASK_PER 0x40000000UL +#define PSW_MASK_DAT 0x04000000UL +#define PSW_MASK_IO 0x02000000UL +#define PSW_MASK_EXT 0x01000000UL +#define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ +#define PSW_MASK_MCHECK 0x00040000UL +#define PSW_MASK_WAIT 0x00020000UL +#define PSW_MASK_PSTATE 0x00010000UL +#define PSW_MASK_ASC 0x0000C000UL +#define PSW_MASK_CC 0x00003000UL +#define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_RI 0x00000000UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x0000FF00UL + +#define PSW_ADDR_AMODE 0x80000000UL +#define PSW_ADDR_INSN 0x7FFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) + +#define PSW_ASC_PRIMARY 0x00000000UL +#define PSW_ASC_ACCREG 0x00004000UL +#define PSW_ASC_SECONDARY 0x00008000UL +#define PSW_ASC_HOME 0x0000C000UL + +#else /* __s390x__ */ + +#define PSW_MASK_PER 0x4000000000000000UL +#define PSW_MASK_DAT 0x0400000000000000UL +#define PSW_MASK_IO 0x0200000000000000UL +#define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL +#define PSW_MASK_KEY 0x00F0000000000000UL +#define PSW_MASK_MCHECK 0x0004000000000000UL +#define PSW_MASK_WAIT 0x0002000000000000UL +#define PSW_MASK_PSTATE 0x0001000000000000UL +#define PSW_MASK_ASC 0x0000C00000000000UL +#define PSW_MASK_CC 0x0000300000000000UL +#define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_RI 0x0000008000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x0000FF0180000000UL + +#define PSW_ADDR_AMODE 0x0000000000000000UL +#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) + +#define PSW_ASC_PRIMARY 0x0000000000000000UL +#define PSW_ASC_ACCREG 0x0000400000000000UL +#define PSW_ASC_SECONDARY 0x0000800000000000UL +#define PSW_ASC_HOME 0x0000C00000000000UL + +#endif /* __s390x__ */ + + +/* + * The s390_regs structure is used to define the elf_gregset_t. + */ +typedef struct +{ + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; +} s390_regs; + +/* + * Now for the user space program event recording (trace) definitions. + * The following structures are used only for the ptrace interface, don't + * touch or even look at it if you don't want to modify the user-space + * ptrace interface. In particular stay away from it for in-kernel PER. + */ +typedef struct +{ + unsigned long cr[NUM_CR_WORDS]; +} per_cr_words; + +#define PER_EM_MASK 0xE8000000UL + +typedef struct +{ +#ifdef __s390x__ + unsigned : 32; +#endif /* __s390x__ */ + unsigned em_branching : 1; + unsigned em_instruction_fetch : 1; + /* + * Switching on storage alteration automatically fixes + * the storage alteration event bit in the users std. + */ + unsigned em_storage_alteration : 1; + unsigned em_gpr_alt_unused : 1; + unsigned em_store_real_address : 1; + unsigned : 3; + unsigned branch_addr_ctl : 1; + unsigned : 1; + unsigned storage_alt_space_ctl : 1; + unsigned : 21; + unsigned long starting_addr; + unsigned long ending_addr; +} per_cr_bits; + +typedef struct +{ + unsigned short perc_atmid; + unsigned long address; + unsigned char access_id; +} per_lowcore_words; + +typedef struct +{ + unsigned perc_branching : 1; + unsigned perc_instruction_fetch : 1; + unsigned perc_storage_alteration : 1; + unsigned perc_gpr_alt_unused : 1; + unsigned perc_store_real_address : 1; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; + unsigned long address; + unsigned : 4; + unsigned access_id : 4; +} per_lowcore_bits; + +typedef struct +{ + union { + per_cr_words words; + per_cr_bits bits; + } control_regs; + /* + * Use these flags instead of setting em_instruction_fetch + * directly they are used so that single stepping can be + * switched on & off while not affecting other tracing + */ + unsigned single_step : 1; + unsigned instruction_fetch : 1; + unsigned : 30; + /* + * These addresses are copied into cr10 & cr11 if single + * stepping is switched off + */ + unsigned long starting_addr; + unsigned long ending_addr; + union { + per_lowcore_words words; + per_lowcore_bits bits; + } lowcore; +} per_struct; + +typedef struct +{ + unsigned int len; + unsigned long kernel_addr; + unsigned long process_addr; +} ptrace_area; + +/* + * S/390 specific non posix ptrace requests. I chose unusual values so + * they are unlikely to clash with future ptrace definitions. + */ +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKTEXT_AREA 0x5002 +#define PTRACE_PEEKDATA_AREA 0x5003 +#define PTRACE_POKETEXT_AREA 0x5004 +#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 +#define PTRACE_ENABLE_TE 0x5009 +#define PTRACE_DISABLE_TE 0x5010 +#define PTRACE_TE_ABORT_RAND 0x5011 + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the number assigned in <linux/ptrace.h>. + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + +/* + * PT_PROT definition is loosely based on hppa bsd definition in + * gdb/hppab-nat.c + */ +#define PTRACE_PROT 21 + +typedef enum +{ + ptprot_set_access_watchpoint, + ptprot_set_write_watchpoint, + ptprot_disable_watchpoint +} ptprot_flags; + +typedef struct +{ + unsigned long lowaddr; + unsigned long hiaddr; + ptprot_flags prot; +} ptprot_area; + +/* Sequence of bytes for breakpoint illegal instruction. */ +#define S390_BREAKPOINT {0x0,0x1} +#define S390_BREAKPOINT_U16 ((__u16)0x0001) +#define S390_SYSCALL_OPCODE ((__u16)0x0a00) +#define S390_SYSCALL_SIZE 2 + +/* + * The user_regs_struct defines the way the user registers are + * store on the stack for signal handling. + */ +struct user_regs_struct +{ + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + per_struct per_info; + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_S390_PTRACE_H */ diff --git a/arch/s390/include/asm/qeth.h b/arch/s390/include/uapi/asm/qeth.h index 90efda0b137..3a896cf5258 100644 --- a/arch/s390/include/asm/qeth.h +++ b/arch/s390/include/uapi/asm/qeth.h @@ -1,9 +1,7 @@ /* - * include/asm-s390/qeth.h - * * ioctl definitions for qeth driver * - * Copyright (C) 2004 IBM Corporation + * Copyright IBM Corp. 2004 * * Author(s): Thomas Spatzier <tspat@de.ibm.com> * @@ -20,6 +18,7 @@ #define SIOC_QETH_ARP_FLUSH_CACHE (SIOCDEVPRIVATE + 4) #define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5) #define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6) +#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7) struct qeth_arp_cache_entry { __u8 macaddr[6]; @@ -107,4 +106,10 @@ struct qeth_arp_query_user_data { char *entries; } __attribute__((packed)); +struct qeth_query_oat_data { + __u32 command; + __u32 buffer_len; + __u32 response_len; + __u64 ptr; +}; #endif /* __ASM_S390_QETH_IOCTL_H__ */ diff --git a/arch/s390/include/asm/resource.h b/arch/s390/include/uapi/asm/resource.h index 366c01de04f..ec23d1c73c9 100644 --- a/arch/s390/include/asm/resource.h +++ b/arch/s390/include/uapi/asm/resource.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/resource.h - * * S390 version * * Derived from "include/asm-i386/resources.h" diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h new file mode 100644 index 00000000000..32f3ab2a820 --- /dev/null +++ b/arch/s390/include/uapi/asm/schid.h @@ -0,0 +1,16 @@ +#ifndef _UAPIASM_SCHID_H +#define _UAPIASM_SCHID_H + +#include <linux/types.h> + +struct subchannel_id { + __u32 cssid : 8; + __u32 : 4; + __u32 m : 1; + __u32 ssid : 2; + __u32 one : 1; + __u32 sch_no : 16; +} __attribute__ ((packed, aligned(4))); + + +#endif /* _UAPIASM_SCHID_H */ diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h new file mode 100644 index 00000000000..f2818613ee4 --- /dev/null +++ b/arch/s390/include/uapi/asm/sclp_ctl.h @@ -0,0 +1,24 @@ +/* + * IOCTL interface for SCLP + * + * Copyright IBM Corp. 2012 + * + * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifndef _ASM_SCLP_CTL_H +#define _ASM_SCLP_CTL_H + +#include <linux/types.h> + +struct sclp_ctl_sccb { + __u32 cmdw; + __u64 sccb; +} __attribute__((packed)); + +#define SCLP_CTL_IOCTL_MAGIC 0x10 + +#define SCLP_CTL_SCCB \ + _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb) + +#endif diff --git a/arch/s390/include/asm/sembuf.h b/arch/s390/include/uapi/asm/sembuf.h index 32626b0cac4..32626b0cac4 100644 --- a/arch/s390/include/asm/sembuf.h +++ b/arch/s390/include/uapi/asm/sembuf.h diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h new file mode 100644 index 00000000000..5a637e3e385 --- /dev/null +++ b/arch/s390/include/uapi/asm/setup.h @@ -0,0 +1,13 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2010 + */ + +#ifndef _UAPI_ASM_S390_SETUP_H +#define _UAPI_ASM_S390_SETUP_H + +#define COMMAND_LINE_SIZE 4096 + +#define ARCH_COMMAND_LINE_SIZE 896 + +#endif /* _UAPI_ASM_S390_SETUP_H */ diff --git a/arch/s390/include/asm/shmbuf.h b/arch/s390/include/uapi/asm/shmbuf.h index eed2e280ce3..eed2e280ce3 100644 --- a/arch/s390/include/asm/shmbuf.h +++ b/arch/s390/include/uapi/asm/shmbuf.h diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h new file mode 100644 index 00000000000..5d9cc19462c --- /dev/null +++ b/arch/s390/include/uapi/asm/sie.h @@ -0,0 +1,243 @@ +#ifndef _UAPI_ASM_S390_SIE_H +#define _UAPI_ASM_S390_SIE_H + +#define diagnose_codes \ + { 0x10, "DIAG (0x10) release pages" }, \ + { 0x44, "DIAG (0x44) time slice end" }, \ + { 0x9c, "DIAG (0x9c) time slice end directed" }, \ + { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ + { 0x258, "DIAG (0x258) page-reference services" }, \ + { 0x308, "DIAG (0x308) ipl functions" }, \ + { 0x500, "DIAG (0x500) KVM virtio functions" }, \ + { 0x501, "DIAG (0x501) KVM breakpoint" } + +#define sigp_order_codes \ + { 0x01, "SIGP sense" }, \ + { 0x02, "SIGP external call" }, \ + { 0x03, "SIGP emergency signal" }, \ + { 0x05, "SIGP stop" }, \ + { 0x06, "SIGP restart" }, \ + { 0x09, "SIGP stop and store status" }, \ + { 0x0b, "SIGP initial cpu reset" }, \ + { 0x0d, "SIGP set prefix" }, \ + { 0x0e, "SIGP store status at address" }, \ + { 0x12, "SIGP set architecture" }, \ + { 0x15, "SIGP sense running" } + +#define icpt_prog_codes \ + { 0x0001, "Prog Operation" }, \ + { 0x0002, "Prog Privileged Operation" }, \ + { 0x0003, "Prog Execute" }, \ + { 0x0004, "Prog Protection" }, \ + { 0x0005, "Prog Addressing" }, \ + { 0x0006, "Prog Specification" }, \ + { 0x0007, "Prog Data" }, \ + { 0x0008, "Prog Fixedpoint overflow" }, \ + { 0x0009, "Prog Fixedpoint divide" }, \ + { 0x000A, "Prog Decimal overflow" }, \ + { 0x000B, "Prog Decimal divide" }, \ + { 0x000C, "Prog HFP exponent overflow" }, \ + { 0x000D, "Prog HFP exponent underflow" }, \ + { 0x000E, "Prog HFP significance" }, \ + { 0x000F, "Prog HFP divide" }, \ + { 0x0010, "Prog Segment translation" }, \ + { 0x0011, "Prog Page translation" }, \ + { 0x0012, "Prog Translation specification" }, \ + { 0x0013, "Prog Special operation" }, \ + { 0x0015, "Prog Operand" }, \ + { 0x0016, "Prog Trace table" }, \ + { 0x0017, "Prog ASNtranslation specification" }, \ + { 0x001C, "Prog Spaceswitch event" }, \ + { 0x001D, "Prog HFP square root" }, \ + { 0x001F, "Prog PCtranslation specification" }, \ + { 0x0020, "Prog AFX translation" }, \ + { 0x0021, "Prog ASX translation" }, \ + { 0x0022, "Prog LX translation" }, \ + { 0x0023, "Prog EX translation" }, \ + { 0x0024, "Prog Primary authority" }, \ + { 0x0025, "Prog Secondary authority" }, \ + { 0x0026, "Prog LFXtranslation exception" }, \ + { 0x0027, "Prog LSXtranslation exception" }, \ + { 0x0028, "Prog ALET specification" }, \ + { 0x0029, "Prog ALEN translation" }, \ + { 0x002A, "Prog ALE sequence" }, \ + { 0x002B, "Prog ASTE validity" }, \ + { 0x002C, "Prog ASTE sequence" }, \ + { 0x002D, "Prog Extended authority" }, \ + { 0x002E, "Prog LSTE sequence" }, \ + { 0x002F, "Prog ASTE instance" }, \ + { 0x0030, "Prog Stack full" }, \ + { 0x0031, "Prog Stack empty" }, \ + { 0x0032, "Prog Stack specification" }, \ + { 0x0033, "Prog Stack type" }, \ + { 0x0034, "Prog Stack operation" }, \ + { 0x0039, "Prog Region first translation" }, \ + { 0x003A, "Prog Region second translation" }, \ + { 0x003B, "Prog Region third translation" }, \ + { 0x0040, "Prog Monitor event" }, \ + { 0x0080, "Prog PER event" }, \ + { 0x0119, "Prog Crypto operation" } + +#define exit_code_ipa0(ipa0, opcode, mnemonic) \ + { (ipa0 << 8 | opcode), #ipa0 " " mnemonic } +#define exit_code(opcode, mnemonic) \ + { opcode, mnemonic } + +#define icpt_insn_codes \ + exit_code_ipa0(0x01, 0x01, "PR"), \ + exit_code_ipa0(0x01, 0x04, "PTFF"), \ + exit_code_ipa0(0x01, 0x07, "SCKPF"), \ + exit_code_ipa0(0xAA, 0x00, "RINEXT"), \ + exit_code_ipa0(0xAA, 0x01, "RION"), \ + exit_code_ipa0(0xAA, 0x02, "TRIC"), \ + exit_code_ipa0(0xAA, 0x03, "RIOFF"), \ + exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \ + exit_code_ipa0(0xB2, 0x02, "STIDP"), \ + exit_code_ipa0(0xB2, 0x04, "SCK"), \ + exit_code_ipa0(0xB2, 0x05, "STCK"), \ + exit_code_ipa0(0xB2, 0x06, "SCKC"), \ + exit_code_ipa0(0xB2, 0x07, "STCKC"), \ + exit_code_ipa0(0xB2, 0x08, "SPT"), \ + exit_code_ipa0(0xB2, 0x09, "STPT"), \ + exit_code_ipa0(0xB2, 0x0d, "PTLB"), \ + exit_code_ipa0(0xB2, 0x10, "SPX"), \ + exit_code_ipa0(0xB2, 0x11, "STPX"), \ + exit_code_ipa0(0xB2, 0x12, "STAP"), \ + exit_code_ipa0(0xB2, 0x14, "SIE"), \ + exit_code_ipa0(0xB2, 0x16, "SETR"), \ + exit_code_ipa0(0xB2, 0x17, "STETR"), \ + exit_code_ipa0(0xB2, 0x18, "PC"), \ + exit_code_ipa0(0xB2, 0x20, "SERVC"), \ + exit_code_ipa0(0xB2, 0x28, "PT"), \ + exit_code_ipa0(0xB2, 0x29, "ISKE"), \ + exit_code_ipa0(0xB2, 0x2a, "RRBE"), \ + exit_code_ipa0(0xB2, 0x2b, "SSKE"), \ + exit_code_ipa0(0xB2, 0x2c, "TB"), \ + exit_code_ipa0(0xB2, 0x2e, "PGIN"), \ + exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \ + exit_code_ipa0(0xB2, 0x30, "CSCH"), \ + exit_code_ipa0(0xB2, 0x31, "HSCH"), \ + exit_code_ipa0(0xB2, 0x32, "MSCH"), \ + exit_code_ipa0(0xB2, 0x33, "SSCH"), \ + exit_code_ipa0(0xB2, 0x34, "STSCH"), \ + exit_code_ipa0(0xB2, 0x35, "TSCH"), \ + exit_code_ipa0(0xB2, 0x36, "TPI"), \ + exit_code_ipa0(0xB2, 0x37, "SAL"), \ + exit_code_ipa0(0xB2, 0x38, "RSCH"), \ + exit_code_ipa0(0xB2, 0x39, "STCRW"), \ + exit_code_ipa0(0xB2, 0x3a, "STCPS"), \ + exit_code_ipa0(0xB2, 0x3b, "RCHP"), \ + exit_code_ipa0(0xB2, 0x3c, "SCHM"), \ + exit_code_ipa0(0xB2, 0x40, "BAKR"), \ + exit_code_ipa0(0xB2, 0x48, "PALB"), \ + exit_code_ipa0(0xB2, 0x4c, "TAR"), \ + exit_code_ipa0(0xB2, 0x50, "CSP"), \ + exit_code_ipa0(0xB2, 0x54, "MVPG"), \ + exit_code_ipa0(0xB2, 0x58, "BSG"), \ + exit_code_ipa0(0xB2, 0x5a, "BSA"), \ + exit_code_ipa0(0xB2, 0x5f, "CHSC"), \ + exit_code_ipa0(0xB2, 0x74, "SIGA"), \ + exit_code_ipa0(0xB2, 0x76, "XSCH"), \ + exit_code_ipa0(0xB2, 0x78, "STCKE"), \ + exit_code_ipa0(0xB2, 0x7c, "STCKF"), \ + exit_code_ipa0(0xB2, 0x7d, "STSI"), \ + exit_code_ipa0(0xB2, 0xb0, "STFLE"), \ + exit_code_ipa0(0xB2, 0xb1, "STFL"), \ + exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \ + exit_code_ipa0(0xB2, 0xf8, "TEND"), \ + exit_code_ipa0(0xB2, 0xfc, "TABORT"), \ + exit_code_ipa0(0xB9, 0x1e, "KMAC"), \ + exit_code_ipa0(0xB9, 0x28, "PCKMO"), \ + exit_code_ipa0(0xB9, 0x2a, "KMF"), \ + exit_code_ipa0(0xB9, 0x2b, "KMO"), \ + exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \ + exit_code_ipa0(0xB9, 0x2e, "KM"), \ + exit_code_ipa0(0xB9, 0x2f, "KMC"), \ + exit_code_ipa0(0xB9, 0x3e, "KIMD"), \ + exit_code_ipa0(0xB9, 0x3f, "KLMD"), \ + exit_code_ipa0(0xB9, 0x8a, "CSPG"), \ + exit_code_ipa0(0xB9, 0x8d, "EPSW"), \ + exit_code_ipa0(0xB9, 0x8e, "IDTE"), \ + exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \ + exit_code_ipa0(0xB9, 0x9c, "EQBS"), \ + exit_code_ipa0(0xB9, 0xa2, "PTF"), \ + exit_code_ipa0(0xB9, 0xab, "ESSA"), \ + exit_code_ipa0(0xB9, 0xae, "RRBM"), \ + exit_code_ipa0(0xB9, 0xaf, "PFMF"), \ + exit_code_ipa0(0xE3, 0x03, "LRAG"), \ + exit_code_ipa0(0xE3, 0x13, "LRAY"), \ + exit_code_ipa0(0xE3, 0x25, "NTSTG"), \ + exit_code_ipa0(0xE5, 0x00, "LASP"), \ + exit_code_ipa0(0xE5, 0x01, "TPROT"), \ + exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \ + exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \ + exit_code_ipa0(0xEB, 0x25, "STCTG"), \ + exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \ + exit_code_ipa0(0xEB, 0x60, "LRIC"), \ + exit_code_ipa0(0xEB, 0x61, "STRIC"), \ + exit_code_ipa0(0xEB, 0x62, "MRIC"), \ + exit_code_ipa0(0xEB, 0x8a, "SQBS"), \ + exit_code_ipa0(0xC8, 0x01, "ECTG"), \ + exit_code(0x0a, "SVC"), \ + exit_code(0x80, "SSM"), \ + exit_code(0x82, "LPSW"), \ + exit_code(0x83, "DIAG"), \ + exit_code(0xae, "SIGP"), \ + exit_code(0xac, "STNSM"), \ + exit_code(0xad, "STOSM"), \ + exit_code(0xb1, "LRA"), \ + exit_code(0xb6, "STCTL"), \ + exit_code(0xb7, "LCTL"), \ + exit_code(0xee, "PLO") + +#define sie_intercept_code \ + { 0x00, "Host interruption" }, \ + { 0x04, "Instruction" }, \ + { 0x08, "Program interruption" }, \ + { 0x0c, "Instruction and program interruption" }, \ + { 0x10, "External request" }, \ + { 0x14, "External interruption" }, \ + { 0x18, "I/O request" }, \ + { 0x1c, "Wait state" }, \ + { 0x20, "Validity" }, \ + { 0x28, "Stop request" }, \ + { 0x2c, "Operation exception" }, \ + { 0x38, "Partial-execution" }, \ + { 0x3c, "I/O interruption" }, \ + { 0x40, "I/O instruction" }, \ + { 0x48, "Timing subset" } + +/* + * This is the simple interceptable instructions decoder. + * + * It will be used as userspace interface and it can be used in places + * that does not allow to use general decoder functions, + * such as trace events declarations. + * + * Some userspace tools may want to parse this code + * and would be confused by switch(), if() and other statements, + * but they can understand conditional operator. + */ +#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \ + (insn >> 56) == (ipa0) ? \ + ((ipa0 << 8) | ((insn >> rshift) & mask)) : + +#define INSN_DECODE(insn) (insn >> 56) + +/* + * The macro icpt_insn_decoder() takes an intercepted instruction + * and returns a key, which can be used to find a mnemonic name + * of the instruction in the icpt_insn_codes table. + */ +#define icpt_insn_decoder(insn) \ + INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \ + INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \ + INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \ + INSN_DECODE(insn) + +#endif /* _UAPI_ASM_S390_SIE_H */ diff --git a/arch/s390/include/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h index aeb6e0b1332..b30de9c01bb 100644 --- a/arch/s390/include/asm/sigcontext.h +++ b/arch/s390/include/uapi/asm/sigcontext.h @@ -1,8 +1,6 @@ /* - * include/asm-s390/sigcontext.h - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 */ #ifndef _ASM_S390_SIGCONTEXT_H @@ -51,6 +49,7 @@ typedef struct typedef struct { unsigned int fpc; + unsigned int pad; double fprs[__NUM_FPRS]; } _s390_fp_regs; diff --git a/arch/s390/include/asm/siginfo.h b/arch/s390/include/uapi/asm/siginfo.h index e0ff1ab054b..91fd3e4b70c 100644 --- a/arch/s390/include/asm/siginfo.h +++ b/arch/s390/include/uapi/asm/siginfo.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/siginfo.h - * * S390 version * * Derived from "include/asm-i386/siginfo.h" diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h new file mode 100644 index 00000000000..2f43cfbf5f1 --- /dev/null +++ b/arch/s390/include/uapi/asm/signal.h @@ -0,0 +1,129 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/signal.h" + */ + +#ifndef _UAPI_ASMS390_SIGNAL_H +#define _UAPI_ASMS390_SIGNAL_H + +#include <linux/types.h> +#include <linux/time.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; +struct pt_regs; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal-defs.h> + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; +#ifndef __s390x__ /* lovely */ + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +#else /* __s390x__ */ + unsigned long sa_flags; + void (*sa_restorer)(void); + sigset_t sa_mask; +#endif /* __s390x__ */ +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif /* _UAPI_ASMS390_SIGNAL_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index fdff1e995c7..e031332096d 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/socket.h - * * S390 version * * Derived from "include/asm-i386/socket.h" @@ -30,7 +28,7 @@ #define SO_PRIORITY 12 #define SO_LINGER 13 #define SO_BSDCOMPAT 14 -/* To add :#define SO_REUSEPORT 15 */ +#define SO_REUSEPORT 15 #define SO_PASSCRED 16 #define SO_PEERCRED 17 #define SO_RCVLOWAT 18 @@ -48,6 +46,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 @@ -70,4 +69,21 @@ #define SO_RXQ_OVFL 40 +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +#define SO_LOCK_FILTER 44 + +#define SO_SELECT_ERR_QUEUE 45 + +#define SO_BUSY_POLL 46 + +#define SO_MAX_PACING_RATE 47 + +#define SO_BPF_EXTENSIONS 48 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h index 6f60eee7324..6f60eee7324 100644 --- a/arch/s390/include/asm/sockios.h +++ b/arch/s390/include/uapi/asm/sockios.h diff --git a/arch/s390/include/asm/stat.h b/arch/s390/include/uapi/asm/stat.h index d92959eebb6..b4ca97d9146 100644 --- a/arch/s390/include/asm/stat.h +++ b/arch/s390/include/uapi/asm/stat.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/stat.h - * * S390 version * * Derived from "include/asm-i386/stat.h" diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h new file mode 100644 index 00000000000..471eb09184d --- /dev/null +++ b/arch/s390/include/uapi/asm/statfs.h @@ -0,0 +1,50 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/statfs.h" + */ + +#ifndef _S390_STATFS_H +#define _S390_STATFS_H + +/* + * We can't use <asm-generic/statfs.h> because in 64-bit mode + * we mix ints of different sizes in our struct statfs. + */ + +#ifndef __KERNEL_STRICT_NAMES +#include <linux/types.h> +typedef __kernel_fsid_t fsid_t; +#endif + +struct statfs { + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; + __kernel_fsid_t f_fsid; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; +}; + +struct statfs64 { + unsigned int f_type; + unsigned int f_bsize; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; + __kernel_fsid_t f_fsid; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; +}; + +#endif diff --git a/arch/s390/include/asm/swab.h b/arch/s390/include/uapi/asm/swab.h index 6bdee21c077..da3bfe5cc16 100644 --- a/arch/s390/include/asm/swab.h +++ b/arch/s390/include/uapi/asm/swab.h @@ -2,10 +2,8 @@ #define _S390_SWAB_H /* - * include/asm-s390/swab.h - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -77,7 +75,7 @@ static inline __u16 __arch_swab16p(const __u16 *x) asm volatile( #ifndef __s390x__ - " icm %0,2,%O+1(%R1)\n" + " icm %0,2,%O1+1(%R1)\n" " ic %0,%1\n" : "=&d" (result) : "Q" (*x) : "cc"); #else /* __s390x__ */ diff --git a/arch/s390/include/asm/tape390.h b/arch/s390/include/uapi/asm/tape390.h index 884fba48f1f..b2bc4bab792 100644 --- a/arch/s390/include/asm/tape390.h +++ b/arch/s390/include/uapi/asm/tape390.h @@ -1,10 +1,9 @@ /************************************************************************* * - * tape390.h * enables user programs to display messages and control encryption * on s390 tape devices * - * Copyright IBM Corp. 2001,2006 + * Copyright IBM Corp. 2001, 2006 * Author(s): Michael Holzheu <holzheu@de.ibm.com> * *************************************************************************/ diff --git a/arch/s390/include/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h index 71bf6ac6a2b..71bf6ac6a2b 100644 --- a/arch/s390/include/asm/termbits.h +++ b/arch/s390/include/uapi/asm/termbits.h diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h new file mode 100644 index 00000000000..554f973db1e --- /dev/null +++ b/arch/s390/include/uapi/asm/termios.h @@ -0,0 +1,49 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/termios.h" + */ + +#ifndef _UAPI_S390_TERMIOS_H +#define _UAPI_S390_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + + +#endif /* _UAPI_S390_TERMIOS_H */ diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h new file mode 100644 index 00000000000..038f2b9178a --- /dev/null +++ b/arch/s390/include/uapi/asm/types.h @@ -0,0 +1,22 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/types.h" + */ + +#ifndef _UAPI_S390_TYPES_H +#define _UAPI_S390_TYPES_H + +#include <asm-generic/int-ll64.h> + +#ifndef __ASSEMBLY__ + +/* A address type so that arithmetic can be done on it & it can be upgraded to + 64 bit when necessary +*/ +typedef unsigned long addr_t; +typedef __signed__ long saddr_t; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_S390_TYPES_H */ diff --git a/arch/s390/include/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h index cfb874e66c9..3e077b2a470 100644 --- a/arch/s390/include/asm/ucontext.h +++ b/arch/s390/include/uapi/asm/ucontext.h @@ -1,6 +1,4 @@ /* - * include/asm-s390/ucontext.h - * * S390 version * * Derived from "include/asm-i386/ucontext.h" @@ -18,7 +16,9 @@ struct ucontext_extended { struct ucontext *uc_link; stack_t uc_stack; _sigregs uc_mcontext; - unsigned long uc_sigmask[2]; + sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(sigset_t)]; unsigned long uc_gprs_high[16]; }; @@ -29,7 +29,9 @@ struct ucontext { struct ucontext *uc_link; stack_t uc_stack; _sigregs uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ + sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(sigset_t)]; }; #endif /* !_ASM_S390_UCONTEXT_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h new file mode 100644 index 00000000000..3802d2d3a18 --- /dev/null +++ b/arch/s390/include/uapi/asm/unistd.h @@ -0,0 +1,378 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ + +#ifndef _UAPI_ASM_S390_UNISTD_H_ +#define _UAPI_ASM_S390_UNISTD_H_ + +/* + * This file contains the system call numbers. + */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_restart_syscall 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_brk 45 +#define __NR_signal 48 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 57 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_symlink 83 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_lookup_dcookie 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_getdents 141 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 +#define __NR_putpmsg 189 +#define __NR_vfork 190 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_readahead 222 +#define __NR_setxattr 224 +#define __NR_lsetxattr 225 +#define __NR_fsetxattr 226 +#define __NR_getxattr 227 +#define __NR_lgetxattr 228 +#define __NR_fgetxattr 229 +#define __NR_listxattr 230 +#define __NR_llistxattr 231 +#define __NR_flistxattr 232 +#define __NR_removexattr 233 +#define __NR_lremovexattr 234 +#define __NR_fremovexattr 235 +#define __NR_gettid 236 +#define __NR_tkill 237 +#define __NR_futex 238 +#define __NR_sched_setaffinity 239 +#define __NR_sched_getaffinity 240 +#define __NR_tgkill 241 +/* Number 242 is reserved for tux */ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 +#define __NR_exit_group 248 +#define __NR_epoll_create 249 +#define __NR_epoll_ctl 250 +#define __NR_epoll_wait 251 +#define __NR_set_tid_address 252 +#define __NR_fadvise64 253 +#define __NR_timer_create 254 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +/* Number 263 is reserved for vserver */ +#define __NR_statfs64 265 +#define __NR_fstatfs64 266 +#define __NR_remap_file_pages 267 +/* Number 268 is reserved for new sys_mbind */ +/* Number 269 is reserved for new sys_get_mempolicy */ +/* Number 270 is reserved for new sys_set_mempolicy */ +#define __NR_mq_open 271 +#define __NR_mq_unlink 272 +#define __NR_mq_timedsend 273 +#define __NR_mq_timedreceive 274 +#define __NR_mq_notify 275 +#define __NR_mq_getsetattr 276 +#define __NR_kexec_load 277 +#define __NR_add_key 278 +#define __NR_request_key 279 +#define __NR_keyctl 280 +#define __NR_waitid 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +/* Number 287 is reserved for new sys_migrate_pages */ +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 +#define __NR_futimesat 292 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +/* Number 310 is reserved for new sys_move_pages */ +#define __NR_getcpu 311 +#define __NR_epoll_pwait 312 +#define __NR_utimes 313 +#define __NR_fallocate 314 +#define __NR_utimensat 315 +#define __NR_signalfd 316 +#define __NR_timerfd 317 +#define __NR_eventfd 318 +#define __NR_timerfd_create 319 +#define __NR_timerfd_settime 320 +#define __NR_timerfd_gettime 321 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_event_open 331 +#define __NR_fanotify_init 332 +#define __NR_fanotify_mark 333 +#define __NR_prlimit64 334 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define __NR_setns 339 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define __NR_s390_runtime_instr 342 +#define __NR_kcmp 343 +#define __NR_finit_module 344 +#define __NR_sched_setattr 345 +#define __NR_sched_getattr 346 +#define __NR_renameat2 347 +#define NR_syscalls 348 + +/* + * There are some system calls that are not present on 64 bit, some + * have a different name although they do the same (e.g. __NR_chown32 + * is __NR_chown on 64 bit). + */ +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else + +#define __NR_select 142 +#define __NR_getrlimit 191 /* SuS compliant getrlimit */ +#define __NR_lchown 198 +#define __NR_getuid 199 +#define __NR_getgid 200 +#define __NR_geteuid 201 +#define __NR_getegid 202 +#define __NR_setreuid 203 +#define __NR_setregid 204 +#define __NR_getgroups 205 +#define __NR_setgroups 206 +#define __NR_fchown 207 +#define __NR_setresuid 208 +#define __NR_getresuid 209 +#define __NR_setresgid 210 +#define __NR_getresgid 211 +#define __NR_chown 212 +#define __NR_setuid 213 +#define __NR_setgid 214 +#define __NR_setfsuid 215 +#define __NR_setfsgid 216 +#define __NR_newfstatat 293 + +#endif + +#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 00000000000..a9a4ebf79fa --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -0,0 +1,21 @@ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/arch/s390/include/asm/vtoc.h b/arch/s390/include/uapi/asm/vtoc.h index 8406a2b3157..221419de275 100644 --- a/arch/s390/include/asm/vtoc.h +++ b/arch/s390/include/uapi/asm/vtoc.h @@ -1,9 +1,7 @@ /* - * include/asm-s390/vtoc.h - * * This file contains volume label definitions for DASD devices. * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005 * * Author(s): Volker Sameske <sameske@de.ibm.com> * diff --git a/arch/s390/include/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 00d3bbd4411..f2b18eacaca 100644 --- a/arch/s390/include/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -3,7 +3,7 @@ * * zcrypt 2.1.0 (user-visible header) * - * Copyright (C) 2001, 2006 IBM Corporation + * Copyright IBM Corp. 2001, 2006 * Author(s): Robert Burroughs * Eric Rossman (edrossma@us.ibm.com) * @@ -154,6 +154,67 @@ struct ica_xcRB { unsigned short priority_window; unsigned int status; } __attribute__((packed)); + +/** + * struct ep11_cprb - EP11 connectivity programming request block + * @cprb_len: CPRB header length [0x0020] + * @cprb_ver_id: CPRB version id. [0x04] + * @pad_000: Alignment pad bytes + * @flags: Admin cmd [0x80] or functional cmd [0x00] + * @func_id: Function id / subtype [0x5434] + * @source_id: Source id [originator id] + * @target_id: Target id [usage/ctrl domain id] + * @ret_code: Return code + * @reserved1: Reserved + * @reserved2: Reserved + * @payload_len: Payload length + */ +struct ep11_cprb { + uint16_t cprb_len; + unsigned char cprb_ver_id; + unsigned char pad_000[2]; + unsigned char flags; + unsigned char func_id[2]; + uint32_t source_id; + uint32_t target_id; + uint32_t ret_code; + uint32_t reserved1; + uint32_t reserved2; + uint32_t payload_len; +} __attribute__((packed)); + +/** + * struct ep11_target_dev - EP11 target device list + * @ap_id: AP device id + * @dom_id: Usage domain id + */ +struct ep11_target_dev { + uint16_t ap_id; + uint16_t dom_id; +}; + +/** + * struct ep11_urb - EP11 user request block + * @targets_num: Number of target adapters + * @targets: Addr to target adapter list + * @weight: Level of request priority + * @req_no: Request id/number + * @req_len: Request length + * @req: Addr to request block + * @resp_len: Response length + * @resp: Addr to response block + */ +struct ep11_urb { + uint16_t targets_num; + uint64_t targets; + uint64_t weight; + uint64_t req_no; + uint64_t req_len; + uint64_t req; + uint64_t resp_len; + uint64_t resp; +} __attribute__((packed)); + #define AUTOSELECT ((unsigned int)0xFFFFFFFF) #define ZCRYPT_IOCTL_MAGIC 'z' @@ -183,6 +244,9 @@ struct ica_xcRB { * ZSECSENDCPRB * Send an arbitrary CPRB to a crypto card. * + * ZSENDEP11CPRB + * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card. + * * Z90STAT_STATUS_MASK * Return an 64 element array of unsigned chars for the status of * all devices. @@ -256,6 +320,7 @@ struct ica_xcRB { #define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) #define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) #define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0) +#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0) /* New status calls */ #define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/s390/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 64230bc392f..a95c4ca9961 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -14,33 +14,41 @@ endif CFLAGS_smp.o := -Wno-nonnull # +# Disable tailcall optimizations for stack / callchain walking functions +# since this might generate broken code when accessing register 15 and +# passing its content to other functions. +# +CFLAGS_stacktrace.o += -fno-optimize-sibling-calls +CFLAGS_dumpstack.o += -fno-optimize-sibling-calls + +# # Pass UTS_MACHINE for user_regset definition # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ - processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ - vdso.o vtime.o sysinfo.o nmi.o sclp.o +obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o +obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y += dumpstack.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) +obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -extra-y += head.o init_task.o vmlinux.lds +extra-y += head.o vmlinux.lds extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o -obj-$(CONFIG_SMP) += smp.o topology.o -obj-$(CONFIG_SMP) += $(if $(CONFIG_64BIT),switch_cpu64.o, \ - switch_cpu.o) +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SCHED_BOOK) += topology.o obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ - compat_wrapper.o compat_exec_domain.o \ - $(compat-obj-y) +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o +obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o @@ -48,11 +56,13 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -# Kexec part -S390_KEXEC_OBJS := machine_kexec.o crash.o -S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) +ifdef CONFIG_64BIT +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ + perf_cpum_cf_events.o +obj-y += runtime_instr.o cache.o +endif # vdso obj-$(CONFIG_64BIT) += vdso64/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index fe03c140002..afe1715a4eb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -7,9 +7,11 @@ #define ASM_OFFSETS_C #include <linux/kbuild.h> +#include <linux/kvm_host.h> #include <linux/sched.h> +#include <asm/cputime.h> #include <asm/vdso.h> -#include <asm/sigp.h> +#include <asm/pgtable.h> /* * Make sure that the compiler is new enough. We want a compiler that @@ -27,16 +29,14 @@ int main(void) BLANK(); DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__THREAD_per_cause, - offsetof(struct task_struct, thread.per_event.cause)); - DEFINE(__THREAD_per_address, - offsetof(struct task_struct, thread.per_event.address)); - DEFINE(__THREAD_per_paid, - offsetof(struct task_struct, thread.per_event.paid)); + DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); BLANK(); DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); @@ -47,8 +47,10 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); + DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); + DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); @@ -64,54 +66,63 @@ int main(void) DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); - DEFINE(__VDSO_NTP_MULT, offsetof(struct vdso_data, ntp_mult)); + DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); + DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift)); DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base)); DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time)); /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); BLANK(); - /* constants for SIGP */ - DEFINE(__SIGP_STOP, sigp_stop); - DEFINE(__SIGP_RESTART, sigp_restart); - DEFINE(__SIGP_SENSE, sigp_sense); - DEFINE(__SIGP_INITIAL_CPU_RESET, sigp_initial_cpu_reset); - BLANK(); + /* idle data offsets */ + DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); + DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit)); + DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter)); + DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit)); /* lowcore offsets */ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); - DEFINE(__LC_CPU_ADDRESS, offsetof(struct _lowcore, cpu_addr)); + DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); - DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid)); + DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num)); + DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code)); + DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid)); DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); - DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id)); - DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id)); + DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id)); + DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); + DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id)); + DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id)); + DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code)); DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); - DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); - BLANK(); - DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); + DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code)); DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); + DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); - DEFINE(__LC_SAVE_AREA, offsetof(struct _lowcore, save_area)); + BLANK(); + DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); + DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); + DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); + DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags)); DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); @@ -124,18 +135,23 @@ int main(void) DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); + DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); + DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data)); + DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source)); DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); - DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); - DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); + DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + BLANK(); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); @@ -147,14 +163,20 @@ int main(void) #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ + DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code)); + DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address)); DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); - DEFINE(__LC_SIE_HOOK, offsetof(struct _lowcore, sie_hook)); - DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); + DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); + DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); + DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 15e46ca9433..797a823a227 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -1,18 +1,19 @@ /* * arch/s390/kernel/base.S * - * Copyright IBM Corp. 2006,2007 + * Copyright IBM Corp. 2006, 2007 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> * Michael Holzheu <holzheu@de.ibm.com> */ +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/sigp.h> #ifdef CONFIG_64BIT - .globl s390_base_mcck_handler -s390_base_mcck_handler: +ENTRY(s390_base_mcck_handler) basr %r13,0 0: lg %r15,__LC_PANIC_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD @@ -26,14 +27,14 @@ s390_base_mcck_handler: lpswe __LC_MCK_OLD_PSW .section .bss + .align 8 .globl s390_base_mcck_handler_fn s390_base_mcck_handler_fn: .quad 0 .previous - .globl s390_base_ext_handler -s390_base_ext_handler: - stmg %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_ext_handler) + stmg %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn @@ -41,19 +42,19 @@ s390_base_ext_handler: ltgr %r1,%r1 jz 1f basr %r14,%r1 -1: lmg %r0,%r15,__LC_SAVE_AREA +1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW .section .bss + .align 8 .globl s390_base_ext_handler_fn s390_base_ext_handler_fn: .quad 0 .previous - .globl s390_base_pgm_handler -s390_base_pgm_handler: - stmg %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_pgm_handler) + stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn @@ -61,7 +62,7 @@ s390_base_pgm_handler: ltgr %r1,%r1 jz 1f basr %r14,%r1 - lmg %r0,%r15,__LC_SAVE_AREA + lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -70,15 +71,69 @@ disabled_wait_psw: .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler .section .bss + .align 8 .globl s390_base_pgm_handler_fn s390_base_pgm_handler_fn: .quad 0 .previous +# +# Calls diag 308 subcode 1 and continues execution +# +# The following conditions must be ensured before calling this function: +# * Prefix register = 0 +# * Lowcore protection is disabled +# +ENTRY(diag308_reset) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) + larl %r4,.Lcontinue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 + lghi %r3,0 + lg %r4,0(%r4) # Save PSW + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + diag %r1,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,.Lcontinue_psw # Restore PSW flags + lpswe 0(%r4) +.Lcontinue: + br %r14 +.align 16 +.Lrestart_psw: + .long 0x00080000,0x80000000 + .Lrestart_part2 + + .section .data..nosave,"aw",@progbits +.align 8 +.Lcontinue_psw: + .quad 0,.Lcontinue + .previous + + .section .bss +.align 8 +.Lctlregs: + .rept 16 + .quad 0 + .endr +.Lfpctl: + .long 0 + .previous + #else /* CONFIG_64BIT */ - .globl s390_base_mcck_handler -s390_base_mcck_handler: +ENTRY(s390_base_mcck_handler) basr %r13,0 0: l %r15,__LC_PANIC_STACK # load panic stack ahi %r15,-STACK_FRAME_OVERHEAD @@ -93,14 +148,14 @@ s390_base_mcck_handler: 2: .long s390_base_mcck_handler_fn .section .bss + .align 4 .globl s390_base_mcck_handler_fn s390_base_mcck_handler_fn: .long 0 .previous - .globl s390_base_ext_handler -s390_base_ext_handler: - stm %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_ext_handler) + stm %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -108,21 +163,21 @@ s390_base_ext_handler: ltr %r1,%r1 jz 1f basr %r14,%r1 -1: lm %r0,%r15,__LC_SAVE_AREA +1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpsw __LC_EXT_OLD_PSW 2: .long s390_base_ext_handler_fn .section .bss + .align 4 .globl s390_base_ext_handler_fn s390_base_ext_handler_fn: .long 0 .previous - .globl s390_base_pgm_handler -s390_base_pgm_handler: - stm %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_pgm_handler) + stm %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -130,7 +185,7 @@ s390_base_pgm_handler: ltr %r1,%r1 jz 1f basr %r14,%r1 - lm %r0,%r15,__LC_SAVE_AREA + lm %r0,%r15,__LC_SAVE_AREA_SYNC lpsw __LC_PGM_OLD_PSW 1: lpsw disabled_wait_psw-0b(%r13) @@ -142,6 +197,7 @@ disabled_wait_psw: .long 0x000a0000,0x00000000 + s390_base_pgm_handler .section .bss + .align 4 .globl s390_base_pgm_handler_fn s390_base_pgm_handler_fn: .long 0 diff --git a/arch/s390/kernel/bitmap.c b/arch/s390/kernel/bitmap.c deleted file mode 100644 index 3ae4757b006..00000000000 --- a/arch/s390/kernel/bitmap.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... - * See include/asm/{bitops.h|posix_types.h} for details - * - * Copyright IBM Corp. 1999,2009 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#include <linux/bitops.h> -#include <linux/module.h> - -const char _oi_bitmap[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }; -EXPORT_SYMBOL(_oi_bitmap); - -const char _ni_bitmap[] = { 0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f }; -EXPORT_SYMBOL(_ni_bitmap); - -const char _zb_findmap[] = { - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 }; -EXPORT_SYMBOL(_zb_findmap); - -const char _sb_findmap[] = { - 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0, - 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 }; -EXPORT_SYMBOL(_sb_findmap); diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c new file mode 100644 index 00000000000..c0b03c28d15 --- /dev/null +++ b/arch/s390/kernel/cache.c @@ -0,0 +1,389 @@ +/* + * Extract CPU cache information and expose them via sysfs. + * + * Copyright IBM Corp. 2012 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/notifier.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <asm/facility.h> + +struct cache { + unsigned long size; + unsigned int line_size; + unsigned int associativity; + unsigned int nr_sets; + unsigned int level : 3; + unsigned int type : 2; + unsigned int private : 1; + struct list_head list; +}; + +struct cache_dir { + struct kobject *kobj; + struct cache_index_dir *index; +}; + +struct cache_index_dir { + struct kobject kobj; + int cpu; + struct cache *cache; + struct cache_index_dir *next; +}; + +enum { + CACHE_SCOPE_NOTEXISTS, + CACHE_SCOPE_PRIVATE, + CACHE_SCOPE_SHARED, + CACHE_SCOPE_RESERVED, +}; + +enum { + CACHE_TYPE_SEPARATE, + CACHE_TYPE_DATA, + CACHE_TYPE_INSTRUCTION, + CACHE_TYPE_UNIFIED, +}; + +enum { + EXTRACT_TOPOLOGY, + EXTRACT_LINE_SIZE, + EXTRACT_SIZE, + EXTRACT_ASSOCIATIVITY, +}; + +enum { + CACHE_TI_UNIFIED = 0, + CACHE_TI_DATA = 0, + CACHE_TI_INSTRUCTION, +}; + +struct cache_info { + unsigned char : 4; + unsigned char scope : 2; + unsigned char type : 2; +}; + +#define CACHE_MAX_LEVEL 8 + +union cache_topology { + struct cache_info ci[CACHE_MAX_LEVEL]; + unsigned long long raw; +}; + +static const char * const cache_type_string[] = { + "Data", + "Instruction", + "Unified", +}; + +static struct cache_dir *cache_dir_cpu[NR_CPUS]; +static LIST_HEAD(cache_list); + +void show_cacheinfo(struct seq_file *m) +{ + struct cache *cache; + int index = 0; + + list_for_each_entry(cache, &cache_list, list) { + seq_printf(m, "cache%-11d: ", index); + seq_printf(m, "level=%d ", cache->level); + seq_printf(m, "type=%s ", cache_type_string[cache->type]); + seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); + seq_printf(m, "size=%luK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->line_size); + seq_printf(m, "associativity=%d", cache->associativity); + seq_puts(m, "\n"); + index++; + } +} + +static inline unsigned long ecag(int ai, int li, int ti) +{ + unsigned long cmd, val; + + cmd = ai << 4 | li << 1 | ti; + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (cmd)); + return val; +} + +static int __init cache_add(int level, int private, int type) +{ + struct cache *cache; + int ti; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + if (type == CACHE_TYPE_INSTRUCTION) + ti = CACHE_TI_INSTRUCTION; + else + ti = CACHE_TI_UNIFIED; + cache->size = ecag(EXTRACT_SIZE, level, ti); + cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); + cache->nr_sets = cache->size / cache->associativity; + cache->nr_sets /= cache->line_size; + cache->private = private; + cache->level = level + 1; + cache->type = type - 1; + list_add_tail(&cache->list, &cache_list); + return 0; +} + +static void __init cache_build_info(void) +{ + struct cache *cache, *next; + union cache_topology ct; + int level, private, rc; + + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (level = 0; level < CACHE_MAX_LEVEL; level++) { + switch (ct.ci[level].scope) { + case CACHE_SCOPE_SHARED: + private = 0; + break; + case CACHE_SCOPE_PRIVATE: + private = 1; + break; + default: + return; + } + if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { + rc = cache_add(level, private, CACHE_TYPE_DATA); + rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); + } else { + rc = cache_add(level, private, ct.ci[level].type); + } + if (rc) + goto error; + } + return; +error: + list_for_each_entry_safe(cache, next, &cache_list, list) { + list_del(&cache->list); + kfree(cache); + } +} + +static struct cache_dir *cache_create_cache_dir(int cpu) +{ + struct cache_dir *cache_dir; + struct kobject *kobj = NULL; + struct device *dev; + + dev = get_cpu_device(cpu); + if (!dev) + goto out; + kobj = kobject_create_and_add("cache", &dev->kobj); + if (!kobj) + goto out; + cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); + if (!cache_dir) + goto out; + cache_dir->kobj = kobj; + cache_dir_cpu[cpu] = cache_dir; + return cache_dir; +out: + kobject_put(kobj); + return NULL; +} + +static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj) +{ + return container_of(kobj, struct cache_index_dir, kobj); +} + +static void cache_index_release(struct kobject *kobj) +{ + struct cache_index_dir *index; + + index = kobj_to_cache_index_dir(kobj); + kfree(index); +} + +static ssize_t cache_index_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(kobj, kobj_attr, buf); +} + +#define DEFINE_CACHE_ATTR(_name, _format, _value) \ +static ssize_t cache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct cache_index_dir *index; \ + \ + index = kobj_to_cache_index_dir(kobj); \ + return sprintf(buf, _format, _value); \ +} \ +static struct kobj_attribute cache_##_name##_attr = \ + __ATTR(_name, 0444, cache_##_name##_show, NULL); + +DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); +DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); +DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); +DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); +DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); +DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); + +static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) +{ + struct cache_index_dir *index; + int len; + + index = kobj_to_cache_index_dir(kobj); + len = type ? + cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) : + cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)); + len += sprintf(&buf[len], "\n"); + return len; +} + +static ssize_t shared_cpu_map_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 0, buf); +} +static struct kobj_attribute cache_shared_cpu_map_attr = + __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); + +static ssize_t shared_cpu_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 1, buf); +} +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + +static struct attribute *cache_index_default_attrs[] = { + &cache_type_attr.attr, + &cache_size_attr.attr, + &cache_number_of_sets_attr.attr, + &cache_ways_of_associativity_attr.attr, + &cache_level_attr.attr, + &cache_coherency_line_size_attr.attr, + &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, + NULL, +}; + +static const struct sysfs_ops cache_index_ops = { + .show = cache_index_show, +}; + +static struct kobj_type cache_index_type = { + .sysfs_ops = &cache_index_ops, + .release = cache_index_release, + .default_attrs = cache_index_default_attrs, +}; + +static int cache_create_index_dir(struct cache_dir *cache_dir, + struct cache *cache, int index, int cpu) +{ + struct cache_index_dir *index_dir; + int rc; + + index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); + if (!index_dir) + return -ENOMEM; + index_dir->cache = cache; + index_dir->cpu = cpu; + rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, + cache_dir->kobj, "index%d", index); + if (rc) + goto out; + index_dir->next = cache_dir->index; + cache_dir->index = index_dir; + return 0; +out: + kfree(index_dir); + return rc; +} + +static int cache_add_cpu(int cpu) +{ + struct cache_dir *cache_dir; + struct cache *cache; + int rc, index = 0; + + if (list_empty(&cache_list)) + return 0; + cache_dir = cache_create_cache_dir(cpu); + if (!cache_dir) + return -ENOMEM; + list_for_each_entry(cache, &cache_list, list) { + if (!cache->private) + break; + rc = cache_create_index_dir(cache_dir, cache, index, cpu); + if (rc) + return rc; + index++; + } + return 0; +} + +static void cache_remove_cpu(int cpu) +{ + struct cache_index_dir *index, *next; + struct cache_dir *cache_dir; + + cache_dir = cache_dir_cpu[cpu]; + if (!cache_dir) + return; + index = cache_dir->index; + while (index) { + next = index->next; + kobject_put(&index->kobj); + index = next; + } + kobject_put(cache_dir->kobj); + kfree(cache_dir); + cache_dir_cpu[cpu] = NULL; +} + +static int cache_hotplug(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + int rc = 0; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cache_add_cpu(cpu); + if (rc) + cache_remove_cpu(cpu); + break; + case CPU_DEAD: + cache_remove_cpu(cpu); + break; + } + return rc ? NOTIFY_BAD : NOTIFY_OK; +} + +static int __init cache_init(void) +{ + int cpu; + + if (!test_facility(34)) + return 0; + cache_build_info(); + + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) + cache_add_cpu(cpu); + __hotcpu_notifier(cache_hotplug, 0); + cpu_notifier_register_done(); + return 0; +} +device_initcall(cache_init); diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c deleted file mode 100644 index 914d49444f9..00000000000 --- a/arch/s390/kernel/compat_exec_domain.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Support for 32-bit Linux for S390 personality. - * - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/personality.h> -#include <linux/sched.h> - -static struct exec_domain s390_exec_domain; - -static int __init s390_init (void) -{ - s390_exec_domain.name = "Linux/s390"; - s390_exec_domain.handler = NULL; - s390_exec_domain.pers_low = PER_LINUX32; - s390_exec_domain.pers_high = PER_LINUX32; - s390_exec_domain.signal_map = default_exec_domain.signal_map; - s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap; - register_exec_domain(&s390_exec_domain); - return 0; -} - -__initcall(s390_init); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa86dd9..ca38139423a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -1,8 +1,6 @@ /* - * arch/s390x/kernel/linux32.c - * * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Gerhard Tonn (ton@de.ibm.com) * Thomas Spatzier (tspat@de.ibm.com) @@ -60,13 +58,6 @@ #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); - /* For this source file, we want overflow handling. */ #undef high2lowuid @@ -95,92 +86,111 @@ long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | #define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) #define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) -asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename, + u16, user, u16, group) { return sys_chown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, + filename, u16, user, u16, group) { return sys_lchown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group) { return sys_fchown(fd, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_setregid16(u16 rgid, u16 egid) +COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) { return sys_setregid(low2highgid(rgid), low2highgid(egid)); } -asmlinkage long sys32_setgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) { return sys_setgid((gid_t)gid); } -asmlinkage long sys32_setreuid16(u16 ruid, u16 euid) +COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) { return sys_setreuid(low2highuid(ruid), low2highuid(euid)); } -asmlinkage long sys32_setuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) { return sys_setuid((uid_t)uid); } -asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) +COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) { return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); + low2highuid(suid)); } -asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid) +COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp, + u16 __user *, euidp, u16 __user *, suidp) { + const struct cred *cred = current_cred(); int retval; + u16 ruid, euid, suid; + + ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); + euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); + suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); - if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && - !(retval = put_user(high2lowuid(current->cred->euid), euid))) - retval = put_user(high2lowuid(current->cred->suid), suid); + if (!(retval = put_user(ruid, ruidp)) && + !(retval = put_user(euid, euidp))) + retval = put_user(suid, suidp); return retval; } -asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) +COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid) { return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); + low2highgid(sgid)); } -asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid) +COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, + u16 __user *, egidp, u16 __user *, sgidp) { + const struct cred *cred = current_cred(); int retval; + u16 rgid, egid, sgid; - if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && - !(retval = put_user(high2lowgid(current->cred->egid), egid))) - retval = put_user(high2lowgid(current->cred->sgid), sgid); + rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); + egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); + sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); + + if (!(retval = put_user(rgid, rgidp)) && + !(retval = put_user(egid, egidp))) + retval = put_user(sgid, sgidp); return retval; } -asmlinkage long sys32_setfsuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) { return sys_setfsuid((uid_t)uid); } -asmlinkage long sys32_setfsgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) { return sys_setfsgid((gid_t)gid); } static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - group = (u16)GROUP_AT(group_info, i); + kgid = GROUP_AT(group_info, i); + group = (u16)from_kgid_munged(user_ns, kgid); if (put_user(group, grouplist+i)) return -EFAULT; } @@ -190,43 +200,51 @@ static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { if (get_user(group, grouplist+i)) return -EFAULT; - GROUP_AT(group_info, i) = (gid_t)group; + + kgid = make_kgid(user_ns, (gid_t)group); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; } -asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + get_group_info(cred->group_info); + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); + put_group_info(cred->group_info); return i; } -asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist) { struct group_info *group_info; int retval; @@ -251,273 +269,65 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) return retval; } -asmlinkage long sys32_getuid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getuid16) { - return high2lowuid(current->cred->uid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); } -asmlinkage long sys32_geteuid16(void) +COMPAT_SYSCALL_DEFINE0(s390_geteuid16) { - return high2lowuid(current->cred->euid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); } -asmlinkage long sys32_getgid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getgid16) { - return high2lowgid(current->cred->gid); + return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); } -asmlinkage long sys32_getegid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getegid16) { - return high2lowgid(current->cred->egid); + return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } -/* - * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. - * - * This is really horribly ugly. - */ #ifdef CONFIG_SYSVIPC -asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, + compat_ulong_t, third, compat_uptr_t, ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - - call &= 0xffff; - - switch (call) { - case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), - second, compat_ptr(third)); - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), - second, NULL); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: - return compat_sys_semctl(first, second, third, - compat_ptr(ptr)); - case MSGSND: - return compat_sys_msgsnd(first, second, third, - compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, 0, third, - 0, compat_ptr(ptr)); - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: - return compat_sys_shmat(first, second, third, - 0, compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - return sys_shmget(first, (unsigned)second, third); - case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); - } - - return -ENOSYS; + return compat_sys_ipc(call, first, second, third, ptr, third); } #endif -asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) +COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) { - if ((int)high < 0) - return -EINVAL; - else - return sys_truncate(path, (high << 32) | low); + return sys_truncate(path, (unsigned long)high << 32 | low); } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) +COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) { - if ((int)high < 0) - return -EINVAL; - else - return sys_ftruncate(fd, (high << 32) | low); -} - -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, - (struct timespec __force __user *) &t); - set_fs (old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; + return sys_ftruncate(fd, (unsigned long)high << 32 | low); } -asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) - return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } - } - set_fs (KERNEL_DS); - ret = sys_rt_sigprocmask(how, - set ? (sigset_t __force __user *) &s : NULL, - oset ? (sigset_t __force __user *) &s : NULL, - sigsetsize); - set_fs (old_fs); - if (ret) return ret; - if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return 0; -} - -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, - size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); - ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); - set_fs (old_fs); - if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return ret; -} - -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - if (copy_siginfo_from_user32(&info, uinfo)) - return -EFAULT; - set_fs (KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *) &info); - set_fs (old_fs); - return ret; -} - -/* - * sys32_execve() executes a new program after the asm stub has set - * things up for us. This should basically do what I want it to. - */ -asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv, - compat_uptr_t __user *envp) -{ - struct pt_regs *regs = task_pt_regs(current); - char *filename; - long rc; - - filename = getname(name); - rc = PTR_ERR(filename); - if (IS_ERR(filename)) - return rc; - rc = compat_do_execve(filename, argv, envp, regs); - if (rc) - goto out; - current->thread.fp_regs.fpc=0; - asm volatile("sfpc %0,0" : : "d" (0)); - rc = regs->gprs[2]; -out: - putname(filename); - return rc; -} - -asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); + return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); + return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) +COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) { - return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); -} - -asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __force __user *) &of : NULL, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; -} - -asmlinkage long sys32_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, s32 count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? (loff_t __force __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; + return sys_readahead(fd, (unsigned long)high << 32 | low, count); } struct stat64_emu31 { @@ -556,8 +366,8 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) tmp.__st_ino = (u32)stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blksize = (u32)stat->blksize; @@ -569,7 +379,7 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); @@ -578,7 +388,7 @@ asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 _ return ret; } -asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -587,7 +397,7 @@ asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 return ret; } -asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -596,8 +406,8 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta return ret; } -asmlinkage long sys32_fstatat64(unsigned int dfd, const char __user *filename, - struct stat64_emu31 __user* statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, + struct stat64_emu31 __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -623,7 +433,7 @@ struct mmap_arg_struct_emu31 { compat_ulong_t offset; }; -asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) +COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; @@ -631,22 +441,20 @@ asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) return -EFAULT; if (a.offset & ~PAGE_MASK) return -EINVAL; - a.addr = (unsigned long) compat_ptr(a.addr); return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); } -asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) +COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - a.addr = (unsigned long) compat_ptr(a.addr); return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } -asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -654,7 +462,7 @@ asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) return sys_read(fd, buf, count); } -asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -668,14 +476,13 @@ asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t cou * because the 31 bit values differ from the 64 bit values. */ -asmlinkage long -sys32_fadvise64(int fd, loff_t offset, size_t len, int advise) +COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) { if (advise == 4) advise = POSIX_FADV_DONTNEED; else if (advise == 5) advise = POSIX_FADV_NOREUSE; - return sys_fadvise64(fd, offset, len, advise); + return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise); } struct fadvise64_64_args { @@ -685,8 +492,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -sys32_fadvise64_64(struct fadvise64_64_args __user *args) +COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; @@ -698,3 +504,17 @@ sys32_fadvise64_64(struct fadvise64_64_args __user *args) a.advice = POSIX_FADV_NOREUSE; return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } + +COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, + u32, nhigh, u32, nlow, unsigned int, flags) +{ + return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, + ((u64)nhigh << 32) + nlow, flags); +} + +COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, + u32, lenhigh, u32, lenlow) +{ + return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, + ((u64)lenhigh << 32) + lenlow); +} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 9635d759c2b..70d4b7c4bea 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -17,81 +17,6 @@ struct ipc_kludge_32 { __s32 msgtyp; }; -struct old_sigaction32 { - __u32 sa_handler; /* Really a pointer, but need to deal with 32 bits */ - compat_old_sigset_t sa_mask; /* A 32 bit mask */ - __u32 sa_flags; - __u32 sa_restorer; /* Another 32 bit pointer */ -}; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[((128/sizeof(int)) - 3)]; - - /* kill() */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - } _timer; - - /* POSIX.1b signals */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - pid_t _pid; /* which child */ - uid_t _uid; /* sender's uid */ - int _status;/* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - __u32 _addr; /* faulting insn/memory ref. - pointer */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} compat_siginfo_t; - -/* - * How these fields are to be accessed. - */ -#define si_pid _sifields._kill._pid -#define si_uid _sifields._kill._uid -#define si_status _sifields._sigchld._status -#define si_utime _sifields._sigchld._utime -#define si_stime _sifields._sigchld._stime -#define si_value _sifields._rt._sigval -#define si_int _sifields._rt._sigval.sival_int -#define si_ptr _sifields._rt._sigval.sival_ptr -#define si_addr _sifields._sigfault._addr -#define si_band _sifields._sigpoll._band -#define si_fd _sifields._sigpoll._fd -#define si_tid _sifields._timer._tid -#define si_overrun _sifields._timer._overrun - /* asm/sigcontext.h */ typedef union { @@ -102,6 +27,7 @@ typedef union typedef struct { unsigned int fpc; + unsigned int pad; freg_t32 fprs[__NUM_FPRS]; } _s390_fp_regs32; @@ -136,92 +62,59 @@ struct sigcontext32 }; /* asm/signal.h */ -struct sigaction32 { - __u32 sa_handler; /* pointer */ - __u32 sa_flags; - __u32 sa_restorer; /* pointer */ - compat_sigset_t sa_mask; /* mask last for extensibility */ -}; - -typedef struct { - __u32 ss_sp; /* pointer */ - int ss_flags; - compat_size_t ss_size; -} stack_t32; /* asm/ucontext.h */ struct ucontext32 { __u32 uc_flags; __u32 uc_link; /* pointer */ - stack_t32 uc_stack; + compat_stack_t uc_stack; _sigregs32 uc_mcontext; - compat_sigset_t uc_sigmask; /* mask last for extensibility */ + compat_sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(compat_sigset_t)]; }; struct stat64_emu31; struct mmap_arg_struct_emu31; struct fadvise64_64_args; -struct old_sigaction32; -struct old_sigaction32; -long sys32_chown16(const char __user * filename, u16 user, u16 group); -long sys32_lchown16(const char __user * filename, u16 user, u16 group); -long sys32_fchown16(unsigned int fd, u16 user, u16 group); -long sys32_setregid16(u16 rgid, u16 egid); -long sys32_setgid16(u16 gid); -long sys32_setreuid16(u16 ruid, u16 euid); -long sys32_setuid16(u16 uid); -long sys32_setresuid16(u16 ruid, u16 euid, u16 suid); -long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); -long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid); -long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); -long sys32_setfsuid16(u16 uid); -long sys32_setfsgid16(u16 gid); -long sys32_getgroups16(int gidsetsize, u16 __user *grouplist); -long sys32_setgroups16(int gidsetsize, u16 __user *grouplist); -long sys32_getuid16(void); -long sys32_geteuid16(void); -long sys32_getgid16(void); -long sys32_getegid16(void); -long sys32_ipc(u32 call, int first, int second, int third, u32 ptr); -long sys32_truncate64(const char __user * path, unsigned long high, - unsigned long low); -long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); -long sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval); -long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, size_t sigsetsize); -long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize); -long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo); -long sys32_execve(const char __user *name, compat_uptr_t __user *argv, - compat_uptr_t __user *envp); -long sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs); -long sys32_delete_module(const char __user *name_user, unsigned int flags); -long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, - u32 poshi, u32 poslo); -long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo); -compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count); -long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, - size_t count); -long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, - s32 count); -long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf); -long sys32_lstat64(const char __user * filename, - struct stat64_emu31 __user * statbuf); -long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf); -long sys32_fstatat64(unsigned int dfd, const char __user *filename, - struct stat64_emu31 __user* statbuf, int flag); -unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg); -long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg); -long sys32_read(unsigned int fd, char __user * buf, size_t count); -long sys32_write(unsigned int fd, const char __user * buf, size_t count); -long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise); -long sys32_fadvise64_64(struct fadvise64_64_args __user *args); -long sys32_sigaction(int sig, const struct old_sigaction32 __user *act, - struct old_sigaction32 __user *oact); -long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, - struct sigaction32 __user *oact, size_t sigsetsize); -long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss); +long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group); +long compat_sys_s390_setregid16(u16 rgid, u16 egid); +long compat_sys_s390_setgid16(u16 gid); +long compat_sys_s390_setreuid16(u16 ruid, u16 euid); +long compat_sys_s390_setuid16(u16 uid); +long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid); +long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); +long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid); +long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); +long compat_sys_s390_setfsuid16(u16 uid); +long compat_sys_s390_setfsgid16(u16 gid); +long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_getuid16(void); +long compat_sys_s390_geteuid16(void); +long compat_sys_s390_getgid16(void); +long compat_sys_s390_getegid16(void); +long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); +long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); +long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); +long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); +long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count); +long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count); +long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); +long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); +long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); +long compat_sys_sigreturn(void); +long compat_sys_rt_sigreturn(void); + #endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index eee999853a7..f204d692036 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/compat_signal.c - * - * Copyright (C) IBM Corp. 2000,2006 + * Copyright IBM Corp. 2000, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * Gerhard Tonn (ton@de.ibm.com) * @@ -27,12 +25,11 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/switch_to.h> #include "compat_linux.h" #include "compat_ptrace.h" #include "entry.h" -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - typedef struct { __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; @@ -52,13 +49,10 @@ typedef struct __u32 gprs_high[NUM_GPRS]; } rt_sigframe32; -int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - /* If you change siginfo_t structure, please be sure this code is fixed accordingly. It should never copy any pad contained in the structure @@ -105,7 +99,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) break; } } - return err; + return err ? -EFAULT : 0; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) @@ -113,9 +107,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) int err; u32 tmp; - if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); @@ -141,7 +132,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -156,193 +148,72 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; } } - return err; -} - -asmlinkage long -sys32_sigaction(int sig, const struct old_sigaction32 __user *act, - struct old_sigaction32 __user *oact) -{ - struct k_sigaction new_ka, old_ka; - unsigned long sa_handler, sa_restorer; - int ret; - - if (act) { - compat_old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(sa_handler, &act->sa_handler) || - __get_user(sa_restorer, &act->sa_restorer) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - new_ka.sa.sa_handler = (__sighandler_t) sa_handler; - new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - sa_handler = (unsigned long) old_ka.sa.sa_handler; - sa_restorer = (unsigned long) old_ka.sa.sa_restorer; - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(sa_handler, &oact->sa_handler) || - __put_user(sa_restorer, &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} - -asmlinkage long -sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, - struct sigaction32 __user *oact, size_t sigsetsize) -{ - struct k_sigaction new_ka, old_ka; - unsigned long sa_handler; - int ret; - compat_sigset_t set32; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - - if (act) { - ret = get_user(sa_handler, &act->sa_handler); - ret |= __copy_from_user(&set32, &act->sa_mask, - sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - - if (ret) - return -EFAULT; - new_ka.sa.sa_handler = (__sighandler_t) sa_handler; - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } - ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); - ret |= __copy_to_user(&oact->sa_mask, &set32, - sizeof(compat_sigset_t)); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - } - - return ret; -} - -asmlinkage long -sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss) -{ - struct pt_regs *regs = task_pt_regs(current); - stack_t kss, koss; - unsigned long ss_sp; - int ret, err = 0; - mm_segment_t old_fs = get_fs(); - - if (uss) { - if (!access_ok(VERIFY_READ, uss, sizeof(*uss))) - return -EFAULT; - err |= __get_user(ss_sp, &uss->ss_sp); - err |= __get_user(kss.ss_size, &uss->ss_size); - err |= __get_user(kss.ss_flags, &uss->ss_flags); - if (err) - return -EFAULT; - kss.ss_sp = (void __user *) ss_sp; - } - - set_fs (KERNEL_DS); - ret = do_sigaltstack((stack_t __force __user *) (uss ? &kss : NULL), - (stack_t __force __user *) (uoss ? &koss : NULL), - regs->gprs[15]); - set_fs (old_fs); - - if (!ret && uoss) { - if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss))) - return -EFAULT; - ss_sp = (unsigned long) koss.ss_sp; - err |= __put_user(ss_sp, &uoss->ss_sp); - err |= __put_user(koss.ss_size, &uoss->ss_size); - err |= __put_user(koss.ss_flags, &uoss->ss_flags); - if (err) - return -EFAULT; - } - return ret; + return err ? -EFAULT : 0; } static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) { - _s390_regs_common32 regs32; - int err, i; + _sigregs32 user_sregs; + int i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; + user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32); + user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI; + user_sregs.regs.psw.mask |= PSW32_USER_BITS; + user_sregs.regs.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) - regs32.gprs[i] = (__u32) regs->gprs[i]; + user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); - memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs)); - err = __copy_to_user(&sregs->regs, ®s32, sizeof(regs32)); - if (err) - return err; - save_fp_regs(¤t->thread.fp_regs); - /* s390_fp_regs and _s390_fp_regs32 are the same ! */ - return __copy_to_user(&sregs->fpregs, ¤t->thread.fp_regs, - sizeof(_s390_fp_regs32)); + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(user_sregs.regs.acrs)); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); + memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, + sizeof(user_sregs.fpregs)); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) + return -EFAULT; + return 0; } static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) { - _s390_regs_common32 regs32; - int err, i; + _sigregs32 user_sregs; + int i; /* Alwys make any pending restarted system call return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); - if (err) - return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); - regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); + if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI)) + return -EINVAL; + + /* Loading the floating-point-control word can fail. Do that first. */ + if (restore_fp_ctl(&user_sregs.fpregs.fpc)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 | + (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = (__u64) regs32.gprs[i]; - memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs)); + regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(current->thread.acrs)); restore_access_regs(current->thread.acrs); - err = __copy_from_user(¤t->thread.fp_regs, &sregs->fpregs, - sizeof(_s390_fp_regs32)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; - if (err) - return err; + memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, + sizeof(current->thread.fp_regs)); - restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + restore_fp_regs(current->thread.fp_regs.fprs); + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -353,91 +224,58 @@ static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) for (i = 0; i < NUM_GPRS; i++) gprs_high[i] = regs->gprs[i] >> 32; - - return __copy_to_user(uregs, &gprs_high, sizeof(gprs_high)); + if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high))) + return -EFAULT; + return 0; } static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) { __u32 gprs_high[NUM_GPRS]; - int err, i; + int i; - err = __copy_from_user(&gprs_high, uregs, sizeof(gprs_high)); - if (err) - return err; + if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high))) + return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)®s->gprs[i] = gprs_high[i]; return 0; } -asmlinkage long sys32_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; if (restore_sigregs_gprs_high(regs, frame->gprs_high)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; } -asmlinkage long sys32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; sigset_t set; - stack_t st; - __u32 ss_sp; - int err; - mm_segment_t old_fs = get_fs(); - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_gprs_high(regs, frame->gprs_high)) goto badframe; - - err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); - st.ss_sp = compat_ptr(ss_sp); - err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); - err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); - if (err) + if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; - - set_fs (KERNEL_DS); - do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]); - set_fs (old_fs); - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -469,13 +307,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -493,8 +324,6 @@ static int setup_frame32(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32))) - goto give_sigsegv; if (frame == (void __user *) -1UL) goto give_sigsegv; @@ -512,11 +341,11 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -525,19 +354,28 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -551,8 +389,6 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, { int err = 0; rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32))) - goto give_sigsegv; if (frame == (void __user *) -1UL) goto give_sigsegv; @@ -563,10 +399,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gprs[15]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); err |= save_sigregs32(regs, &frame->uc.uc_mcontext); err |= save_sigregs_gprs_high(regs, frame->gprs_high); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -576,24 +409,30 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __force __user *)(frame->retcode))) + goto give_sigsegv; } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64 __force) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; return 0; give_sigsegv: @@ -605,9 +444,8 @@ give_sigsegv: * OK, we're invoking a handler */ -int -handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +void handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { int ret; @@ -616,15 +454,9 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame32(sig, ka, info, oldset, regs); else ret = setup_frame32(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - return ret; + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S deleted file mode 100644 index 8e60fb23b90..00000000000 --- a/arch/s390/kernel/compat_wrapper.S +++ /dev/null @@ -1,1879 +0,0 @@ -/* -* arch/s390/kernel/compat_wrapper.S -* wrapper for 31 bit compatible system calls. -* -* Copyright (C) IBM Corp. 2000,2006 -* Author(s): Gerhard Tonn (ton@de.ibm.com), -* Thomas Spatzier (tspat@de.ibm.com) -*/ - - .globl sys32_exit_wrapper -sys32_exit_wrapper: - lgfr %r2,%r2 # int - jg sys_exit # branch to sys_exit - - .globl sys32_read_wrapper -sys32_read_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys32_read # branch to sys_read - - .globl sys32_write_wrapper -sys32_write_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - jg sys32_write # branch to system call - - .globl sys32_open_wrapper -sys32_open_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_open # branch to system call - - .globl sys32_close_wrapper -sys32_close_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_close # branch to system call - - .globl sys32_creat_wrapper -sys32_creat_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_creat # branch to system call - - .globl sys32_link_wrapper -sys32_link_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_link # branch to system call - - .globl sys32_unlink_wrapper -sys32_unlink_wrapper: - llgtr %r2,%r2 # const char * - jg sys_unlink # branch to system call - - .globl sys32_chdir_wrapper -sys32_chdir_wrapper: - llgtr %r2,%r2 # const char * - jg sys_chdir # branch to system call - - .globl sys32_time_wrapper -sys32_time_wrapper: - llgtr %r2,%r2 # int * - jg compat_sys_time # branch to system call - - .globl sys32_mknod_wrapper -sys32_mknod_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # dev - jg sys_mknod # branch to system call - - .globl sys32_chmod_wrapper -sys32_chmod_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # mode_t - jg sys_chmod # branch to system call - - .globl sys32_lchown16_wrapper -sys32_lchown16_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_lchown16 # branch to system call - - .globl sys32_lseek_wrapper -sys32_lseek_wrapper: - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # off_t - llgfr %r4,%r4 # unsigned int - jg sys_lseek # branch to system call - -#sys32_getpid_wrapper # void - - .globl sys32_mount_wrapper -sys32_mount_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # char * - llgfr %r5,%r5 # unsigned long - llgtr %r6,%r6 # void * - jg compat_sys_mount # branch to system call - - .globl sys32_oldumount_wrapper -sys32_oldumount_wrapper: - llgtr %r2,%r2 # char * - jg sys_oldumount # branch to system call - - .globl sys32_setuid16_wrapper -sys32_setuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setuid16 # branch to system call - -#sys32_getuid16_wrapper # void - - .globl sys32_ptrace_wrapper -sys32_ptrace_wrapper: - lgfr %r2,%r2 # long - lgfr %r3,%r3 # long - llgtr %r4,%r4 # long - llgfr %r5,%r5 # long - jg compat_sys_ptrace # branch to system call - - .globl sys32_alarm_wrapper -sys32_alarm_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_alarm # branch to system call - - .globl compat_sys_utime_wrapper -compat_sys_utime_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_utimbuf * - jg compat_sys_utime # branch to system call - - .globl sys32_access_wrapper -sys32_access_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_access # branch to system call - - .globl sys32_nice_wrapper -sys32_nice_wrapper: - lgfr %r2,%r2 # int - jg sys_nice # branch to system call - -#sys32_sync_wrapper # void - - .globl sys32_kill_wrapper -sys32_kill_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_kill # branch to system call - - .globl sys32_rename_wrapper -sys32_rename_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_rename # branch to system call - - .globl sys32_mkdir_wrapper -sys32_mkdir_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_mkdir # branch to system call - - .globl sys32_rmdir_wrapper -sys32_rmdir_wrapper: - llgtr %r2,%r2 # const char * - jg sys_rmdir # branch to system call - - .globl sys32_dup_wrapper -sys32_dup_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_dup # branch to system call - - .globl sys32_pipe_wrapper -sys32_pipe_wrapper: - llgtr %r2,%r2 # u32 * - jg sys_pipe # branch to system call - - .globl compat_sys_times_wrapper -compat_sys_times_wrapper: - llgtr %r2,%r2 # struct compat_tms * - jg compat_sys_times # branch to system call - - .globl sys32_brk_wrapper -sys32_brk_wrapper: - llgtr %r2,%r2 # unsigned long - jg sys_brk # branch to system call - - .globl sys32_setgid16_wrapper -sys32_setgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setgid16 # branch to system call - -#sys32_getgid16_wrapper # void - - .globl sys32_signal_wrapper -sys32_signal_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __sighandler_t - jg sys_signal - -#sys32_geteuid16_wrapper # void - -#sys32_getegid16_wrapper # void - - .globl sys32_acct_wrapper -sys32_acct_wrapper: - llgtr %r2,%r2 # char * - jg sys_acct # branch to system call - - .globl sys32_umount_wrapper -sys32_umount_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_umount # branch to system call - - .globl compat_sys_ioctl_wrapper -compat_sys_ioctl_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned int - jg compat_sys_ioctl # branch to system call - - .globl compat_sys_fcntl_wrapper -compat_sys_fcntl_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl # branch to system call - - .globl sys32_setpgid_wrapper -sys32_setpgid_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - jg sys_setpgid # branch to system call - - .globl sys32_umask_wrapper -sys32_umask_wrapper: - lgfr %r2,%r2 # int - jg sys_umask # branch to system call - - .globl sys32_chroot_wrapper -sys32_chroot_wrapper: - llgtr %r2,%r2 # char * - jg sys_chroot # branch to system call - - .globl sys32_ustat_wrapper -sys32_ustat_wrapper: - llgfr %r2,%r2 # dev_t - llgtr %r3,%r3 # struct ustat * - jg compat_sys_ustat - - .globl sys32_dup2_wrapper -sys32_dup2_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_dup2 # branch to system call - -#sys32_getppid_wrapper # void - -#sys32_getpgrp_wrapper # void - -#sys32_setsid_wrapper # void - - .globl sys32_sigaction_wrapper -sys32_sigaction_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct old_sigaction * - llgtr %r4,%r4 # struct old_sigaction32 * - jg sys32_sigaction # branch to system call - - .globl sys32_setreuid16_wrapper -sys32_setreuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - jg sys32_setreuid16 # branch to system call - - .globl sys32_setregid16_wrapper -sys32_setregid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - jg sys32_setregid16 # branch to system call - - .globl sys_sigsuspend_wrapper -sys_sigsuspend_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # old_sigset_t - jg sys_sigsuspend - - .globl compat_sys_sigpending_wrapper -compat_sys_sigpending_wrapper: - llgtr %r2,%r2 # compat_old_sigset_t * - jg compat_sys_sigpending # branch to system call - - .globl sys32_sethostname_wrapper -sys32_sethostname_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_sethostname # branch to system call - - .globl compat_sys_setrlimit_wrapper -compat_sys_setrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_setrlimit # branch to system call - - .globl compat_sys_old_getrlimit_wrapper -compat_sys_old_getrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_old_getrlimit # branch to system call - - .globl compat_sys_getrlimit_wrapper -compat_sys_getrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_getrlimit # branch to system call - - .globl sys32_mmap2_wrapper -sys32_mmap2_wrapper: - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg sys32_mmap2 # branch to system call - - .globl compat_sys_getrusage_wrapper -compat_sys_getrusage_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct rusage_emu31 * - jg compat_sys_getrusage # branch to system call - - .globl compat_sys_gettimeofday_wrapper -compat_sys_gettimeofday_wrapper: - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg compat_sys_gettimeofday # branch to system call - - .globl compat_sys_settimeofday_wrapper -compat_sys_settimeofday_wrapper: - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg compat_sys_settimeofday # branch to system call - - .globl sys32_getgroups16_wrapper -sys32_getgroups16_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_getgroups16 # branch to system call - - .globl sys32_setgroups16_wrapper -sys32_setgroups16_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_setgroups16 # branch to system call - - .globl sys32_symlink_wrapper -sys32_symlink_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_symlink # branch to system call - - .globl sys32_readlink_wrapper -sys32_readlink_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_readlink # branch to system call - - .globl sys32_uselib_wrapper -sys32_uselib_wrapper: - llgtr %r2,%r2 # const char * - jg sys_uselib # branch to system call - - .globl sys32_swapon_wrapper -sys32_swapon_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_swapon # branch to system call - - .globl sys32_reboot_wrapper -sys32_reboot_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # unsigned int - llgtr %r5,%r5 # void * - jg sys_reboot # branch to system call - - .globl old32_readdir_wrapper -old32_readdir_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_old_readdir # branch to system call - - .globl old32_mmap_wrapper -old32_mmap_wrapper: - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg old32_mmap # branch to system call - - .globl sys32_munmap_wrapper -sys32_munmap_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munmap # branch to system call - - .globl sys32_truncate_wrapper -sys32_truncate_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # long - jg sys_truncate # branch to system call - - .globl sys32_ftruncate_wrapper -sys32_ftruncate_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - jg sys_ftruncate # branch to system call - - .globl sys32_fchmod_wrapper -sys32_fchmod_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # mode_t - jg sys_fchmod # branch to system call - - .globl sys32_fchown16_wrapper -sys32_fchown16_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # compat_uid_t - llgfr %r4,%r4 # compat_uid_t - jg sys32_fchown16 # branch to system call - - .globl sys32_getpriority_wrapper -sys32_getpriority_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_getpriority # branch to system call - - .globl sys32_setpriority_wrapper -sys32_setpriority_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_setpriority # branch to system call - - .globl compat_sys_statfs_wrapper -compat_sys_statfs_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_statfs # branch to system call - - .globl compat_sys_fstatfs_wrapper -compat_sys_fstatfs_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_fstatfs # branch to system call - - .globl compat_sys_socketcall_wrapper -compat_sys_socketcall_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # u32 * - jg compat_sys_socketcall # branch to system call - - .globl sys32_syslog_wrapper -sys32_syslog_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_syslog # branch to system call - - .globl compat_sys_setitimer_wrapper -compat_sys_setitimer_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct itimerval_emu31 * - llgtr %r4,%r4 # struct itimerval_emu31 * - jg compat_sys_setitimer # branch to system call - - .globl compat_sys_getitimer_wrapper -compat_sys_getitimer_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct itimerval_emu31 * - jg compat_sys_getitimer # branch to system call - - .globl compat_sys_newstat_wrapper -compat_sys_newstat_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newstat # branch to system call - - .globl compat_sys_newlstat_wrapper -compat_sys_newlstat_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newlstat # branch to system call - - .globl compat_sys_newfstat_wrapper -compat_sys_newfstat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newfstat # branch to system call - -#sys32_vhangup_wrapper # void - - .globl compat_sys_wait4_wrapper -compat_sys_wait4_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # unsigned int * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct rusage * - jg compat_sys_wait4 # branch to system call - - .globl sys32_swapoff_wrapper -sys32_swapoff_wrapper: - llgtr %r2,%r2 # const char * - jg sys_swapoff # branch to system call - - .globl compat_sys_sysinfo_wrapper -compat_sys_sysinfo_wrapper: - llgtr %r2,%r2 # struct sysinfo_emu31 * - jg compat_sys_sysinfo # branch to system call - - .globl sys32_ipc_wrapper -sys32_ipc_wrapper: - llgfr %r2,%r2 # uint - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # u32 - jg sys32_ipc # branch to system call - - .globl sys32_fsync_wrapper -sys32_fsync_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fsync # branch to system call - -#sys32_sigreturn_wrapper # done in sigreturn_glue - -#sys32_clone_wrapper # done in clone_glue - - .globl sys32_setdomainname_wrapper -sys32_setdomainname_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_setdomainname # branch to system call - - .globl sys32_newuname_wrapper -sys32_newuname_wrapper: - llgtr %r2,%r2 # struct new_utsname * - jg sys_newuname # branch to system call - - .globl compat_sys_adjtimex_wrapper -compat_sys_adjtimex_wrapper: - llgtr %r2,%r2 # struct compat_timex * - jg compat_sys_adjtimex # branch to system call - - .globl sys32_mprotect_wrapper -sys32_mprotect_wrapper: - llgtr %r2,%r2 # unsigned long (actually pointer - llgfr %r3,%r3 # size_t - llgfr %r4,%r4 # unsigned long - jg sys_mprotect # branch to system call - - .globl compat_sys_sigprocmask_wrapper -compat_sys_sigprocmask_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_old_sigset_t * - llgtr %r4,%r4 # compat_old_sigset_t * - jg compat_sys_sigprocmask # branch to system call - - .globl sys_init_module_wrapper -sys_init_module_wrapper: - llgtr %r2,%r2 # void * - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # char * - jg sys_init_module # branch to system call - - .globl sys_delete_module_wrapper -sys_delete_module_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned int - jg sys_delete_module # branch to system call - - .globl sys32_quotactl_wrapper -sys32_quotactl_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # qid_t - llgtr %r5,%r5 # caddr_t - jg sys_quotactl # branch to system call - - .globl sys32_getpgid_wrapper -sys32_getpgid_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_getpgid # branch to system call - - .globl sys32_fchdir_wrapper -sys32_fchdir_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fchdir # branch to system call - - .globl sys32_bdflush_wrapper -sys32_bdflush_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # long - jg sys_bdflush # branch to system call - - .globl sys32_sysfs_wrapper -sys32_sysfs_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys_sysfs # branch to system call - - .globl sys32_personality_wrapper -sys32_personality_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_s390_personality # branch to system call - - .globl sys32_setfsuid16_wrapper -sys32_setfsuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setfsuid16 # branch to system call - - .globl sys32_setfsgid16_wrapper -sys32_setfsgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setfsgid16 # branch to system call - - .globl sys32_llseek_wrapper -sys32_llseek_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # unsigned int - jg sys_llseek # branch to system call - - .globl sys32_getdents_wrapper -sys32_getdents_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_getdents # branch to system call - - .globl compat_sys_select_wrapper -compat_sys_select_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_fd_set * - llgtr %r4,%r4 # compat_fd_set * - llgtr %r5,%r5 # compat_fd_set * - llgtr %r6,%r6 # struct compat_timeval * - jg compat_sys_select # branch to system call - - .globl sys32_flock_wrapper -sys32_flock_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_flock # branch to system call - - .globl sys32_msync_wrapper -sys32_msync_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_msync # branch to system call - - .globl compat_sys_readv_wrapper -compat_sys_readv_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_readv # branch to system call - - .globl compat_sys_writev_wrapper -compat_sys_writev_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_writev # branch to system call - - .globl sys32_getsid_wrapper -sys32_getsid_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_getsid # branch to system call - - .globl sys32_fdatasync_wrapper -sys32_fdatasync_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fdatasync # branch to system call - - .globl sys32_mlock_wrapper -sys32_mlock_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_mlock # branch to system call - - .globl sys32_munlock_wrapper -sys32_munlock_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munlock # branch to system call - - .globl sys32_mlockall_wrapper -sys32_mlockall_wrapper: - lgfr %r2,%r2 # int - jg sys_mlockall # branch to system call - -#sys32_munlockall_wrapper # void - - .globl sys32_sched_setparam_wrapper -sys32_sched_setparam_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_setparam # branch to system call - - .globl sys32_sched_getparam_wrapper -sys32_sched_getparam_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_getparam # branch to system call - - .globl sys32_sched_setscheduler_wrapper -sys32_sched_setscheduler_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct sched_param * - jg sys_sched_setscheduler # branch to system call - - .globl sys32_sched_getscheduler_wrapper -sys32_sched_getscheduler_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_sched_getscheduler # branch to system call - -#sys32_sched_yield_wrapper # void - - .globl sys32_sched_get_priority_max_wrapper -sys32_sched_get_priority_max_wrapper: - lgfr %r2,%r2 # int - jg sys_sched_get_priority_max # branch to system call - - .globl sys32_sched_get_priority_min_wrapper -sys32_sched_get_priority_min_wrapper: - lgfr %r2,%r2 # int - jg sys_sched_get_priority_min # branch to system call - - .globl sys32_sched_rr_get_interval_wrapper -sys32_sched_rr_get_interval_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct compat_timespec * - jg sys32_sched_rr_get_interval # branch to system call - - .globl compat_sys_nanosleep_wrapper -compat_sys_nanosleep_wrapper: - llgtr %r2,%r2 # struct compat_timespec * - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_nanosleep # branch to system call - - .globl sys32_mremap_wrapper -sys32_mremap_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_mremap # branch to system call - - .globl sys32_setresuid16_wrapper -sys32_setresuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_setresuid16 # branch to system call - - .globl sys32_getresuid16_wrapper -sys32_getresuid16_wrapper: - llgtr %r2,%r2 # __kernel_old_uid_emu31_t * - llgtr %r3,%r3 # __kernel_old_uid_emu31_t * - llgtr %r4,%r4 # __kernel_old_uid_emu31_t * - jg sys32_getresuid16 # branch to system call - - .globl sys32_poll_wrapper -sys32_poll_wrapper: - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # long - jg sys_poll # branch to system call - - .globl compat_sys_nfsservctl_wrapper -compat_sys_nfsservctl_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_nfsctl_arg* - llgtr %r4,%r4 # union compat_nfsctl_res* - jg compat_sys_nfsservctl # branch to system call - - .globl sys32_setresgid16_wrapper -sys32_setresgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_setresgid16 # branch to system call - - .globl sys32_getresgid16_wrapper -sys32_getresgid16_wrapper: - llgtr %r2,%r2 # __kernel_old_gid_emu31_t * - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - llgtr %r4,%r4 # __kernel_old_gid_emu31_t * - jg sys32_getresgid16 # branch to system call - - .globl sys32_prctl_wrapper -sys32_prctl_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_prctl # branch to system call - -#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue - - .globl sys32_rt_sigaction_wrapper -sys32_rt_sigaction_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct sigaction_emu31 * - llgtr %r4,%r4 # const struct sigaction_emu31 * - llgfr %r5,%r5 # size_t - jg sys32_rt_sigaction # branch to system call - - .globl sys32_rt_sigprocmask_wrapper -sys32_rt_sigprocmask_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # old_sigset_emu31 * - llgtr %r4,%r4 # old_sigset_emu31 * - llgfr %r5,%r5 # size_t - jg sys32_rt_sigprocmask # branch to system call - - .globl sys32_rt_sigpending_wrapper -sys32_rt_sigpending_wrapper: - llgtr %r2,%r2 # sigset_emu31 * - llgfr %r3,%r3 # size_t - jg sys32_rt_sigpending # branch to system call - - .globl compat_sys_rt_sigtimedwait_wrapper -compat_sys_rt_sigtimedwait_wrapper: - llgtr %r2,%r2 # const sigset_emu31_t * - llgtr %r3,%r3 # siginfo_emu31_t * - llgtr %r4,%r4 # const struct compat_timespec * - llgfr %r5,%r5 # size_t - jg compat_sys_rt_sigtimedwait # branch to system call - - .globl sys32_rt_sigqueueinfo_wrapper -sys32_rt_sigqueueinfo_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # siginfo_emu31_t * - jg sys32_rt_sigqueueinfo # branch to system call - - .globl compat_sys_rt_sigsuspend_wrapper -compat_sys_rt_sigsuspend_wrapper: - llgtr %r2,%r2 # compat_sigset_t * - llgfr %r3,%r3 # compat_size_t - jg compat_sys_rt_sigsuspend - - .globl sys32_pread64_wrapper -sys32_pread64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pread64 # branch to system call - - .globl sys32_pwrite64_wrapper -sys32_pwrite64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pwrite64 # branch to system call - - .globl sys32_chown16_wrapper -sys32_chown16_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_chown16 # branch to system call - - .globl sys32_getcwd_wrapper -sys32_getcwd_wrapper: - llgtr %r2,%r2 # char * - llgfr %r3,%r3 # unsigned long - jg sys_getcwd # branch to system call - - .globl sys32_capget_wrapper -sys32_capget_wrapper: - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # cap_user_data_t - jg sys_capget # branch to system call - - .globl sys32_capset_wrapper -sys32_capset_wrapper: - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # const cap_user_data_t - jg sys_capset # branch to system call - - .globl sys32_sigaltstack_wrapper -sys32_sigaltstack_wrapper: - llgtr %r2,%r2 # const stack_emu31_t * - llgtr %r3,%r3 # stack_emu31_t * - jg sys32_sigaltstack - - .globl sys32_sendfile_wrapper -sys32_sendfile_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # __kernel_off_emu31_t * - llgfr %r5,%r5 # size_t - jg sys32_sendfile # branch to system call - -#sys32_vfork_wrapper # done in vfork_glue - - .globl sys32_truncate64_wrapper -sys32_truncate64_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_truncate64 # branch to system call - - .globl sys32_ftruncate64_wrapper -sys32_ftruncate64_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_ftruncate64 # branch to system call - - .globl sys32_lchown_wrapper -sys32_lchown_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_lchown # branch to system call - -#sys32_getuid_wrapper # void -#sys32_getgid_wrapper # void -#sys32_geteuid_wrapper # void -#sys32_getegid_wrapper # void - - .globl sys32_setreuid_wrapper -sys32_setreuid_wrapper: - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - jg sys_setreuid # branch to system call - - .globl sys32_setregid_wrapper -sys32_setregid_wrapper: - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - jg sys_setregid # branch to system call - - .globl sys32_getgroups_wrapper -sys32_getgroups_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_getgroups # branch to system call - - .globl sys32_setgroups_wrapper -sys32_setgroups_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_setgroups # branch to system call - - .globl sys32_fchown_wrapper -sys32_fchown_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_fchown # branch to system call - - .globl sys32_setresuid_wrapper -sys32_setresuid_wrapper: - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # uid_t - jg sys_setresuid # branch to system call - - .globl sys32_getresuid_wrapper -sys32_getresuid_wrapper: - llgtr %r2,%r2 # uid_t * - llgtr %r3,%r3 # uid_t * - llgtr %r4,%r4 # uid_t * - jg sys_getresuid # branch to system call - - .globl sys32_setresgid_wrapper -sys32_setresgid_wrapper: - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - llgfr %r4,%r4 # gid_t - jg sys_setresgid # branch to system call - - .globl sys32_getresgid_wrapper -sys32_getresgid_wrapper: - llgtr %r2,%r2 # gid_t * - llgtr %r3,%r3 # gid_t * - llgtr %r4,%r4 # gid_t * - jg sys_getresgid # branch to system call - - .globl sys32_chown_wrapper -sys32_chown_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_chown # branch to system call - - .globl sys32_setuid_wrapper -sys32_setuid_wrapper: - llgfr %r2,%r2 # uid_t - jg sys_setuid # branch to system call - - .globl sys32_setgid_wrapper -sys32_setgid_wrapper: - llgfr %r2,%r2 # gid_t - jg sys_setgid # branch to system call - - .globl sys32_setfsuid_wrapper -sys32_setfsuid_wrapper: - llgfr %r2,%r2 # uid_t - jg sys_setfsuid # branch to system call - - .globl sys32_setfsgid_wrapper -sys32_setfsgid_wrapper: - llgfr %r2,%r2 # gid_t - jg sys_setfsgid # branch to system call - - .globl sys32_pivot_root_wrapper -sys32_pivot_root_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_pivot_root # branch to system call - - .globl sys32_mincore_wrapper -sys32_mincore_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - llgtr %r4,%r4 # unsigned char * - jg sys_mincore # branch to system call - - .globl sys32_madvise_wrapper -sys32_madvise_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_madvise # branch to system call - - .globl sys32_getdents64_wrapper -sys32_getdents64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg sys_getdents64 # branch to system call - - .globl compat_sys_fcntl64_wrapper -compat_sys_fcntl64_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl64 # branch to system call - - .globl sys32_stat64_wrapper -sys32_stat64_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_stat64 # branch to system call - - .globl sys32_lstat64_wrapper -sys32_lstat64_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_lstat64 # branch to system call - - .globl sys32_stime_wrapper -sys32_stime_wrapper: - llgtr %r2,%r2 # long * - jg compat_sys_stime # branch to system call - - .globl sys32_sysctl_wrapper -sys32_sysctl_wrapper: - llgtr %r2,%r2 # struct compat_sysctl_args * - jg compat_sys_sysctl - - .globl sys32_fstat64_wrapper -sys32_fstat64_wrapper: - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # struct stat64 * - jg sys32_fstat64 # branch to system call - - .globl compat_sys_futex_wrapper -compat_sys_futex_wrapper: - llgtr %r2,%r2 # u32 * - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct compat_timespec * - llgtr %r6,%r6 # u32 * - lgf %r0,164(%r15) # int - stg %r0,160(%r15) - jg compat_sys_futex # branch to system call - - .globl sys32_setxattr_wrapper -sys32_setxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_setxattr - - .globl sys32_lsetxattr_wrapper -sys32_lsetxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_lsetxattr - - .globl sys32_fsetxattr_wrapper -sys32_fsetxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_fsetxattr - - .globl sys32_getxattr_wrapper -sys32_getxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_getxattr - - .globl sys32_lgetxattr_wrapper -sys32_lgetxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_lgetxattr - - .globl sys32_fgetxattr_wrapper -sys32_fgetxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_fgetxattr - - .globl sys32_listxattr_wrapper -sys32_listxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_listxattr - - .globl sys32_llistxattr_wrapper -sys32_llistxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_llistxattr - - .globl sys32_flistxattr_wrapper -sys32_flistxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_flistxattr - - .globl sys32_removexattr_wrapper -sys32_removexattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_removexattr - - .globl sys32_lremovexattr_wrapper -sys32_lremovexattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_lremovexattr - - .globl sys32_fremovexattr_wrapper -sys32_fremovexattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - jg sys_fremovexattr - - .globl sys32_sched_setaffinity_wrapper -sys32_sched_setaffinity_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_setaffinity - - .globl sys32_sched_getaffinity_wrapper -sys32_sched_getaffinity_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_getaffinity - - .globl sys32_exit_group_wrapper -sys32_exit_group_wrapper: - lgfr %r2,%r2 # int - jg sys_exit_group # branch to system call - - .globl sys32_set_tid_address_wrapper -sys32_set_tid_address_wrapper: - llgtr %r2,%r2 # int * - jg sys_set_tid_address # branch to system call - - .globl sys_epoll_create_wrapper -sys_epoll_create_wrapper: - lgfr %r2,%r2 # int - jg sys_epoll_create # branch to system call - - .globl sys_epoll_ctl_wrapper -sys_epoll_ctl_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct epoll_event * - jg sys_epoll_ctl # branch to system call - - .globl sys_epoll_wait_wrapper -sys_epoll_wait_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg sys_epoll_wait # branch to system call - - .globl sys32_lookup_dcookie_wrapper -sys32_lookup_dcookie_wrapper: - sllg %r2,%r2,32 # get high word of 64bit dcookie - or %r2,%r3 # get low word of 64bit dcookie - llgtr %r3,%r4 # char * - llgfr %r4,%r5 # size_t - jg sys_lookup_dcookie - - .globl sys32_fadvise64_wrapper -sys32_fadvise64_wrapper: - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - llgfr %r4,%r5 # size_t (unsigned long) - lgfr %r5,%r6 # int - jg sys32_fadvise64 - - .globl sys32_fadvise64_64_wrapper -sys32_fadvise64_64_wrapper: - llgtr %r2,%r2 # struct fadvise64_64_args * - jg sys32_fadvise64_64 - - .globl sys32_clock_settime_wrapper -sys32_clock_settime_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_settime - - .globl sys32_clock_gettime_wrapper -sys32_clock_gettime_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_gettime - - .globl sys32_clock_getres_wrapper -sys32_clock_getres_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_getres - - .globl sys32_clock_nanosleep_wrapper -sys32_clock_nanosleep_wrapper: - lgfr %r2,%r2 # clockid_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_timespec * - llgtr %r5,%r5 # struct compat_timespec * - jg compat_sys_clock_nanosleep - - .globl sys32_timer_create_wrapper -sys32_timer_create_wrapper: - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_sigevent * - llgtr %r4,%r4 # timer_t * - jg compat_sys_timer_create - - .globl sys32_timer_settime_wrapper -sys32_timer_settime_wrapper: - lgfr %r2,%r2 # timer_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timer_settime - - .globl sys32_timer_gettime_wrapper -sys32_timer_gettime_wrapper: - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timer_gettime - - .globl sys32_timer_getoverrun_wrapper -sys32_timer_getoverrun_wrapper: - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_getoverrun - - .globl sys32_timer_delete_wrapper -sys32_timer_delete_wrapper: - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_delete - - .globl sys32_io_setup_wrapper -sys32_io_setup_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # u32 * - jg compat_sys_io_setup - - .globl sys32_io_destroy_wrapper -sys32_io_destroy_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - jg sys_io_destroy - - .globl sys32_io_getevents_wrapper -sys32_io_getevents_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - lgfr %r4,%r4 # long - llgtr %r5,%r5 # struct io_event * - llgtr %r6,%r6 # struct compat_timespec * - jg compat_sys_io_getevents - - .globl sys32_io_submit_wrapper -sys32_io_submit_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - llgtr %r4,%r4 # struct iocb ** - jg compat_sys_io_submit - - .globl sys32_io_cancel_wrapper -sys32_io_cancel_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - llgtr %r3,%r3 # struct iocb * - llgtr %r4,%r4 # struct io_event * - jg sys_io_cancel - - .globl compat_sys_statfs64_wrapper -compat_sys_statfs64_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_statfs64 - - .globl compat_sys_fstatfs64_wrapper -compat_sys_fstatfs64_wrapper: - llgfr %r2,%r2 # unsigned int fd - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_fstatfs64 - - .globl compat_sys_mq_open_wrapper -compat_sys_mq_open_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # mode_t - llgtr %r5,%r5 # struct compat_mq_attr * - jg compat_sys_mq_open - - .globl sys32_mq_unlink_wrapper -sys32_mq_unlink_wrapper: - llgtr %r2,%r2 # const char * - jg sys_mq_unlink - - .globl compat_sys_mq_timedsend_wrapper -compat_sys_mq_timedsend_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedsend - - .globl compat_sys_mq_timedreceive_wrapper -compat_sys_mq_timedreceive_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgtr %r5,%r5 # unsigned int * - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedreceive - - .globl compat_sys_mq_notify_wrapper -compat_sys_mq_notify_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_sigevent * - jg compat_sys_mq_notify - - .globl compat_sys_mq_getsetattr_wrapper -compat_sys_mq_getsetattr_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_mq_attr * - llgtr %r4,%r4 # struct compat_mq_attr * - jg compat_sys_mq_getsetattr - - .globl compat_sys_add_key_wrapper -compat_sys_add_key_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # size_t - llgfr %r6,%r6 # (key_serial_t) u32 - jg sys_add_key - - .globl compat_sys_request_key_wrapper -compat_sys_request_key_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # (key_serial_t) u32 - jg sys_request_key - - .globl sys32_remap_file_pages_wrapper -sys32_remap_file_pages_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_remap_file_pages - - .globl compat_sys_waitid_wrapper -compat_sys_waitid_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # pid_t - llgtr %r4,%r4 # siginfo_emu31_t * - lgfr %r5,%r5 # int - llgtr %r6,%r6 # struct rusage_emu31 * - jg compat_sys_waitid - - .globl compat_sys_kexec_load_wrapper -compat_sys_kexec_load_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # struct kexec_segment * - llgfr %r5,%r5 # unsigned long - jg compat_sys_kexec_load - - .globl sys_ioprio_set_wrapper -sys_ioprio_set_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_ioprio_set - - .globl sys_ioprio_get_wrapper -sys_ioprio_get_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_ioprio_get - - .globl sys_inotify_add_watch_wrapper -sys_inotify_add_watch_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # u32 - jg sys_inotify_add_watch - - .globl sys_inotify_rm_watch_wrapper -sys_inotify_rm_watch_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - jg sys_inotify_rm_watch - - .globl compat_sys_openat_wrapper -compat_sys_openat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg compat_sys_openat - - .globl sys_mkdirat_wrapper -sys_mkdirat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_mkdirat - - .globl sys_mknodat_wrapper -sys_mknodat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgfr %r5,%r5 # unsigned int - jg sys_mknodat - - .globl sys_fchownat_wrapper -sys_fchownat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # uid_t - llgfr %r5,%r5 # gid_t - lgfr %r6,%r6 # int - jg sys_fchownat - - .globl compat_sys_futimesat_wrapper -compat_sys_futimesat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct timeval * - jg compat_sys_futimesat - - .globl sys32_fstatat64_wrapper -sys32_fstatat64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct stat64 * - lgfr %r5,%r5 # int - jg sys32_fstatat64 - - .globl sys_unlinkat_wrapper -sys_unlinkat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_unlinkat - - .globl sys_renameat_wrapper -sys_renameat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - jg sys_renameat - - .globl sys_linkat_wrapper -sys_linkat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - lgfr %r6,%r6 # int - jg sys_linkat - - .globl sys_symlinkat_wrapper -sys_symlinkat_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgtr %r4,%r4 # const char * - jg sys_symlinkat - - .globl sys_readlinkat_wrapper -sys_readlinkat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # char * - lgfr %r5,%r5 # int - jg sys_readlinkat - - .globl sys_fchmodat_wrapper -sys_fchmodat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # mode_t - jg sys_fchmodat - - .globl sys_faccessat_wrapper -sys_faccessat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_faccessat - - .globl compat_sys_pselect6_wrapper -compat_sys_pselect6_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # fd_set * - llgtr %r4,%r4 # fd_set * - llgtr %r5,%r5 # fd_set * - llgtr %r6,%r6 # struct timespec * - llgt %r0,164(%r15) # void * - stg %r0,160(%r15) - jg compat_sys_pselect6 - - .globl compat_sys_ppoll_wrapper -compat_sys_ppoll_wrapper: - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # struct timespec * - llgtr %r5,%r5 # const sigset_t * - llgfr %r6,%r6 # size_t - jg compat_sys_ppoll - - .globl sys_unshare_wrapper -sys_unshare_wrapper: - llgfr %r2,%r2 # unsigned long - jg sys_unshare - - .globl compat_sys_set_robust_list_wrapper -compat_sys_set_robust_list_wrapper: - llgtr %r2,%r2 # struct compat_robust_list_head * - llgfr %r3,%r3 # size_t - jg compat_sys_set_robust_list - - .globl compat_sys_get_robust_list_wrapper -compat_sys_get_robust_list_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_uptr_t_t * - llgtr %r4,%r4 # compat_size_t * - jg compat_sys_get_robust_list - - .globl sys_splice_wrapper -sys_splice_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # loff_t * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # size_t - llgf %r0,164(%r15) # unsigned int - stg %r0,160(%r15) - jg sys_splice - - .globl sys_sync_file_range_wrapper -sys_sync_file_range_wrapper: - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - sllg %r4,%r5,32 # get high word of 64bit loff_t - or %r4,%r6 # get low word of 64bit loff_t - llgf %r5,164(%r15) # unsigned int - jg sys_sync_file_range - - .globl sys_tee_wrapper -sys_tee_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - jg sys_tee - - .globl compat_sys_vmsplice_wrapper -compat_sys_vmsplice_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned int - llgfr %r5,%r5 # unsigned int - jg compat_sys_vmsplice - - .globl sys_getcpu_wrapper -sys_getcpu_wrapper: - llgtr %r2,%r2 # unsigned * - llgtr %r3,%r3 # unsigned * - llgtr %r4,%r4 # struct getcpu_cache * - jg sys_getcpu - - .globl compat_sys_epoll_pwait_wrapper -compat_sys_epoll_pwait_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgtr %r6,%r6 # compat_sigset_t * - llgf %r0,164(%r15) # compat_size_t - stg %r0,160(%r15) - jg compat_sys_epoll_pwait - - .globl compat_sys_utimes_wrapper -compat_sys_utimes_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_timeval * - jg compat_sys_utimes - - .globl compat_sys_utimensat_wrapper -compat_sys_utimensat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct compat_timespec * - lgfr %r5,%r5 # int - jg compat_sys_utimensat - - .globl compat_sys_signalfd_wrapper -compat_sys_signalfd_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_sigset_t * - llgfr %r4,%r4 # compat_size_t - jg compat_sys_signalfd - - .globl sys_eventfd_wrapper -sys_eventfd_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_eventfd - - .globl sys_fallocate_wrapper -sys_fallocate_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - sllg %r4,%r4,32 # get high word of 64bit loff_t - lr %r4,%r5 # get low word of 64bit loff_t - sllg %r5,%r6,32 # get high word of 64bit loff_t - l %r5,164(%r15) # get low word of 64bit loff_t - jg sys_fallocate - - .globl sys_timerfd_create_wrapper -sys_timerfd_create_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_timerfd_create - - .globl compat_sys_timerfd_settime_wrapper -compat_sys_timerfd_settime_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timerfd_settime - - .globl compat_sys_timerfd_gettime_wrapper -compat_sys_timerfd_gettime_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timerfd_gettime - - .globl compat_sys_signalfd4_wrapper -compat_sys_signalfd4_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_sigset_t * - llgfr %r4,%r4 # compat_size_t - lgfr %r5,%r5 # int - jg compat_sys_signalfd4 - - .globl sys_eventfd2_wrapper -sys_eventfd2_wrapper: - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # int - jg sys_eventfd2 - - .globl sys_inotify_init1_wrapper -sys_inotify_init1_wrapper: - lgfr %r2,%r2 # int - jg sys_inotify_init1 - - .globl sys_pipe2_wrapper -sys_pipe2_wrapper: - llgtr %r2,%r2 # u32 * - lgfr %r3,%r3 # int - jg sys_pipe2 # branch to system call - - .globl sys_dup3_wrapper -sys_dup3_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # int - jg sys_dup3 # branch to system call - - .globl sys_epoll_create1_wrapper -sys_epoll_create1_wrapper: - lgfr %r2,%r2 # int - jg sys_epoll_create1 # branch to system call - - .globl sys32_readahead_wrapper -sys32_readahead_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - llgfr %r4,%r4 # u32 - lgfr %r5,%r5 # s32 - jg sys32_readahead # branch to system call - - .globl sys32_sendfile64_wrapper -sys32_sendfile64_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # compat_loff_t * - lgfr %r5,%r5 # s32 - jg sys32_sendfile64 # branch to system call - - .globl sys_tkill_wrapper -sys_tkill_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - jg sys_tkill # branch to system call - - .globl sys_tgkill_wrapper -sys_tgkill_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - jg sys_tgkill # branch to system call - - .globl compat_sys_keyctl_wrapper -compat_sys_keyctl_wrapper: - llgfr %r2,%r2 # u32 - llgfr %r3,%r3 # u32 - llgfr %r4,%r4 # u32 - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_keyctl # branch to system call - - .globl compat_sys_preadv_wrapper -compat_sys_preadv_wrapper: - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_preadv # branch to system call - - .globl compat_sys_pwritev_wrapper -compat_sys_pwritev_wrapper: - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_pwritev # branch to system call - - .globl compat_sys_rt_tgsigqueueinfo_wrapper -compat_sys_rt_tgsigqueueinfo_wrapper: - lgfr %r2,%r2 # compat_pid_t - lgfr %r3,%r3 # compat_pid_t - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct compat_siginfo * - jg compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call - - .globl sys_perf_event_open_wrapper -sys_perf_event_open_wrapper: - llgtr %r2,%r2 # const struct perf_event_attr * - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # unsigned long - jg sys_perf_event_open # branch to system call - - .globl sys_clone_wrapper -sys_clone_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # int * - llgtr %r5,%r5 # int * - jg sys_clone # branch to system call - - .globl sys32_execve_wrapper -sys32_execve_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # compat_uptr_t * - llgtr %r4,%r4 # compat_uptr_t * - jg sys32_execve # branch to system call - - .globl sys_fanotify_init_wrapper -sys_fanotify_init_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_fanotify_init # branch to system call - - .globl sys_fanotify_mark_wrapper -sys_fanotify_mark_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - sllg %r4,%r4,32 # get high word of 64bit mask - lr %r4,%r5 # get low word of 64bit mask - llgfr %r5,%r6 # unsigned int - llgt %r6,164(%r15) # char * - jg sys_fanotify_mark # branch to system call - - .globl sys_prlimit64_wrapper -sys_prlimit64_wrapper: - lgfr %r2,%r2 # pid_t - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # const struct rlimit64 __user * - llgtr %r5,%r5 # struct rlimit64 __user * - jg sys_prlimit64 # branch to system call diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c new file mode 100644 index 00000000000..45cdb37aa6f --- /dev/null +++ b/arch/s390/kernel/compat_wrapper.c @@ -0,0 +1,216 @@ +/* + * Compat system call wrappers. + * + * Copyright IBM Corp. 2014 + */ + +#include <linux/syscalls.h> +#include <linux/compat.h> +#include "entry.h" + +#define COMPAT_SYSCALL_WRAP1(name, ...) \ + COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP2(name, ...) \ + COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP3(name, ...) \ + COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP4(name, ...) \ + COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP5(name, ...) \ + COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP6(name, ...) \ + COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) + +#define __SC_COMPAT_TYPE(t, a) \ + __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a + +#define __SC_COMPAT_CAST(t, a) \ +({ \ + long __ReS = a; \ + \ + BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ + !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ + if (__TYPE_IS_L(t)) \ + __ReS = (s32)a; \ + if (__TYPE_IS_UL(t)) \ + __ReS = (u32)a; \ + if (__TYPE_IS_PTR(t)) \ + __ReS = a & 0x7fffffff; \ + (t)__ReS; \ +}) + +/* + * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by + * compat tasks. These wrappers will only be used for system calls where only + * the system call arguments need sign or zero extension or zeroing of the upper + * 33 bits of pointers. + * Note: since the wrapper function will afterwards call a system call which + * again performs zero and sign extension for all system call arguments with + * a size of less than eight bytes, these compat wrappers only touch those + * system call arguments with a size of eight bytes ((unsigned) long and + * pointers). Zero and sign extension for e.g. int parameters will be done by + * the regular system call wrappers. + */ +#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ + { \ + return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ + } + +COMPAT_SYSCALL_WRAP1(exit, int, error_code); +COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); +COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); +COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); +COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(nice, int, increment); +COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); +COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); +COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); +COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); +COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); +COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); +COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); +COMPAT_SYSCALL_WRAP1(umask, int, mask); +COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); +COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); +COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); +COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); +COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); +COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); +COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); +COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); +COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); +COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); +COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); +COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); +COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); +COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); +COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); +COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); +COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); +COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); +COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); +COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); +COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); +COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); +COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); +COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP1(mlockall, int, flags); +COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); +COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); +COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); +COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); +COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); +COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); +COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); +COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); +COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); +COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); +COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); +COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); +COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); +COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); +COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); +COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); +COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); +COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); +COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); +COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); +COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); +COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); +COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); +COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); +COMPAT_SYSCALL_WRAP1(epoll_create, int, size); +COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); +COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); +COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); +COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); +COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); +COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); +COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); +COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); +COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); +COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); +COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); +COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); +COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); +COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); +COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); +COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); +COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); +COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); +COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); +COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); +COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); +COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); +COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); +COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); +COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); +COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); +COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); +COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); +COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); +COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); +COMPAT_SYSCALL_WRAP1(syncfs, int, fd); +COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); +COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); +COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); +COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); +COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 3e8b8816f30..d7b0c4d2788 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/cpcmd.c - * * S390 version - * Copyright IBM Corp. 1999,2007 + * Copyright IBM Corp. 1999, 2007 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ @@ -18,7 +16,6 @@ #include <linux/string.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> -#include <asm/system.h> #include <asm/io.h> static DEFINE_SPINLOCK(cpcmd_lock); diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c deleted file mode 100644 index 8cc7c9fa64f..00000000000 --- a/arch/s390/kernel/crash.c +++ /dev/null @@ -1,16 +0,0 @@ -/* - * arch/s390/kernel/crash.c - * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/threads.h> -#include <linux/kexec.h> -#include <linux/reboot.h> - -void machine_crash_shutdown(struct pt_regs *regs) -{ -} diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000000..a3b9150e680 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,647 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <linux/memblock.h> +#include <asm/os_info.h> +#include <asm/elf.h> +#include <asm/ipl.h> +#include <asm/sclp.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +static struct memblock_region oldmem_region; + +static struct memblock_type oldmem_type = { + .cnt = 1, + .max = 1, + .total_size = 0, + .regions = &oldmem_region, +}; + +#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ + for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, p_start, \ + p_end, p_nid); \ + i != (u64)ULLONG_MAX; \ + __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, \ + p_start, p_end, p_nid)) + +struct dump_save_areas dump_save_areas; + +/* + * Allocate and add a save area for a CPU + */ +struct save_area *dump_save_area_create(int cpu) +{ + struct save_area **save_areas, *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + return NULL; + if (cpu + 1 > dump_save_areas.count) { + dump_save_areas.count = cpu + 1; + save_areas = krealloc(dump_save_areas.areas, + dump_save_areas.count * sizeof(void *), + GFP_KERNEL | __GFP_ZERO); + if (!save_areas) { + kfree(save_area); + return NULL; + } + dump_save_areas.areas = save_areas; + } + dump_save_areas.areas[cpu] = save_area; + return save_area; +} + +/* + * Return physical address for virtual address + */ +static inline void *load_real_addr(void *addr) +{ + unsigned long real_addr; + + asm volatile( + " lra %0,0(%1)\n" + " jz 0f\n" + " la %0,0\n" + "0:" + : "=a" (real_addr) : "a" (addr) : "cc"); + return (void *)real_addr; +} + +/* + * Copy real to virtual or real memory + */ +static int copy_from_realmem(void *dest, void *src, size_t count) +{ + unsigned long size; + + if (!count) + return 0; + if (!is_vmalloc_or_module_addr(dest)) + return memcpy_real(dest, src, count); + do { + size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); + if (memcpy_real(load_real_addr(dest), src, size)) + return -EFAULT; + count -= size; + dest += size; + src += size; + } while (count); + return 0; +} + +/* + * Pointer to ELF header in new kernel + */ +static void *elfcorehdr_newmem; + +/* + * Copy one page from zfcpdump "oldmem" + * + * For pages below HSA size memory from the HSA is copied. Otherwise + * real memory copy is used. + */ +static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, + unsigned long src, int userbuf) +{ + int rc; + + if (src < sclp_get_hsa_size()) { + rc = memcpy_hsa(buf, src, csize, userbuf); + } else { + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = memcpy_real(buf, (void *) src, csize); + } + return rc ? rc : csize; +} + +/* + * Copy one page from kdump "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, + unsigned long src, int userbuf) + +{ + int rc; + + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = copy_from_realmem(buf, (void *) src, csize); + return (rc == 0) ? rc : csize; +} + +/* + * Copy one page from "oldmem" + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + src = (pfn << PAGE_SHIFT) + offset; + if (OLDMEM_BASE) + return copy_oldmem_page_kdump(buf, csize, src, userbuf); + else + return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); +} + +/* + * Remap "oldmem" for kdump + * + * For the kdump reserved memory this functions performs a swap operation: + * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long size_old; + int rc; + + if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { + size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); + rc = remap_pfn_range(vma, from, + pfn + (OLDMEM_BASE >> PAGE_SHIFT), + size_old, prot); + if (rc || size == size_old) + return rc; + size -= size_old; + from += size_old; + pfn += size_old >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for zfcpdump + * + * We only map available memory above HSA size. Memory below HSA size + * is read on demand using the copy_oldmem_page() function. + */ +static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, + unsigned long from, + unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long hsa_end = sclp_get_hsa_size(); + unsigned long size_hsa; + + if (pfn < hsa_end >> PAGE_SHIFT) { + size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); + if (size == size_hsa) + return 0; + size -= size_hsa; + from += size_hsa; + pfn += size_hsa >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for kdump or zfcpdump + */ +int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + if (OLDMEM_BASE) + return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); + else + return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, + prot); +} + +/* + * Copy memory from old kernel + */ +int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if (OLDMEM_BASE) { + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + } else { + unsigned long hsa_end = sclp_get_hsa_size(); + if ((unsigned long) src < hsa_end) { + copied = min(count, hsa_end - (unsigned long) src); + rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); + if (rc) + return rc; + } + } + return copy_from_realmem(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) + */ +static void *get_vmcoreinfo_old(unsigned long *size) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return NULL; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return NULL; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return NULL; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return NULL; + vmcoreinfo = kzalloc_panic(note.n_descsz); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return NULL; + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (!vmcoreinfo) + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return ptr; + return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; i < dump_save_areas.count; i++) { + if (dump_save_areas.areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + int cnt = 0; + u64 idx; + + for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) + cnt++; + return cnt; +} + +/* + * Initialize ELF loads (new kernel) + */ +static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + phys_addr_t start, end; + u64 idx; + + for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { + phdr->p_filesz = end - start; + phdr->p_type = PT_LOAD; + phdr->p_offset = start; + phdr->p_vaddr = start; + phdr->p_paddr = start; + phdr->p_memsz = end - start; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; i < dump_save_areas.count; i++) { + sa = dump_save_areas.areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = notes_offset; + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + /* If we are not in kdump or zfcpdump mode return */ + if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) + return 0; + /* If elfcorehdr= has been passed via cmdline, we use that one */ + if (elfcorehdr_addr != ELFCORE_ADDR_MAX) + return 0; + /* If we cannot get HSA size for zfcpdump return error */ + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size()) + return -ENODEV; + + /* For kdump, exclude previous crashkernel memory */ + if (OLDMEM_BASE) { + oldmem_region.base = OLDMEM_BASE; + oldmem_region.size = OLDMEM_SIZE; + oldmem_type.total_size = OLDMEM_SIZE; + } + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, hdr_off); + *addr = (unsigned long long) hdr; + elfcorehdr_newmem = hdr; + *size = (unsigned long long) hdr_off; + BUG_ON(elfcorehdr_size > alloc_size); + return 0; +} + +/* + * Free ELF core header (new kernel) + */ +void elfcorehdr_free(unsigned long long addr) +{ + if (!elfcorehdr_newmem) + return; + kfree((void *)(unsigned long)addr); +} + +/* + * Read from ELF header + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; + memcpy(buf, src, count); + *ppos += count; + return count; +} + +/* + * Read from ELF notes data + */ +ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + int rc; + + if (elfcorehdr_newmem) { + memcpy(buf, src, count); + } else { + rc = copy_from_oldmem(buf, src, count); + if (rc) + return rc; + } + *ppos += count; + return count; +} diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 5ad6bc078bf..ee8390da6ea 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/debug.c * S/390 debug facility * - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 1999, 2012 + * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) * @@ -74,7 +73,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); static debug_info_t *debug_info_create(const char *name, int pages_per_area, - int nr_areas, int buf_size, mode_t mode); + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, @@ -111,6 +110,7 @@ struct debug_view debug_raw_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_raw_view); struct debug_view debug_hex_ascii_view = { "hex_ascii", @@ -120,6 +120,7 @@ struct debug_view debug_hex_ascii_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_hex_ascii_view); static struct debug_view debug_level_view = { "level", @@ -156,6 +157,7 @@ struct debug_view debug_sprintf_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_sprintf_view); /* used by dump analysis tools to determine version of debug feature */ static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; @@ -167,6 +169,7 @@ static debug_info_t *debug_area_last = NULL; static DEFINE_MUTEX(debug_mutex); static int initialized; +static int debug_critical; static const struct file_operations debug_file_ops = { .owner = THIS_MODULE, @@ -330,7 +333,7 @@ debug_info_free(debug_info_t* db_info){ static debug_info_t* debug_info_create(const char *name, int pages_per_area, int nr_areas, - int buf_size, mode_t mode) + int buf_size, umode_t mode) { debug_info_t* rc; @@ -608,7 +611,7 @@ debug_open(struct inode *inode, struct file *file) debug_info_t *debug_info, *debug_info_snapshot; mutex_lock(&debug_mutex); - debug_info = file->f_path.dentry->d_inode->i_private; + debug_info = file_inode(file)->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) @@ -688,7 +691,7 @@ debug_close(struct inode *inode, struct file *file) */ debug_info_t *debug_register_mode(const char *name, int pages_per_area, - int nr_areas, int buf_size, mode_t mode, + int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid) { debug_info_t *rc = NULL; @@ -730,6 +733,7 @@ debug_info_t *debug_register(const char *name, int pages_per_area, return debug_register_mode(name, pages_per_area, nr_areas, buf_size, S_IRUSR | S_IWUSR, 0, 0); } +EXPORT_SYMBOL(debug_register); /* * debug_unregister: @@ -748,6 +752,7 @@ debug_unregister(debug_info_t * id) out: return; } +EXPORT_SYMBOL(debug_unregister); /* * debug_set_size: @@ -810,7 +815,7 @@ debug_set_level(debug_info_t* id, int new_level) } spin_unlock_irqrestore(&id->lock,flags); } - +EXPORT_SYMBOL(debug_set_level); /* * proceed_active_entry: @@ -862,7 +867,7 @@ static inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, int exception) { - active->id.stck = get_clock(); + active->id.stck = get_tod_clock_fast(); active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; @@ -884,7 +889,7 @@ static int debug_active=1; * if debug_active is already off */ static int -s390dbf_procactive(ctl_table *table, int write, +s390dbf_procactive(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) @@ -930,7 +935,12 @@ debug_stop_all(void) if (debug_stoppable) debug_active = 0; } +EXPORT_SYMBOL(debug_stop_all); +void debug_set_critical(void) +{ + debug_critical = 1; +} /* * debug_event_common: @@ -945,7 +955,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -954,6 +968,7 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) return active; } +EXPORT_SYMBOL(debug_event_common); /* * debug_exception_common: @@ -968,7 +983,11 @@ debug_entry_t if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -977,6 +996,7 @@ debug_entry_t return active; } +EXPORT_SYMBOL(debug_exception_common); /* * counts arguments in format string for sprintf view @@ -1013,7 +1033,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return NULL; numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); va_start(ap,string); @@ -1026,6 +1050,7 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return active; } +EXPORT_SYMBOL(debug_sprintf_event); /* * debug_sprintf_exception: @@ -1047,7 +1072,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); va_start(ap,string); @@ -1060,25 +1089,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) return active; } - -/* - * debug_init: - * - is called exactly once to initialize the debug feature - */ - -static int -__init debug_init(void) -{ - int rc = 0; - - s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); - mutex_lock(&debug_mutex); - debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL); - initialized = 1; - mutex_unlock(&debug_mutex); - - return rc; -} +EXPORT_SYMBOL(debug_sprintf_exception); /* * debug_register_view: @@ -1090,7 +1101,7 @@ debug_register_view(debug_info_t * id, struct debug_view *view) int rc = 0; int i; unsigned long flags; - mode_t mode; + umode_t mode; struct dentry *pde; if (!id) @@ -1116,16 +1127,18 @@ debug_register_view(debug_info_t * id, struct debug_view *view) if (i == DEBUG_MAX_VIEWS) { pr_err("Registering view %s/%s would exceed the maximum " "number of views %i\n", id->name, view->name, i); - debugfs_remove(pde); rc = -1; } else { id->views[i] = view; id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); + if (rc) + debugfs_remove(pde); out: return rc; } +EXPORT_SYMBOL(debug_register_view); /* * debug_unregister_view: @@ -1134,9 +1147,9 @@ out: int debug_unregister_view(debug_info_t * id, struct debug_view *view) { - int rc = 0; - int i; + struct dentry *dentry = NULL; unsigned long flags; + int i, rc = 0; if (!id) goto out; @@ -1148,13 +1161,16 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view) if (i == DEBUG_MAX_VIEWS) rc = -1; else { - debugfs_remove(id->debugfs_entries[i]); + dentry = id->debugfs_entries[i]; id->views[i] = NULL; + id->debugfs_entries[i] = NULL; } spin_unlock_irqrestore(&id->lock, flags); + debugfs_remove(dentry); out: return rc; } +EXPORT_SYMBOL(debug_unregister_view); static inline char * debug_get_user_string(const char __user *user_buf, size_t user_len) @@ -1428,10 +1444,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, rc += sprintf(out_buf + rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; - if (!isprint(c)) - rc += sprintf(out_buf + rc, "."); - else + if (isascii(c) && isprint(c)) rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); } rc += sprintf(out_buf + rc, "\n"); return rc; @@ -1464,6 +1480,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, except_str, entry->id.fields.cpuid, (void *) caller); return rc; } +EXPORT_SYMBOL(debug_dflt_header_fn); /* * prints debug data sprintf-formated: @@ -1512,33 +1529,16 @@ out: } /* - * clean up module + * debug_init: + * - is called exactly once to initialize the debug feature */ -static void __exit debug_exit(void) +static int __init debug_init(void) { - debugfs_remove(debug_debugfs_root_entry); - unregister_sysctl_table(s390dbf_sysctl_header); - return; + s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); + mutex_lock(&debug_mutex); + debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); + initialized = 1; + mutex_unlock(&debug_mutex); + return 0; } - -/* - * module definitions - */ postcore_initcall(debug_init); -module_exit(debug_exit); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(debug_register); -EXPORT_SYMBOL(debug_unregister); -EXPORT_SYMBOL(debug_set_level); -EXPORT_SYMBOL(debug_stop_all); -EXPORT_SYMBOL(debug_register_view); -EXPORT_SYMBOL(debug_unregister_view); -EXPORT_SYMBOL(debug_event_common); -EXPORT_SYMBOL(debug_exception_common); -EXPORT_SYMBOL(debug_hex_ascii_view); -EXPORT_SYMBOL(debug_raw_view); -EXPORT_SYMBOL(debug_dflt_header_fn); -EXPORT_SYMBOL(debug_sprintf_view); -EXPORT_SYMBOL(debug_sprintf_exception); -EXPORT_SYMBOL(debug_sprintf_event); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index c032d11da8a..8237fc07ac7 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -9,27 +9,6 @@ #include <asm/diag.h> /* - * Diagnose 10: Release pages - */ -void diag10(unsigned long addr) -{ - if (addr >= 0x7ff00000) - return; - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%0,0x10\n" - "0: sam64\n" -#else - " diag %0,%0,0x10\n" - "0:\n" -#endif - EX_TABLE(0b, 0b) - : : "a" (addr)); -} -EXPORT_SYMBOL(diag10); - -/* * Diagnose 14: Input spool file manipulation */ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index c83726c9fe0..993efe6a887 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,6 +1,4 @@ /* - * arch/s390/kernel/dis.c - * * Disassemble s390 instructions. * * Copyright IBM Corp. 2007 @@ -24,15 +22,15 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <asm/system.h> #include <asm/uaccess.h> +#include <asm/dis.h> #include <asm/io.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/mathemu.h> #include <asm/cpcmd.h> -#include <asm/s390_ext.h> #include <asm/lowcore.h> #include <asm/debug.h> +#include <asm/irq.h> #ifndef CONFIG_64BIT #define ONELONG "%08lx: " @@ -40,17 +38,6 @@ #define ONELONG "%016lx: " #endif /* CONFIG_64BIT */ -#define OPERAND_GPR 0x1 /* Operand printed as %rx */ -#define OPERAND_FPR 0x2 /* Operand printed as %fx */ -#define OPERAND_AR 0x4 /* Operand printed as %ax */ -#define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_DISP 0x10 /* Operand printed as displacement */ -#define OPERAND_BASE 0x20 /* Operand printed as base register */ -#define OPERAND_INDEX 0x40 /* Operand printed as index register */ -#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ - enum { UNUSED, /* Indicates the end of the operand list */ R_8, /* GPR starting at position 8 */ @@ -86,22 +73,29 @@ enum { U4_12, /* 4 bit unsigned value starting at 12 */ U4_16, /* 4 bit unsigned value starting at 16 */ U4_20, /* 4 bit unsigned value starting at 20 */ + U4_24, /* 4 bit unsigned value starting at 24 */ + U4_28, /* 4 bit unsigned value starting at 28 */ U4_32, /* 4 bit unsigned value starting at 32 */ + U4_36, /* 4 bit unsigned value starting at 36 */ U8_8, /* 8 bit unsigned value starting at 8 */ U8_16, /* 8 bit unsigned value starting at 16 */ U8_24, /* 8 bit unsigned value starting at 24 */ U8_32, /* 8 bit unsigned value starting at 32 */ I8_8, /* 8 bit signed value starting at 8 */ I8_32, /* 8 bit signed value starting at 32 */ + J12_12, /* PC relative offset at 12 */ I16_16, /* 16 bit signed value starting at 16 */ I16_32, /* 32 bit signed value starting at 16 */ U16_16, /* 16 bit unsigned value starting at 16 */ U16_32, /* 32 bit unsigned value starting at 16 */ J16_16, /* PC relative jump offset at 16 */ + J16_32, /* PC relative offset at 16 */ + I24_24, /* 24 bit signed value starting at 24 */ J32_16, /* PC relative long offset at 16 */ I32_16, /* 32 bit signed value starting at 16 */ U32_16, /* 32 bit unsigned value starting at 16 */ M_16, /* 4 bit optional mask starting at 16 */ + M_20, /* 4 bit optional mask starting at 20 */ RO_28, /* optional GPR starting at position 28 */ }; @@ -112,6 +106,8 @@ enum { enum { INSTR_INVALID, INSTR_E, + INSTR_IE_UU, + INSTR_MII_UPI, INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, @@ -121,13 +117,15 @@ enum { INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, INSTR_RRE_RR, INSTR_RRE_RR_OPT, INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, - INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_M0RR, INSTR_RRF_R0RR, - INSTR_RRF_R0RR2, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, - INSTR_RRF_U0RR, INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, + INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR, + INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR, + INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF, + INSTR_RRF_UUFR, INSTR_RRF_UURF, + INSTR_RRR_F0FF, INSTR_RRS_RRRDU, INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, INSTR_RSI_RRP, - INSTR_RSL_R0RD, + INSTR_RSL_LRDFU, INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, @@ -139,6 +137,7 @@ enum { INSTR_SIL_RDI, INSTR_SIL_RDU, INSTR_SIY_IRD, INSTR_SIY_URD, INSTR_SI_URD, + INSTR_SMI_U0RDP, INSTR_SSE_RDRD, INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, @@ -146,19 +145,7 @@ enum { INSTR_S_00, INSTR_S_RD, }; -struct operand { - int bits; /* The number of bits in the operand. */ - int shift; /* The number of bits to shift. */ - int flags; /* One bit syntax flags. */ -}; - -struct insn { - const char name[5]; - unsigned char opfrag; - unsigned char format; -}; - -static const struct operand operands[] = +static const struct s390_operand operands[] = { [UNUSED] = { 0, 0, 0 }, [R_8] = { 4, 8, OPERAND_GPR }, @@ -194,31 +181,42 @@ static const struct operand operands[] = [U4_12] = { 4, 12, 0 }, [U4_16] = { 4, 16, 0 }, [U4_20] = { 4, 20, 0 }, + [U4_24] = { 4, 24, 0 }, + [U4_28] = { 4, 28, 0 }, [U4_32] = { 4, 32, 0 }, + [U4_36] = { 4, 36, 0 }, [U8_8] = { 8, 8, 0 }, [U8_16] = { 8, 16, 0 }, [U8_24] = { 8, 24, 0 }, [U8_32] = { 8, 32, 0 }, + [J12_12] = { 12, 12, OPERAND_PCREL }, [I16_16] = { 16, 16, OPERAND_SIGNED }, [U16_16] = { 16, 16, 0 }, [U16_32] = { 16, 32, 0 }, [J16_16] = { 16, 16, OPERAND_PCREL }, + [J16_32] = { 16, 32, OPERAND_PCREL }, [I16_32] = { 16, 32, OPERAND_SIGNED }, + [I24_24] = { 24, 24, OPERAND_SIGNED }, [J32_16] = { 32, 16, OPERAND_PCREL }, [I32_16] = { 32, 16, OPERAND_SIGNED }, [U32_16] = { 32, 16, 0 }, [M_16] = { 4, 16, 0 }, + [M_20] = { 4, 20, 0 }, [RO_28] = { 4, 28, OPERAND_GPR } }; static const unsigned char formats[][7] = { [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 }, + [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 }, + [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 }, [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 }, [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, - [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, + [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 }, [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, @@ -248,14 +246,18 @@ static const unsigned char formats[][7] = { [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 }, [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 }, [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, + [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 }, [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, + [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 }, [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 }, + [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 }, [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, @@ -267,12 +269,13 @@ static const unsigned char formats[][7] = { [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 }, [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, - [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, @@ -292,9 +295,10 @@ static const unsigned char formats[][7] = { [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, + [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 }, [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, - [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, - [INSTR_SSF_RRDRD2]= { 0x00, R_8,D_20,B_16,D_36,B_32,0 }, + [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 }, [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, @@ -307,36 +311,157 @@ static const unsigned char formats[][7] = { enum { LONG_INSN_ALGHSIK, + LONG_INSN_ALHHHR, + LONG_INSN_ALHHLR, LONG_INSN_ALHSIK, + LONG_INSN_ALSIHN, + LONG_INSN_CDFBRA, + LONG_INSN_CDGBRA, + LONG_INSN_CDGTRA, + LONG_INSN_CDLFBR, + LONG_INSN_CDLFTR, + LONG_INSN_CDLGBR, + LONG_INSN_CDLGTR, + LONG_INSN_CEFBRA, + LONG_INSN_CEGBRA, + LONG_INSN_CELFBR, + LONG_INSN_CELGBR, + LONG_INSN_CFDBRA, + LONG_INSN_CFEBRA, + LONG_INSN_CFXBRA, + LONG_INSN_CGDBRA, + LONG_INSN_CGDTRA, + LONG_INSN_CGEBRA, + LONG_INSN_CGXBRA, + LONG_INSN_CGXTRA, + LONG_INSN_CLFDBR, + LONG_INSN_CLFDTR, + LONG_INSN_CLFEBR, LONG_INSN_CLFHSI, + LONG_INSN_CLFXBR, + LONG_INSN_CLFXTR, + LONG_INSN_CLGDBR, + LONG_INSN_CLGDTR, + LONG_INSN_CLGEBR, LONG_INSN_CLGFRL, LONG_INSN_CLGHRL, LONG_INSN_CLGHSI, + LONG_INSN_CLGXBR, + LONG_INSN_CLGXTR, LONG_INSN_CLHHSI, + LONG_INSN_CXFBRA, + LONG_INSN_CXGBRA, + LONG_INSN_CXGTRA, + LONG_INSN_CXLFBR, + LONG_INSN_CXLFTR, + LONG_INSN_CXLGBR, + LONG_INSN_CXLGTR, + LONG_INSN_FIDBRA, + LONG_INSN_FIEBRA, + LONG_INSN_FIXBRA, + LONG_INSN_LDXBRA, + LONG_INSN_LEDBRA, + LONG_INSN_LEXBRA, + LONG_INSN_LLGFAT, LONG_INSN_LLGFRL, LONG_INSN_LLGHRL, + LONG_INSN_LLGTAT, LONG_INSN_POPCNT, + LONG_INSN_RIEMIT, + LONG_INSN_RINEXT, + LONG_INSN_RISBGN, LONG_INSN_RISBHG, LONG_INSN_RISBLG, + LONG_INSN_SLHHHR, + LONG_INSN_SLHHLR, + LONG_INSN_TABORT, + LONG_INSN_TBEGIN, + LONG_INSN_TBEGINC, + LONG_INSN_PCISTG, + LONG_INSN_MPCIFC, + LONG_INSN_STPCIFC, + LONG_INSN_PCISTB, }; static char *long_insn_name[] = { [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHHHR] = "alhhhr", + [LONG_INSN_ALHHLR] = "alhhlr", [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_ALSIHN] = "alsihn", + [LONG_INSN_CDFBRA] = "cdfbra", + [LONG_INSN_CDGBRA] = "cdgbra", + [LONG_INSN_CDGTRA] = "cdgtra", + [LONG_INSN_CDLFBR] = "cdlfbr", + [LONG_INSN_CDLFTR] = "cdlftr", + [LONG_INSN_CDLGBR] = "cdlgbr", + [LONG_INSN_CDLGTR] = "cdlgtr", + [LONG_INSN_CEFBRA] = "cefbra", + [LONG_INSN_CEGBRA] = "cegbra", + [LONG_INSN_CELFBR] = "celfbr", + [LONG_INSN_CELGBR] = "celgbr", + [LONG_INSN_CFDBRA] = "cfdbra", + [LONG_INSN_CFEBRA] = "cfebra", + [LONG_INSN_CFXBRA] = "cfxbra", + [LONG_INSN_CGDBRA] = "cgdbra", + [LONG_INSN_CGDTRA] = "cgdtra", + [LONG_INSN_CGEBRA] = "cgebra", + [LONG_INSN_CGXBRA] = "cgxbra", + [LONG_INSN_CGXTRA] = "cgxtra", + [LONG_INSN_CLFDBR] = "clfdbr", + [LONG_INSN_CLFDTR] = "clfdtr", + [LONG_INSN_CLFEBR] = "clfebr", [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLFXBR] = "clfxbr", + [LONG_INSN_CLFXTR] = "clfxtr", + [LONG_INSN_CLGDBR] = "clgdbr", + [LONG_INSN_CLGDTR] = "clgdtr", + [LONG_INSN_CLGEBR] = "clgebr", [LONG_INSN_CLGFRL] = "clgfrl", [LONG_INSN_CLGHRL] = "clghrl", [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLGXBR] = "clgxbr", + [LONG_INSN_CLGXTR] = "clgxtr", [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_CXFBRA] = "cxfbra", + [LONG_INSN_CXGBRA] = "cxgbra", + [LONG_INSN_CXGTRA] = "cxgtra", + [LONG_INSN_CXLFBR] = "cxlfbr", + [LONG_INSN_CXLFTR] = "cxlftr", + [LONG_INSN_CXLGBR] = "cxlgbr", + [LONG_INSN_CXLGTR] = "cxlgtr", + [LONG_INSN_FIDBRA] = "fidbra", + [LONG_INSN_FIEBRA] = "fiebra", + [LONG_INSN_FIXBRA] = "fixbra", + [LONG_INSN_LDXBRA] = "ldxbra", + [LONG_INSN_LEDBRA] = "ledbra", + [LONG_INSN_LEXBRA] = "lexbra", + [LONG_INSN_LLGFAT] = "llgfat", [LONG_INSN_LLGFRL] = "llgfrl", [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_LLGTAT] = "llgtat", [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RIEMIT] = "riemit", + [LONG_INSN_RINEXT] = "rinext", + [LONG_INSN_RISBGN] = "risbgn", [LONG_INSN_RISBHG] = "risbhg", - [LONG_INSN_RISBLG] = "risblk", + [LONG_INSN_RISBLG] = "risblg", + [LONG_INSN_SLHHHR] = "slhhhr", + [LONG_INSN_SLHHLR] = "slhhlr", + [LONG_INSN_TABORT] = "tabort", + [LONG_INSN_TBEGIN] = "tbegin", + [LONG_INSN_TBEGINC] = "tbeginc", + [LONG_INSN_PCISTG] = "pcistg", + [LONG_INSN_MPCIFC] = "mpcifc", + [LONG_INSN_STPCIFC] = "stpcifc", + [LONG_INSN_PCISTB] = "pcistb", }; -static struct insn opcode[] = { +static struct s390_insn opcode[] = { #ifdef CONFIG_64BIT + { "bprp", 0xc5, INSTR_MII_UPI }, + { "bpp", 0xc7, INSTR_SMI_U0RDP }, + { "trtr", 0xd0, INSTR_SS_L0RDRD }, { "lmd", 0xef, INSTR_SS_RRRDRD3 }, #endif { "spm", 0x04, INSTR_RR_R0 }, @@ -371,7 +496,6 @@ static struct insn opcode[] = { { "lcdr", 0x23, INSTR_RR_FF }, { "hdr", 0x24, INSTR_RR_FF }, { "ldxr", 0x25, INSTR_RR_FF }, - { "lrdr", 0x25, INSTR_RR_FF }, { "mxr", 0x26, INSTR_RR_FF }, { "mxdr", 0x27, INSTR_RR_FF }, { "ldr", 0x28, INSTR_RR_FF }, @@ -388,7 +512,6 @@ static struct insn opcode[] = { { "lcer", 0x33, INSTR_RR_FF }, { "her", 0x34, INSTR_RR_FF }, { "ledr", 0x35, INSTR_RR_FF }, - { "lrer", 0x35, INSTR_RR_FF }, { "axr", 0x36, INSTR_RR_FF }, { "sxr", 0x37, INSTR_RR_FF }, { "ler", 0x38, INSTR_RR_FF }, @@ -396,7 +519,6 @@ static struct insn opcode[] = { { "aer", 0x3a, INSTR_RR_FF }, { "ser", 0x3b, INSTR_RR_FF }, { "mder", 0x3c, INSTR_RR_FF }, - { "mer", 0x3c, INSTR_RR_FF }, { "der", 0x3d, INSTR_RR_FF }, { "aur", 0x3e, INSTR_RR_FF }, { "sur", 0x3f, INSTR_RR_FF }, @@ -447,7 +569,6 @@ static struct insn opcode[] = { { "ae", 0x7a, INSTR_RX_FRRD }, { "se", 0x7b, INSTR_RX_FRRD }, { "mde", 0x7c, INSTR_RX_FRRD }, - { "me", 0x7c, INSTR_RX_FRRD }, { "de", 0x7d, INSTR_RX_FRRD }, { "au", 0x7e, INSTR_RX_FRRD }, { "su", 0x7f, INSTR_RX_FRRD }, @@ -525,11 +646,11 @@ static struct insn opcode[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_01[] = { +static struct s390_insn opcode_01[] = { #ifdef CONFIG_64BIT - { "sam64", 0x0e, INSTR_E }, - { "pfpo", 0x0a, INSTR_E }, { "ptff", 0x04, INSTR_E }, + { "pfpo", 0x0a, INSTR_E }, + { "sam64", 0x0e, INSTR_E }, #endif { "pr", 0x01, INSTR_E }, { "upt", 0x02, INSTR_E }, @@ -541,7 +662,7 @@ static struct insn opcode_01[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_a5[] = { +static struct s390_insn opcode_a5[] = { #ifdef CONFIG_64BIT { "iihh", 0x00, INSTR_RI_RU }, { "iihl", 0x01, INSTR_RI_RU }, @@ -563,7 +684,7 @@ static struct insn opcode_a5[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_a7[] = { +static struct s390_insn opcode_a7[] = { #ifdef CONFIG_64BIT { "tmhh", 0x02, INSTR_RI_RU }, { "tmhl", 0x03, INSTR_RI_RU }, @@ -585,18 +706,41 @@ static struct insn opcode_a7[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_b2[] = { +static struct s390_insn opcode_aa[] = { +#ifdef CONFIG_64BIT + { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, + { "rion", 0x01, INSTR_RI_RI }, + { "tric", 0x02, INSTR_RI_RI }, + { "rioff", 0x03, INSTR_RI_RI }, + { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_b2[] = { #ifdef CONFIG_64BIT - { "sske", 0x2b, INSTR_RRF_M0RR }, { "stckf", 0x7c, INSTR_S_RD }, - { "cu21", 0xa6, INSTR_RRF_M0RR }, - { "cuutf", 0xa6, INSTR_RRF_M0RR }, - { "cu12", 0xa7, INSTR_RRF_M0RR }, - { "cutfu", 0xa7, INSTR_RRF_M0RR }, + { "lpp", 0x80, INSTR_S_RD }, + { "lcctl", 0x84, INSTR_S_RD }, + { "lpctl", 0x85, INSTR_S_RD }, + { "qsi", 0x86, INSTR_S_RD }, + { "lsctl", 0x87, INSTR_S_RD }, + { "qctri", 0x8e, INSTR_S_RD }, { "stfle", 0xb0, INSTR_S_RD }, { "lpswe", 0xb2, INSTR_S_RD }, + { "srnmb", 0xb8, INSTR_S_RD }, { "srnmt", 0xb9, INSTR_S_RD }, { "lfas", 0xbd, INSTR_S_RD }, + { "scctr", 0xe0, INSTR_RRE_RR }, + { "spctr", 0xe1, INSTR_RRE_RR }, + { "ecctr", 0xe4, INSTR_RRE_RR }, + { "epctr", 0xe5, INSTR_RRE_RR }, + { "ppa", 0xe8, INSTR_RRF_U0RR }, + { "etnd", 0xec, INSTR_RRE_R0 }, + { "ecpga", 0xed, INSTR_RRE_RR }, + { "tend", 0xf8, INSTR_S_00 }, + { "niai", 0xfa, INSTR_IE_UU }, + { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, #endif { "stidp", 0x02, INSTR_S_RD }, { "sck", 0x04, INSTR_S_RD }, @@ -615,6 +759,7 @@ static struct insn opcode_b2[] = { { "pc", 0x18, INSTR_S_RD }, { "sac", 0x19, INSTR_S_RD }, { "cfc", 0x1a, INSTR_S_RD }, + { "servc", 0x20, INSTR_RRE_RR }, { "ipte", 0x21, INSTR_RRE_RR }, { "ipm", 0x22, INSTR_RRE_R0 }, { "ivsk", 0x23, INSTR_RRE_RR }, @@ -625,9 +770,9 @@ static struct insn opcode_b2[] = { { "pt", 0x28, INSTR_RRE_RR }, { "iske", 0x29, INSTR_RRE_RR }, { "rrbe", 0x2a, INSTR_RRE_RR }, - { "sske", 0x2b, INSTR_RRE_RR }, + { "sske", 0x2b, INSTR_RRF_M0RR }, { "tb", 0x2c, INSTR_RRE_0R }, - { "dxr", 0x2d, INSTR_RRE_F0 }, + { "dxr", 0x2d, INSTR_RRE_FF }, { "pgin", 0x2e, INSTR_RRE_RR }, { "pgout", 0x2f, INSTR_RRE_RR }, { "csch", 0x30, INSTR_S_00 }, @@ -645,8 +790,8 @@ static struct insn opcode_b2[] = { { "schm", 0x3c, INSTR_S_00 }, { "bakr", 0x40, INSTR_RRE_RR }, { "cksm", 0x41, INSTR_RRE_RR }, - { "sqdr", 0x44, INSTR_RRE_F0 }, - { "sqer", 0x45, INSTR_RRE_F0 }, + { "sqdr", 0x44, INSTR_RRE_FF }, + { "sqer", 0x45, INSTR_RRE_FF }, { "stura", 0x46, INSTR_RRE_RR }, { "msta", 0x47, INSTR_RRE_R0 }, { "palb", 0x48, INSTR_RRE_00 }, @@ -677,14 +822,14 @@ static struct insn opcode_b2[] = { { "stfpc", 0x9c, INSTR_S_RD }, { "lfpc", 0x9d, INSTR_S_RD }, { "tre", 0xa5, INSTR_RRE_RR }, - { "cuutf", 0xa6, INSTR_RRE_RR }, - { "cutfu", 0xa7, INSTR_RRE_RR }, + { "cuutf", 0xa6, INSTR_RRF_M0RR }, + { "cutfu", 0xa7, INSTR_RRF_M0RR }, { "stfl", 0xb1, INSTR_S_RD }, { "trap4", 0xff, INSTR_S_RD }, { "", 0, INSTR_INVALID } }; -static struct insn opcode_b3[] = { +static struct s390_insn opcode_b3[] = { #ifdef CONFIG_64BIT { "maylr", 0x38, INSTR_RRF_F0FF }, { "mylr", 0x39, INSTR_RRF_F0FF }, @@ -692,72 +837,87 @@ static struct insn opcode_b3[] = { { "myr", 0x3b, INSTR_RRF_F0FF }, { "mayhr", 0x3c, INSTR_RRF_F0FF }, { "myhr", 0x3d, INSTR_RRF_F0FF }, - { "cegbr", 0xa4, INSTR_RRE_RR }, - { "cdgbr", 0xa5, INSTR_RRE_RR }, - { "cxgbr", 0xa6, INSTR_RRE_RR }, - { "cgebr", 0xa8, INSTR_RRF_U0RF }, - { "cgdbr", 0xa9, INSTR_RRF_U0RF }, - { "cgxbr", 0xaa, INSTR_RRF_U0RF }, - { "cfer", 0xb8, INSTR_RRF_U0RF }, - { "cfdr", 0xb9, INSTR_RRF_U0RF }, - { "cfxr", 0xba, INSTR_RRF_U0RF }, - { "cegr", 0xc4, INSTR_RRE_RR }, - { "cdgr", 0xc5, INSTR_RRE_RR }, - { "cxgr", 0xc6, INSTR_RRE_RR }, - { "cger", 0xc8, INSTR_RRF_U0RF }, - { "cgdr", 0xc9, INSTR_RRF_U0RF }, - { "cgxr", 0xca, INSTR_RRF_U0RF }, { "lpdfr", 0x70, INSTR_RRE_FF }, { "lndfr", 0x71, INSTR_RRE_FF }, { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, { "lcdfr", 0x73, INSTR_RRE_FF }, + { "sfasr", 0x85, INSTR_RRE_R0 }, + { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR }, { "ldgr", 0xc1, INSTR_RRE_FR }, + { "cegr", 0xc4, INSTR_RRE_FR }, + { "cdgr", 0xc5, INSTR_RRE_FR }, + { "cxgr", 0xc6, INSTR_RRE_FR }, + { "cger", 0xc8, INSTR_RRF_U0RF }, + { "cgdr", 0xc9, INSTR_RRF_U0RF }, + { "cgxr", 0xca, INSTR_RRF_U0RF }, { "lgdr", 0xcd, INSTR_RRE_RF }, - { "adtr", 0xd2, INSTR_RRR_F0FF }, - { "axtr", 0xda, INSTR_RRR_F0FF }, - { "cdtr", 0xe4, INSTR_RRE_FF }, - { "cxtr", 0xec, INSTR_RRE_FF }, + { "mdtra", 0xd0, INSTR_RRF_FUFF2 }, + { "ddtra", 0xd1, INSTR_RRF_FUFF2 }, + { "adtra", 0xd2, INSTR_RRF_FUFF2 }, + { "sdtra", 0xd3, INSTR_RRF_FUFF2 }, + { "ldetr", 0xd4, INSTR_RRF_0UFF }, + { "ledtr", 0xd5, INSTR_RRF_UUFF }, + { "ltdtr", 0xd6, INSTR_RRE_FF }, + { "fidtr", 0xd7, INSTR_RRF_UUFF }, + { "mxtra", 0xd8, INSTR_RRF_FUFF2 }, + { "dxtra", 0xd9, INSTR_RRF_FUFF2 }, + { "axtra", 0xda, INSTR_RRF_FUFF2 }, + { "sxtra", 0xdb, INSTR_RRF_FUFF2 }, + { "lxdtr", 0xdc, INSTR_RRF_0UFF }, + { "ldxtr", 0xdd, INSTR_RRF_UUFF }, + { "ltxtr", 0xde, INSTR_RRE_FF }, + { "fixtr", 0xdf, INSTR_RRF_UUFF }, { "kdtr", 0xe0, INSTR_RRE_FF }, - { "kxtr", 0xe8, INSTR_RRE_FF }, - { "cedtr", 0xf4, INSTR_RRE_FF }, - { "cextr", 0xfc, INSTR_RRE_FF }, - { "cdgtr", 0xf1, INSTR_RRE_FR }, - { "cxgtr", 0xf9, INSTR_RRE_FR }, - { "cdstr", 0xf3, INSTR_RRE_FR }, - { "cxstr", 0xfb, INSTR_RRE_FR }, - { "cdutr", 0xf2, INSTR_RRE_FR }, - { "cxutr", 0xfa, INSTR_RRE_FR }, - { "cgdtr", 0xe1, INSTR_RRF_U0RF }, - { "cgxtr", 0xe9, INSTR_RRF_U0RF }, - { "csdtr", 0xe3, INSTR_RRE_RF }, - { "csxtr", 0xeb, INSTR_RRE_RF }, + { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF }, { "cudtr", 0xe2, INSTR_RRE_RF }, - { "cuxtr", 0xea, INSTR_RRE_RF }, - { "ddtr", 0xd1, INSTR_RRR_F0FF }, - { "dxtr", 0xd9, INSTR_RRR_F0FF }, + { "csdtr", 0xe3, INSTR_RRE_RF }, + { "cdtr", 0xe4, INSTR_RRE_FF }, { "eedtr", 0xe5, INSTR_RRE_RF }, - { "eextr", 0xed, INSTR_RRE_RF }, { "esdtr", 0xe7, INSTR_RRE_RF }, + { "kxtr", 0xe8, INSTR_RRE_FF }, + { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR }, + { "cuxtr", 0xea, INSTR_RRE_RF }, + { "csxtr", 0xeb, INSTR_RRE_RF }, + { "cxtr", 0xec, INSTR_RRE_FF }, + { "eextr", 0xed, INSTR_RRE_RF }, { "esxtr", 0xef, INSTR_RRE_RF }, - { "iedtr", 0xf6, INSTR_RRF_F0FR }, - { "iextr", 0xfe, INSTR_RRF_F0FR }, - { "ltdtr", 0xd6, INSTR_RRE_FF }, - { "ltxtr", 0xde, INSTR_RRE_FF }, - { "fidtr", 0xd7, INSTR_RRF_UUFF }, - { "fixtr", 0xdf, INSTR_RRF_UUFF }, - { "ldetr", 0xd4, INSTR_RRF_0UFF }, - { "lxdtr", 0xdc, INSTR_RRF_0UFF }, - { "ledtr", 0xd5, INSTR_RRF_UUFF }, - { "ldxtr", 0xdd, INSTR_RRF_UUFF }, - { "mdtr", 0xd0, INSTR_RRR_F0FF }, - { "mxtr", 0xd8, INSTR_RRR_F0FF }, + { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR }, + { "cdutr", 0xf2, INSTR_RRE_FR }, + { "cdstr", 0xf3, INSTR_RRE_FR }, + { "cedtr", 0xf4, INSTR_RRE_FF }, { "qadtr", 0xf5, INSTR_RRF_FUFF }, - { "qaxtr", 0xfd, INSTR_RRF_FUFF }, + { "iedtr", 0xf6, INSTR_RRF_F0FR }, { "rrdtr", 0xf7, INSTR_RRF_FFRU }, + { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF }, + { "cxutr", 0xfa, INSTR_RRE_FR }, + { "cxstr", 0xfb, INSTR_RRE_FR }, + { "cextr", 0xfc, INSTR_RRE_FF }, + { "qaxtr", 0xfd, INSTR_RRF_FUFF }, + { "iextr", 0xfe, INSTR_RRF_F0FR }, { "rrxtr", 0xff, INSTR_RRF_FFRU }, - { "sfasr", 0x85, INSTR_RRE_R0 }, - { "sdtr", 0xd3, INSTR_RRR_F0FF }, - { "sxtr", 0xdb, INSTR_RRR_F0FF }, #endif { "lpebr", 0x00, INSTR_RRE_FF }, { "lnebr", 0x01, INSTR_RRE_FF }, @@ -804,10 +964,10 @@ static struct insn opcode_b3[] = { { "lnxbr", 0x41, INSTR_RRE_FF }, { "ltxbr", 0x42, INSTR_RRE_FF }, { "lcxbr", 0x43, INSTR_RRE_FF }, - { "ledbr", 0x44, INSTR_RRE_FF }, - { "ldxbr", 0x45, INSTR_RRE_FF }, - { "lexbr", 0x46, INSTR_RRE_FF }, - { "fixbr", 0x47, INSTR_RRF_U0FF }, + { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF }, { "kxbr", 0x48, INSTR_RRE_FF }, { "cxbr", 0x49, INSTR_RRE_FF }, { "axbr", 0x4a, INSTR_RRE_FF }, @@ -817,24 +977,24 @@ static struct insn opcode_b3[] = { { "tbedr", 0x50, INSTR_RRF_U0FF }, { "tbdr", 0x51, INSTR_RRF_U0FF }, { "diebr", 0x53, INSTR_RRF_FUFF }, - { "fiebr", 0x57, INSTR_RRF_U0FF }, - { "thder", 0x58, INSTR_RRE_RR }, - { "thdr", 0x59, INSTR_RRE_RR }, + { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF }, + { "thder", 0x58, INSTR_RRE_FF }, + { "thdr", 0x59, INSTR_RRE_FF }, { "didbr", 0x5b, INSTR_RRF_FUFF }, - { "fidbr", 0x5f, INSTR_RRF_U0FF }, + { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF }, { "lpxr", 0x60, INSTR_RRE_FF }, { "lnxr", 0x61, INSTR_RRE_FF }, { "ltxr", 0x62, INSTR_RRE_FF }, { "lcxr", 0x63, INSTR_RRE_FF }, - { "lxr", 0x65, INSTR_RRE_RR }, + { "lxr", 0x65, INSTR_RRE_FF }, { "lexr", 0x66, INSTR_RRE_FF }, - { "fixr", 0x67, INSTR_RRF_U0FF }, + { "fixr", 0x67, INSTR_RRE_FF }, { "cxr", 0x69, INSTR_RRE_FF }, - { "lzer", 0x74, INSTR_RRE_R0 }, - { "lzdr", 0x75, INSTR_RRE_R0 }, - { "lzxr", 0x76, INSTR_RRE_R0 }, - { "fier", 0x77, INSTR_RRF_U0FF }, - { "fidr", 0x7f, INSTR_RRF_U0FF }, + { "lzer", 0x74, INSTR_RRE_F0 }, + { "lzdr", 0x75, INSTR_RRE_F0 }, + { "lzxr", 0x76, INSTR_RRE_F0 }, + { "fier", 0x77, INSTR_RRE_FF }, + { "fidr", 0x7f, INSTR_RRE_FF }, { "sfpc", 0x84, INSTR_RRE_RR_OPT }, { "efpc", 0x8c, INSTR_RRE_RR_OPT }, { "cefbr", 0x94, INSTR_RRE_RF }, @@ -843,13 +1003,16 @@ static struct insn opcode_b3[] = { { "cfebr", 0x98, INSTR_RRF_U0RF }, { "cfdbr", 0x99, INSTR_RRF_U0RF }, { "cfxbr", 0x9a, INSTR_RRF_U0RF }, - { "cefr", 0xb4, INSTR_RRE_RF }, - { "cdfr", 0xb5, INSTR_RRE_RF }, - { "cxfr", 0xb6, INSTR_RRE_RF }, + { "cefr", 0xb4, INSTR_RRE_FR }, + { "cdfr", 0xb5, INSTR_RRE_FR }, + { "cxfr", 0xb6, INSTR_RRE_FR }, + { "cfer", 0xb8, INSTR_RRF_U0RF }, + { "cfdr", 0xb9, INSTR_RRF_U0RF }, + { "cfxr", 0xba, INSTR_RRF_U0RF }, { "", 0, INSTR_INVALID } }; -static struct insn opcode_b9[] = { +static struct s390_insn opcode_b9[] = { #ifdef CONFIG_64BIT { "lpgr", 0x00, INSTR_RRE_RR }, { "lngr", 0x01, INSTR_RRE_RR }, @@ -887,7 +1050,23 @@ static struct insn opcode_b9[] = { { "lhr", 0x27, INSTR_RRE_RR }, { "cgfr", 0x30, INSTR_RRE_RR }, { "clgfr", 0x31, INSTR_RRE_RR }, + { "cfdtr", 0x41, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF }, { "bctgr", 0x46, INSTR_RRE_RR }, + { "cfxtr", 0x49, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR }, + { "cdftr", 0x51, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR }, + { "cxftr", 0x59, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR }, + { "cgrt", 0x60, INSTR_RRF_U0RR }, + { "clgrt", 0x61, INSTR_RRF_U0RR }, + { "crt", 0x72, INSTR_RRF_U0RR }, + { "clrt", 0x73, INSTR_RRF_U0RR }, { "ngr", 0x80, INSTR_RRE_RR }, { "ogr", 0x81, INSTR_RRE_RR }, { "xgr", 0x82, INSTR_RRE_RR }, @@ -900,32 +1079,34 @@ static struct insn opcode_b9[] = { { "slbgr", 0x89, INSTR_RRE_RR }, { "cspg", 0x8a, INSTR_RRE_RR }, { "idte", 0x8e, INSTR_RRF_R0RR }, + { "crdte", 0x8f, INSTR_RRF_RMRR }, { "llcr", 0x94, INSTR_RRE_RR }, { "llhr", 0x95, INSTR_RRE_RR }, { "esea", 0x9d, INSTR_RRE_R0 }, + { "ptf", 0xa2, INSTR_RRE_R0 }, { "lptea", 0xaa, INSTR_RRF_RURR }, + { "rrbm", 0xae, INSTR_RRE_RR }, + { "pfmf", 0xaf, INSTR_RRE_RR }, { "cu14", 0xb0, INSTR_RRF_M0RR }, { "cu24", 0xb1, INSTR_RRF_M0RR }, - { "cu41", 0xb2, INSTR_RRF_M0RR }, - { "cu42", 0xb3, INSTR_RRF_M0RR }, - { "crt", 0x72, INSTR_RRF_U0RR }, - { "cgrt", 0x60, INSTR_RRF_U0RR }, - { "clrt", 0x73, INSTR_RRF_U0RR }, - { "clgrt", 0x61, INSTR_RRF_U0RR }, - { "ptf", 0xa2, INSTR_RRE_R0 }, - { "pfmf", 0xaf, INSTR_RRE_RR }, - { "trte", 0xbf, INSTR_RRF_M0RR }, + { "cu41", 0xb2, INSTR_RRE_RR }, + { "cu42", 0xb3, INSTR_RRE_RR }, { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "srstu", 0xbe, INSTR_RRE_RR }, + { "trte", 0xbf, INSTR_RRF_M0RR }, { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, - { "alhhh", 0xca, INSTR_RRF_R0RR2 }, - { "alhhl", 0xca, INSTR_RRF_R0RR2 }, - { "slhhh", 0xcb, INSTR_RRF_R0RR2 }, - { "chhr ", 0xcd, INSTR_RRE_RR }, + { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 }, + { "chhr", 0xcd, INSTR_RRE_RR }, { "clhhr", 0xcf, INSTR_RRE_RR }, + { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR }, + { "pcilg", 0xd2, INSTR_RRE_RR }, + { "rpcit", 0xd3, INSTR_RRE_RR }, { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, - { "slhhl", 0xdb, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 }, { "chlr", 0xdd, INSTR_RRE_RR }, { "clhlr", 0xdf, INSTR_RRE_RR }, { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, @@ -953,13 +1134,9 @@ static struct insn opcode_b9[] = { { "kimd", 0x3e, INSTR_RRE_RR }, { "klmd", 0x3f, INSTR_RRE_RR }, { "epsw", 0x8d, INSTR_RRE_RR }, - { "trtt", 0x90, INSTR_RRE_RR }, { "trtt", 0x90, INSTR_RRF_M0RR }, - { "trto", 0x91, INSTR_RRE_RR }, { "trto", 0x91, INSTR_RRF_M0RR }, - { "trot", 0x92, INSTR_RRE_RR }, { "trot", 0x92, INSTR_RRF_M0RR }, - { "troo", 0x93, INSTR_RRE_RR }, { "troo", 0x93, INSTR_RRF_M0RR }, { "mlr", 0x96, INSTR_RRE_RR }, { "dlr", 0x97, INSTR_RRE_RR }, @@ -968,7 +1145,7 @@ static struct insn opcode_b9[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_c0[] = { +static struct s390_insn opcode_c0[] = { #ifdef CONFIG_64BIT { "lgfi", 0x01, INSTR_RIL_RI }, { "xihf", 0x06, INSTR_RIL_RU }, @@ -988,8 +1165,10 @@ static struct insn opcode_c0[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_c2[] = { +static struct s390_insn opcode_c2[] = { #ifdef CONFIG_64BIT + { "msgfi", 0x00, INSTR_RIL_RI }, + { "msfi", 0x01, INSTR_RIL_RI }, { "slgfi", 0x04, INSTR_RIL_RU }, { "slfi", 0x05, INSTR_RIL_RU }, { "agfi", 0x08, INSTR_RIL_RI }, @@ -1000,71 +1179,69 @@ static struct insn opcode_c2[] = { { "cfi", 0x0d, INSTR_RIL_RI }, { "clgfi", 0x0e, INSTR_RIL_RU }, { "clfi", 0x0f, INSTR_RIL_RU }, - { "msfi", 0x01, INSTR_RIL_RI }, - { "msgfi", 0x00, INSTR_RIL_RI }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_c4[] = { +static struct s390_insn opcode_c4[] = { #ifdef CONFIG_64BIT - { "lrl", 0x0d, INSTR_RIL_RP }, + { "llhrl", 0x02, INSTR_RIL_RP }, + { "lghrl", 0x04, INSTR_RIL_RP }, + { "lhrl", 0x05, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, + { "sthrl", 0x07, INSTR_RIL_RP }, { "lgrl", 0x08, INSTR_RIL_RP }, + { "stgrl", 0x0b, INSTR_RIL_RP }, { "lgfrl", 0x0c, INSTR_RIL_RP }, - { "lhrl", 0x05, INSTR_RIL_RP }, - { "lghrl", 0x04, INSTR_RIL_RP }, + { "lrl", 0x0d, INSTR_RIL_RP }, { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, - { "llhrl", 0x02, INSTR_RIL_RP }, - { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, { "strl", 0x0f, INSTR_RIL_RP }, - { "stgrl", 0x0b, INSTR_RIL_RP }, - { "sthrl", 0x07, INSTR_RIL_RP }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_c6[] = { +static struct s390_insn opcode_c6[] = { #ifdef CONFIG_64BIT - { "crl", 0x0d, INSTR_RIL_RP }, - { "cgrl", 0x08, INSTR_RIL_RP }, - { "cgfrl", 0x0c, INSTR_RIL_RP }, - { "chrl", 0x05, INSTR_RIL_RP }, + { "exrl", 0x00, INSTR_RIL_RP }, + { "pfdrl", 0x02, INSTR_RIL_UP }, { "cghrl", 0x04, INSTR_RIL_RP }, - { "clrl", 0x0f, INSTR_RIL_RP }, + { "chrl", 0x05, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, + { "clhrl", 0x07, INSTR_RIL_RP }, + { "cgrl", 0x08, INSTR_RIL_RP }, { "clgrl", 0x0a, INSTR_RIL_RP }, + { "cgfrl", 0x0c, INSTR_RIL_RP }, + { "crl", 0x0d, INSTR_RIL_RP }, { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, - { "clhrl", 0x07, INSTR_RIL_RP }, - { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, - { "pfdrl", 0x02, INSTR_RIL_UP }, - { "exrl", 0x00, INSTR_RIL_RP }, + { "clrl", 0x0f, INSTR_RIL_RP }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_c8[] = { +static struct s390_insn opcode_c8[] = { #ifdef CONFIG_64BIT { "mvcos", 0x00, INSTR_SSF_RRDRD }, { "ectg", 0x01, INSTR_SSF_RRDRD }, { "csst", 0x02, INSTR_SSF_RRDRD }, { "lpd", 0x04, INSTR_SSF_RRDRD2 }, - { "lpdg ", 0x05, INSTR_SSF_RRDRD2 }, + { "lpdg", 0x05, INSTR_SSF_RRDRD2 }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_cc[] = { +static struct s390_insn opcode_cc[] = { #ifdef CONFIG_64BIT { "brcth", 0x06, INSTR_RIL_RP }, { "aih", 0x08, INSTR_RIL_RI }, { "alsih", 0x0a, INSTR_RIL_RI }, - { "alsih", 0x0b, INSTR_RIL_RI }, + { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI }, { "cih", 0x0d, INSTR_RIL_RI }, - { "clih ", 0x0f, INSTR_RIL_RI }, + { "clih", 0x0f, INSTR_RIL_RI }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_e3[] = { +static struct s390_insn opcode_e3[] = { #ifdef CONFIG_64BIT { "ltg", 0x02, INSTR_RXY_RRRD }, { "lrag", 0x03, INSTR_RXY_RRRD }, @@ -1093,11 +1270,15 @@ static struct insn opcode_e3[] = { { "cg", 0x20, INSTR_RXY_RRRD }, { "clg", 0x21, INSTR_RXY_RRRD }, { "stg", 0x24, INSTR_RXY_RRRD }, + { "ntstg", 0x25, INSTR_RXY_RRRD }, { "cvdy", 0x26, INSTR_RXY_RRRD }, { "cvdg", 0x2e, INSTR_RXY_RRRD }, { "strvg", 0x2f, INSTR_RXY_RRRD }, { "cgf", 0x30, INSTR_RXY_RRRD }, { "clgf", 0x31, INSTR_RXY_RRRD }, + { "ltgf", 0x32, INSTR_RXY_RRRD }, + { "cgh", 0x34, INSTR_RXY_RRRD }, + { "pfd", 0x36, INSTR_RXY_URRD }, { "strvh", 0x3f, INSTR_RXY_RRRD }, { "bctg", 0x46, INSTR_RXY_RRRD }, { "sty", 0x50, INSTR_RXY_RRRD }, @@ -1110,21 +1291,25 @@ static struct insn opcode_e3[] = { { "cy", 0x59, INSTR_RXY_RRRD }, { "ay", 0x5a, INSTR_RXY_RRRD }, { "sy", 0x5b, INSTR_RXY_RRRD }, + { "mfy", 0x5c, INSTR_RXY_RRRD }, { "aly", 0x5e, INSTR_RXY_RRRD }, { "sly", 0x5f, INSTR_RXY_RRRD }, { "sthy", 0x70, INSTR_RXY_RRRD }, { "lay", 0x71, INSTR_RXY_RRRD }, { "stcy", 0x72, INSTR_RXY_RRRD }, { "icy", 0x73, INSTR_RXY_RRRD }, + { "laey", 0x75, INSTR_RXY_RRRD }, { "lb", 0x76, INSTR_RXY_RRRD }, { "lgb", 0x77, INSTR_RXY_RRRD }, { "lhy", 0x78, INSTR_RXY_RRRD }, { "chy", 0x79, INSTR_RXY_RRRD }, { "ahy", 0x7a, INSTR_RXY_RRRD }, { "shy", 0x7b, INSTR_RXY_RRRD }, + { "mhy", 0x7c, INSTR_RXY_RRRD }, { "ng", 0x80, INSTR_RXY_RRRD }, { "og", 0x81, INSTR_RXY_RRRD }, { "xg", 0x82, INSTR_RXY_RRRD }, + { "lgat", 0x85, INSTR_RXY_RRRD }, { "mlg", 0x86, INSTR_RXY_RRRD }, { "dlg", 0x87, INSTR_RXY_RRRD }, { "alcg", 0x88, INSTR_RXY_RRRD }, @@ -1135,22 +1320,22 @@ static struct insn opcode_e3[] = { { "llgh", 0x91, INSTR_RXY_RRRD }, { "llc", 0x94, INSTR_RXY_RRRD }, { "llh", 0x95, INSTR_RXY_RRRD }, - { "cgh", 0x34, INSTR_RXY_RRRD }, - { "laey", 0x75, INSTR_RXY_RRRD }, - { "ltgf", 0x32, INSTR_RXY_RRRD }, - { "mfy", 0x5c, INSTR_RXY_RRRD }, - { "mhy", 0x7c, INSTR_RXY_RRRD }, - { "pfd", 0x36, INSTR_RXY_URRD }, + { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD }, + { "lat", 0x9f, INSTR_RXY_RRRD }, { "lbh", 0xc0, INSTR_RXY_RRRD }, { "llch", 0xc2, INSTR_RXY_RRRD }, { "stch", 0xc3, INSTR_RXY_RRRD }, { "lhh", 0xc4, INSTR_RXY_RRRD }, { "llhh", 0xc6, INSTR_RXY_RRRD }, { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfhat", 0xc8, INSTR_RXY_RRRD }, { "lfh", 0xca, INSTR_RXY_RRRD }, { "stfh", 0xcb, INSTR_RXY_RRRD }, { "chf", 0xcd, INSTR_RXY_RRRD }, { "clhf", 0xcf, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -1162,18 +1347,20 @@ static struct insn opcode_e3[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_e5[] = { +static struct s390_insn opcode_e5[] = { #ifdef CONFIG_64BIT { "strag", 0x02, INSTR_SSE_RDRD }, + { "mvhhi", 0x44, INSTR_SIL_RDI }, + { "mvghi", 0x48, INSTR_SIL_RDI }, + { "mvhi", 0x4c, INSTR_SIL_RDI }, { "chhsi", 0x54, INSTR_SIL_RDI }, - { "chsi", 0x5c, INSTR_SIL_RDI }, - { "cghsi", 0x58, INSTR_SIL_RDI }, { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, - { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { "cghsi", 0x58, INSTR_SIL_RDI }, { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, - { "mvhhi", 0x44, INSTR_SIL_RDI }, - { "mvhi", 0x4c, INSTR_SIL_RDI }, - { "mvghi", 0x48, INSTR_SIL_RDI }, + { "chsi", 0x5c, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, #endif { "lasp", 0x00, INSTR_SSE_RDRD }, { "tprot", 0x01, INSTR_SSE_RDRD }, @@ -1182,7 +1369,7 @@ static struct insn opcode_e5[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_eb[] = { +static struct s390_insn opcode_eb[] = { #ifdef CONFIG_64BIT { "lmg", 0x04, INSTR_RSY_RRRD }, { "srag", 0x0a, INSTR_RSY_RRRD }, @@ -1194,9 +1381,11 @@ static struct insn opcode_eb[] = { { "rllg", 0x1c, INSTR_RSY_RRRD }, { "clmh", 0x20, INSTR_RSY_RURD }, { "clmy", 0x21, INSTR_RSY_RURD }, + { "clt", 0x23, INSTR_RSY_RURD }, { "stmg", 0x24, INSTR_RSY_RRRD }, { "stctg", 0x25, INSTR_RSY_CCRD }, { "stmh", 0x26, INSTR_RSY_RRRD }, + { "clgt", 0x2b, INSTR_RSY_RURD }, { "stcmh", 0x2c, INSTR_RSY_RURD }, { "stcmy", 0x2d, INSTR_RSY_RURD }, { "lctlg", 0x2f, INSTR_RSY_CCRD }, @@ -1205,13 +1394,17 @@ static struct insn opcode_eb[] = { { "cdsg", 0x3e, INSTR_RSY_RRRD }, { "bxhg", 0x44, INSTR_RSY_RRRD }, { "bxleg", 0x45, INSTR_RSY_RRRD }, + { "ecag", 0x4c, INSTR_RSY_RRRD }, { "tmy", 0x51, INSTR_SIY_URD }, { "mviy", 0x52, INSTR_SIY_URD }, { "niy", 0x54, INSTR_SIY_URD }, { "cliy", 0x55, INSTR_SIY_URD }, { "oiy", 0x56, INSTR_SIY_URD }, { "xiy", 0x57, INSTR_SIY_URD }, - { "icmh", 0x80, INSTR_RSE_RURD }, + { "asi", 0x6a, INSTR_SIY_IRD }, + { "alsi", 0x6e, INSTR_SIY_IRD }, + { "agsi", 0x7a, INSTR_SIY_IRD }, + { "algsi", 0x7e, INSTR_SIY_IRD }, { "icmh", 0x80, INSTR_RSY_RURD }, { "icmy", 0x81, INSTR_RSY_RURD }, { "clclu", 0x8f, INSTR_RSY_RRRD }, @@ -1220,11 +1413,8 @@ static struct insn opcode_eb[] = { { "lmy", 0x98, INSTR_RSY_RRRD }, { "lamy", 0x9a, INSTR_RSY_AARD }, { "stamy", 0x9b, INSTR_RSY_AARD }, - { "asi", 0x6a, INSTR_SIY_IRD }, - { "agsi", 0x7a, INSTR_SIY_IRD }, - { "alsi", 0x6e, INSTR_SIY_IRD }, - { "algsi", 0x7e, INSTR_SIY_IRD }, - { "ecag", 0x4c, INSTR_RSY_RRRD }, + { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD }, + { "sic", 0xd1, INSTR_RSY_RRRD }, { "srak", 0xdc, INSTR_RSY_RRRD }, { "slak", 0xdd, INSTR_RSY_RRRD }, { "srlk", 0xde, INSTR_RSY_RRRD }, @@ -1243,6 +1433,9 @@ static struct insn opcode_eb[] = { { "lax", 0xf7, INSTR_RSY_RRRD }, { "laa", 0xf8, INSTR_RSY_RRRD }, { "laal", 0xfa, INSTR_RSY_RRRD }, + { "lric", 0x60, INSTR_RSY_RDRM }, + { "stric", 0x61, INSTR_RSY_RDRM }, + { "mric", 0x62, INSTR_RSY_RDRM }, #endif { "rll", 0x1d, INSTR_RSY_RRRD }, { "mvclu", 0x8e, INSTR_RSY_RRRD }, @@ -1250,45 +1443,46 @@ static struct insn opcode_eb[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_ec[] = { +static struct s390_insn opcode_ec[] = { #ifdef CONFIG_64BIT { "brxhg", 0x44, INSTR_RIE_RRP }, { "brxlg", 0x45, INSTR_RIE_RRP }, - { "crb", 0xf6, INSTR_RRS_RRRDU }, - { "cgrb", 0xe4, INSTR_RRS_RRRDU }, - { "crj", 0x76, INSTR_RIE_RRPU }, + { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { "rnsbg", 0x54, INSTR_RIE_RRUUU }, + { "risbg", 0x55, INSTR_RIE_RRUUU }, + { "rosbg", 0x56, INSTR_RIE_RRUUU }, + { "rxsbg", 0x57, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, { "cgrj", 0x64, INSTR_RIE_RRPU }, - { "cib", 0xfe, INSTR_RIS_RURDI }, - { "cgib", 0xfc, INSTR_RIS_RURDI }, - { "cij", 0x7e, INSTR_RIE_RUPI }, - { "cgij", 0x7c, INSTR_RIE_RUPI }, - { "cit", 0x72, INSTR_RIE_R0IU }, + { "clgrj", 0x65, INSTR_RIE_RRPU }, { "cgit", 0x70, INSTR_RIE_R0IU }, - { "clrb", 0xf7, INSTR_RRS_RRRDU }, - { "clgrb", 0xe5, INSTR_RRS_RRRDU }, + { "clgit", 0x71, INSTR_RIE_R0UU }, + { "cit", 0x72, INSTR_RIE_R0IU }, + { "clfit", 0x73, INSTR_RIE_R0UU }, + { "crj", 0x76, INSTR_RIE_RRPU }, { "clrj", 0x77, INSTR_RIE_RRPU }, - { "clgrj", 0x65, INSTR_RIE_RRPU }, - { "clib", 0xff, INSTR_RIS_RURDU }, - { "clgib", 0xfd, INSTR_RIS_RURDU }, - { "clij", 0x7f, INSTR_RIE_RUPU }, + { "cgij", 0x7c, INSTR_RIE_RUPI }, { "clgij", 0x7d, INSTR_RIE_RUPU }, - { "clfit", 0x73, INSTR_RIE_R0UU }, - { "clgit", 0x71, INSTR_RIE_R0UU }, - { "rnsbg", 0x54, INSTR_RIE_RRUUU }, - { "rxsbg", 0x57, INSTR_RIE_RRUUU }, - { "rosbg", 0x56, INSTR_RIE_RRUUU }, - { "risbg", 0x55, INSTR_RIE_RRUUU }, - { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, - { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "cij", 0x7e, INSTR_RIE_RUPI }, + { "clij", 0x7f, INSTR_RIE_RUPU }, { "ahik", 0xd8, INSTR_RIE_RRI0 }, { "aghik", 0xd9, INSTR_RIE_RRI0 }, { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, + { "cgrb", 0xe4, INSTR_RRS_RRRDU }, + { "clgrb", 0xe5, INSTR_RRS_RRRDU }, + { "crb", 0xf6, INSTR_RRS_RRRDU }, + { "clrb", 0xf7, INSTR_RRS_RRRDU }, + { "cgib", 0xfc, INSTR_RIS_RURDI }, + { "clgib", 0xfd, INSTR_RIS_RURDU }, + { "cib", 0xfe, INSTR_RIS_RURDI }, + { "clib", 0xff, INSTR_RIS_RURDU }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_ed[] = { +static struct s390_insn opcode_ed[] = { #ifdef CONFIG_64BIT { "mayl", 0x38, INSTR_RXF_FRRDF }, { "myl", 0x39, INSTR_RXF_FRRDF }, @@ -1296,20 +1490,24 @@ static struct insn opcode_ed[] = { { "my", 0x3b, INSTR_RXF_FRRDF }, { "mayh", 0x3c, INSTR_RXF_FRRDF }, { "myh", 0x3d, INSTR_RXF_FRRDF }, - { "ley", 0x64, INSTR_RXY_FRRD }, - { "ldy", 0x65, INSTR_RXY_FRRD }, - { "stey", 0x66, INSTR_RXY_FRRD }, - { "stdy", 0x67, INSTR_RXY_FRRD }, { "sldt", 0x40, INSTR_RXF_FRRDF }, - { "slxt", 0x48, INSTR_RXF_FRRDF }, { "srdt", 0x41, INSTR_RXF_FRRDF }, + { "slxt", 0x48, INSTR_RXF_FRRDF }, { "srxt", 0x49, INSTR_RXF_FRRDF }, { "tdcet", 0x50, INSTR_RXE_FRRD }, - { "tdcdt", 0x54, INSTR_RXE_FRRD }, - { "tdcxt", 0x58, INSTR_RXE_FRRD }, { "tdget", 0x51, INSTR_RXE_FRRD }, + { "tdcdt", 0x54, INSTR_RXE_FRRD }, { "tdgdt", 0x55, INSTR_RXE_FRRD }, + { "tdcxt", 0x58, INSTR_RXE_FRRD }, { "tdgxt", 0x59, INSTR_RXE_FRRD }, + { "ley", 0x64, INSTR_RXY_FRRD }, + { "ldy", 0x65, INSTR_RXY_FRRD }, + { "stey", 0x66, INSTR_RXY_FRRD }, + { "stdy", 0x67, INSTR_RXY_FRRD }, + { "czdt", 0xa8, INSTR_RSL_LRDFU }, + { "czxt", 0xa9, INSTR_RSL_LRDFU }, + { "cdzt", 0xaa, INSTR_RSL_LRDFU }, + { "cxzt", 0xab, INSTR_RSL_LRDFU }, #endif { "ldeb", 0x04, INSTR_RXE_FRRD }, { "lxdb", 0x05, INSTR_RXE_FRRD }, @@ -1352,7 +1550,7 @@ static struct insn opcode_ed[] = { /* Extracts an operand value from an instruction. */ static unsigned int extract_operand(unsigned char *code, - const struct operand *operand) + const struct s390_operand *operand) { unsigned int val; int bits; @@ -1388,16 +1586,11 @@ static unsigned int extract_operand(unsigned char *code, return val; } -static inline int insn_length(unsigned char code) -{ - return ((((int) code + 64) >> 7) + 1) << 1; -} - -static struct insn *find_insn(unsigned char *code) +struct s390_insn *find_insn(unsigned char *code) { unsigned char opfrag = code[1]; unsigned char opmask; - struct insn *table; + struct s390_insn *table; switch (code[0]) { case 0x01: @@ -1409,6 +1602,9 @@ static struct insn *find_insn(unsigned char *code) case 0xa7: table = opcode_a7; break; + case 0xaa: + table = opcode_aa; + break; case 0xb2: table = opcode_b2; break; @@ -1469,11 +1665,39 @@ static struct insn *find_insn(unsigned char *code) return NULL; } +/** + * insn_to_mnemonic - decode an s390 instruction + * @instruction: instruction to decode + * @buf: buffer to fill with mnemonic + * @len: length of buffer + * + * Decode the instruction at @instruction and store the corresponding + * mnemonic into @buf of length @len. + * @buf is left unchanged if the instruction could not be decoded. + * Returns: + * %0 on success, %-ENOENT if the instruction was not found. + */ +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) +{ + struct s390_insn *insn; + + insn = find_insn(instruction); + if (!insn) + return -ENOENT; + if (insn->name[0] == '\0') + snprintf(buf, len, "%s", + long_insn_name[(int) insn->name[1]]); + else + snprintf(buf, len, "%.5s", insn->name); + return 0; +} +EXPORT_SYMBOL_GPL(insn_to_mnemonic); + static int print_insn(char *buffer, unsigned char *code, unsigned long addr) { - struct insn *insn; + struct s390_insn *insn; const unsigned char *ops; - const struct operand *operand; + const struct s390_operand *operand; unsigned int value; char separator; char *ptr; @@ -1532,7 +1756,7 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) void show_code(struct pt_regs *regs) { - char *mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl"; + char *mode = user_mode(regs) ? "User" : "Krnl"; unsigned char code[64]; char buffer[64], *ptr; mm_segment_t old_fs; @@ -1541,7 +1765,7 @@ void show_code(struct pt_regs *regs) /* Get a snapshot of the 64 bytes surrounding the fault address. */ old_fs = get_fs(); - set_fs((regs->psw.mask & PSW_MASK_PSTATE) ? USER_DS : KERNEL_DS); + set_fs(user_mode(regs) ? USER_DS : KERNEL_DS); for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { addr = regs->psw.addr - 34 + start; if (__copy_from_user(code + start - 2, @@ -1577,10 +1801,15 @@ void show_code(struct pt_regs *regs) ptr += sprintf(ptr, "%s Code:", mode); hops = 0; while (start < end && hops < 8) { - *ptr++ = (start == 32) ? '>' : ' '; + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; addr = regs->psw.addr + start - 32; ptr += sprintf(ptr, ONELONG, addr); - opsize = insn_length(code[start]); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -1597,3 +1826,28 @@ void show_code(struct pt_regs *regs) } printk("\n"); } + +void print_fn_code(unsigned char *code, unsigned long len) +{ + char buffer[64], *ptr; + int opsize, i; + + while (len) { + ptr = buffer; + opsize = insn_length(*code); + if (opsize > len) + break; + ptr += sprintf(ptr, "%p: ", code); + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[i]); + *ptr++ = '\t'; + if (i < 4) + *ptr++ = '\t'; + ptr += print_insn(ptr, code, (unsigned long) code); + *ptr++ = '\n'; + *ptr++ = 0; + printk(buffer); + code += opsize; + len -= opsize; + } +} diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c new file mode 100644 index 00000000000..acb412442e5 --- /dev/null +++ b/arch/s390/kernel/dumpstack.c @@ -0,0 +1,217 @@ +/* + * Stack dumping functions + * + * Copyright IBM Corp. 1999, 2013 + */ + +#include <linux/kallsyms.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/utsname.h> +#include <linux/export.h> +#include <linux/kdebug.h> +#include <linux/ptrace.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/debug.h> +#include <asm/dis.h> +#include <asm/ipl.h> + +#ifndef CONFIG_64BIT +#define LONG "%08lx " +#define FOURLONG "%08lx %08lx %08lx %08lx\n" +static int kstack_depth_to_print = 12; +#else /* CONFIG_64BIT */ +#define LONG "%016lx " +#define FOURLONG "%016lx %016lx %016lx %016lx\n" +static int kstack_depth_to_print = 20; +#endif /* CONFIG_64BIT */ + +/* + * For show_trace we have tree different stack to consider: + * - the panic stack which is used if the kernel stack has overflown + * - the asynchronous interrupt stack (cpu related) + * - the synchronous kernel stack (process related) + * The stack trace can start at any of the three stack and can potentially + * touch all of them. The order is: panic stack, async stack, sync stack. + */ +static unsigned long +__show_trace(unsigned long sp, unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long addr; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk("([<%016lx>] %pSR)\n", addr, (void *)addr); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + addr = regs->psw.addr & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + low = sp; + sp = regs->gprs[15]; + } +} + +static void show_trace(struct task_struct *task, unsigned long *stack) +{ + const unsigned long frame_size = + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + register unsigned long __r15 asm ("15"); + unsigned long sp; + + sp = (unsigned long) stack; + if (!sp) + sp = task ? task->thread.ksp : __r15; + printk("Call Trace:\n"); +#ifdef CONFIG_CHECK_STACK + sp = __show_trace(sp, + S390_lowcore.panic_stack + frame_size - 4096, + S390_lowcore.panic_stack + frame_size); +#endif + sp = __show_trace(sp, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); + if (task) + __show_trace(sp, (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + __show_trace(sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + if (!task) + task = current; + debug_show_held_locks(task); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + register unsigned long *__r15 asm ("15"); + unsigned long *stack; + int i; + + if (!sp) + stack = task ? (unsigned long *) task->thread.ksp : __r15; + else + stack = sp; + + for (i = 0; i < kstack_depth_to_print; i++) { + if (((addr_t) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? "" : "\n"); + printk(LONG, *stack++); + } + printk("\n"); + show_trace(task, sp); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); +#endif +} + +static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +{ + return (regs->psw.mask & bits) / ((~bits + 1) & bits); +} + +void show_registers(struct pt_regs *regs) +{ + char *mode; + + mode = user_mode(regs) ? "User" : "Krnl"; + printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + if (!user_mode(regs)) + printk(" (%pSR)", (void *)regs->psw.addr); + printk("\n"); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), + mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), + mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), + mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), + mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), + mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); +#ifdef CONFIG_64BIT + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); +#endif + printk("\n%s GPRS: " FOURLONG, mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" " FOURLONG, + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" " FOURLONG, + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" " FOURLONG, + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!user_mode(regs)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + do_exit(SIGSEGV); +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3b7e7dddc32..0dff972a169 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -1,6 +1,4 @@ /* - * arch/s390/kernel/early.c - * * Copyright IBM Corp. 2007, 2009 * Author(s): Hongjie Yang <hongjie@us.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com> @@ -29,6 +27,7 @@ #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/sclp.h> +#include <asm/facility.h> #include "entry.h" /* @@ -48,10 +47,10 @@ static void __init reset_tod_clock(void) { u64 time; - if (store_clock(&time) == 0) + if (store_tod_clock(&time) == 0) return; /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) + if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) disabled_wait(0); sched_clock_base_cc = TOD_UNIX_EPOCH; @@ -94,6 +93,7 @@ static noinline __init void create_kernel_nss(void) unsigned int sinitrd_pfn, einitrd_pfn; #endif int response; + int hlen; size_t len; char *savesys_ptr; char defsys_cmd[DEFSYS_CMD_SIZE]; @@ -124,24 +124,27 @@ static noinline __init void create_kernel_nss(void) end_pfn = PFN_UP(__pa(&_end)); min_size = end_pfn << 2; - sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", - kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1, - eshared_pfn, end_pfn); + hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, + "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", + kernel_nss_name, stext_pfn - 1, stext_pfn, + eshared_pfn - 1, eshared_pfn, end_pfn); #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); min_size = einitrd_pfn << 2; - sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd, - sinitrd_pfn, einitrd_pfn); + hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); } #endif - sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK PARMREGS=0-13", - defsys_cmd, min_size); - sprintf(savesys_cmd, "SAVESYS %s \n IPL %s", - kernel_nss_name, kernel_nss_name); + snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); + defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; + snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", + kernel_nss_name, kernel_nss_name); + savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; __cpcmd(defsys_cmd, NULL, 0, &response); @@ -170,7 +173,7 @@ static noinline __init void create_kernel_nss(void) } /* re-initialize cputime accounting. */ - sched_clock_base_cc = get_clock(); + sched_clock_base_cc = get_tod_clock(); S390_lowcore.last_update_clock = sched_clock_base_cc; S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; S390_lowcore.user_timer = 0; @@ -203,6 +206,7 @@ static noinline __init void clear_bss_section(void) */ static noinline __init void init_kernel_storage_key(void) { +#if PAGE_DEFAULT_KEY unsigned long end_pfn, init_pfn; end_pfn = PFN_UP(__pa(&_end)); @@ -210,45 +214,70 @@ static noinline __init void init_kernel_storage_key(void) for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY, 0); +#endif } -static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); +static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); static noinline __init void detect_machine_type(void) { + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + /* Check current-configuration-level */ - if ((stsi(NULL, 0, 0, 0) >> 28) <= 2) { + if (stsi(NULL, 0, 0, 0) <= 2) { S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; return; } /* Get virtual-machine cpu information. */ - if (stsi(&vmms, 3, 2, 2) == -ENOSYS || !vmms.count) + if (stsi(vmms, 3, 2, 2) || !vmms->count) return; /* Running under KVM? If not we assume z/VM */ - if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3)) + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; else S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } -static __init void early_pgm_check_handler(void) +static __init void setup_topology(void) +{ +#ifdef CONFIG_64BIT + int max_mnest; + + if (!test_facility(11)) + return; + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) + break; + } + topology_max_mnest = max_mnest; +#endif +} + +static void early_pgm_check_handler(void) { - unsigned long addr; const struct exception_table_entry *fixup; + unsigned long cr0, cr0_new; + unsigned long addr; addr = S390_lowcore.program_old_psw.addr; fixup = search_exception_tables(addr & PSW_ADDR_INSN); if (!fixup) disabled_wait(0); - S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; + /* Disable low address protection before storing into lowcore. */ + __ctl_store(cr0, 0, 0); + cr0_new = cr0 & ~(1UL << 28); + __ctl_load(cr0_new, 0, 0); + S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; + __ctl_load(cr0, 0, 0); } static noinline __init void setup_lowcore_early(void) { psw_t psw; - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; @@ -258,35 +287,8 @@ static noinline __init void setup_lowcore_early(void) static noinline __init void setup_facility_list(void) { - unsigned long nr; - - S390_lowcore.stfl_fac_list = 0; - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); - memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); - nr = 4; /* # bytes stored by stfl */ - if (test_facility(7)) { - /* More facility bits available with stfle */ - register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ - : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) - : : "cc"); - nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ - } - memset((char *) S390_lowcore.stfle_fac_list + nr, 0, - MAX_FACILITY_BIT/8 - nr); -} - -static noinline __init void setup_hpage(void) -{ -#ifndef CONFIG_DEBUG_PAGEALLOC - if (!test_facility(2) || !test_facility(8)) - return; - S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; - __ctl_set_bit(0, 23); -#endif + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); } static __init void detect_mvpg(void) @@ -376,33 +378,41 @@ static __init void detect_diag44(void) static __init void detect_machine_facilities(void) { #ifdef CONFIG_64BIT + if (test_facility(8)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; + __ctl_set_bit(0, 23); + } + if (test_facility(78)) + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; if (test_facility(3)) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(8)) - S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; - if (test_facility(11)) - S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; - if (test_facility(27)) - S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) - S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; + if (test_facility(50) && test_facility(73)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + if (test_facility(66)) + S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; + if (test_facility(51)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; #endif } static __init void rescue_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); /* - * Move the initrd right behind the bss section in case it starts - * within the bss section. So we don't overwrite it when the bss - * section gets cleared. + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. */ if (!INITRD_START || !INITRD_SIZE) return; - if (INITRD_START >= (unsigned long) __bss_stop) + if (INITRD_START >= min_initrd_addr) return; - memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) __bss_stop; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; #endif } @@ -426,18 +436,22 @@ static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) } } -static void __init setup_boot_command_line(void) +static inline int has_ebcdic_char(const char *str) { int i; - /* convert arch command line to ascii */ - for (i = 0; i < ARCH_COMMAND_LINE_SIZE; i++) - if (COMMAND_LINE[i] & 0x80) - break; - if (i < ARCH_COMMAND_LINE_SIZE) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE-1] = 0; + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} +static void __init setup_boot_command_line(void) +{ + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* copy arch command line */ strlcpy(boot_command_line, strstrip(COMMAND_LINE), ARCH_COMMAND_LINE_SIZE); @@ -449,7 +463,6 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } - /* * Save ipl parameters, clear bss memory, initialize storage keys * and create a kernel NSS at startup if the SAVESYS= parm is defined @@ -463,7 +476,6 @@ void __init startup_init(void) init_kernel_storage_key(); lockdep_init(); lockdep_off(); - sort_main_extable(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); @@ -476,9 +488,8 @@ void __init startup_init(void) detect_diag9c(); detect_diag44(); detect_machine_facilities(); - setup_hpage(); - sclp_facilities_detect(); - detect_memory_layout(memory_chunk); + setup_topology(); + sclp_early_detect(); #ifdef CONFIG_DYNAMIC_FTRACE S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c index cc0dc609d73..b971c6be629 100644 --- a/arch/s390/kernel/ebcdic.c +++ b/arch/s390/kernel/ebcdic.c @@ -1,10 +1,9 @@ /* - * arch/s390/kernel/ebcdic.c * ECBDIC -> ASCII, ASCII -> ECBDIC, * upper to lower case (EBCDIC) conversion tables. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Martin Peschke <peschke@fh-brandenburg.de> */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 648f64239a9..70203265196 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1,16 +1,16 @@ /* - * arch/s390/kernel/entry.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/linkage.h> #include <linux/init.h> +#include <linux/linkage.h> +#include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ptrace.h> @@ -18,174 +18,123 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/page.h> - -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 12 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 20 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 28 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 36 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 44 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE - -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +#include <asm/sigp.h> +#include <asm/irq.h> + +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 4 +__PT_R2 = __PT_GPRS + 8 +__PT_R3 = __PT_GPRS + 12 +__PT_R4 = __PT_GPRS + 16 +__PT_R5 = __PT_GPRS + 20 +__PT_R6 = __PT_GPRS + 24 +__PT_R7 = __PT_GPRS + 28 +__PT_R8 = __PT_GPRS + 32 +__PT_R9 = __PT_GPRS + 36 +__PT_R10 = __PT_GPRS + 40 +__PT_R11 = __PT_GPRS + 44 +__PT_R12 = __PT_GPRS + 48 +__PT_R13 = __PT_GPRS + 524 +__PT_R14 = __PT_GPRS + 56 +__PT_R15 = __PT_GPRS + 60 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE + +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) #define BASED(name) name-system_call(%r13) -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 - l %r1,BASED(.Ltrace_irq_on_caller) - basr %r14,%r1 + l %r1,BASED(.Lhardirqs_on) + basr %r14,%r1 # call trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 - l %r1,BASED(.Ltrace_irq_off_caller) - basr %r14,%r1 - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF + l %r1,BASED(.Lhardirqs_off) + basr %r14,%r1 # call trace_hardirqs_off_caller #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 l %r1,BASED(.Llockdep_sys_exit) - basr %r14,%r1 -0: - .endm -#else -#define LOCKDEP_SYS_EXIT + basr %r14,%r1 # call lockdep_sys_exit #endif - -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ - - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lm %r10,%r11,\lc_from - sl %r10,\lc_to - sl %r11,\lc_to+4 - bc 3,BASED(0f) - sl %r10,BASED(.Lc_1) -0: al %r10,\lc_sum - al %r11,\lc_sum+4 - bc 12,BASED(1f) - al %r10,BASED(.Lc_1) -1: stm %r10,%r11,\lc_sum - .endm - - .macro SAVE_ALL_SVC psworg,savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - .endm - - .macro SAVE_ALL_BASE savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 .endm - .macro SAVE_ALL_PGM psworg,savearea - tm \psworg+1,0x01 # test problem state bit + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - bnz BASED(1f) - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bnz BASED(2f) - la %r12,\psworg - b BASED(stack_overflow) -#else - bz BASED(2f) + tml %r15,\stacksize - CONFIG_STACK_GUARD + la %r14,\savearea + jz stack_overflow #endif -1: l %r15,__LC_KERNEL_STACK # problem state -> load ksp -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm - .macro SAVE_ALL_ASYNC psworg,savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - bnz BASED(1f) # from user -> load async stack - clc \psworg+4(4),BASED(.Lcritical_end) - bhe BASED(0f) - clc \psworg+4(4),BASED(.Lcritical_start) - bl BASED(0f) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 - tm 1(%r12),0x01 # retest problem state after cleanup - bnz BASED(1f) -0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ? + .macro SWITCH_ASYNC savearea,stack,shift + tmh %r8,0x0001 # interrupting from user ? + jnz 1f + lr %r14,%r9 + sl %r14,BASED(.Lcritical_start) + cl %r14,BASED(.Lcritical_length) + jhe 0f + la %r11,\savearea # inside critical section, do cleanup + bras %r14,cleanup_critical + tmh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: l %r14,\stack # are we already on the target stack? slr %r14,%r15 - sra %r14,STACK_SHIFT -#ifdef CONFIG_CHECK_STACK - bnz BASED(1f) - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bnz BASED(2f) - b BASED(stack_overflow) -#else - bz BASED(2f) -#endif -1: l %r15,__LC_ASYNC_STACK -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw + sra %r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: l %r15,\stack # load target stack +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro CREATE_STACK_FRAME savearea - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack + .macro ADD64 high,low,timer + al \high,\timer + al \low,4+\timer + brc 12,.+8 + ahi \high,1 .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(8),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - stpt __LC_EXIT_TIMER - lpsw \psworg # back to caller + .macro SUB64 high,low,timer + sl \high,\timer + sl \low,4+\timer + brc 3,.+8 + ahi \high,-1 + .endm + + .macro UPDATE_VTIME high,low,enter_timer + lm \high,\low,__LC_EXIT_TIMER + SUB64 \high,\low,\enter_timer + ADD64 \high,\low,__LC_USER_TIMER + stm \high,\low,__LC_USER_TIMER + lm \high,\low,__LC_LAST_UPDATE_TIMER + SUB64 \high,\low,__LC_EXIT_TIMER + ADD64 \high,\low,__LC_SYSTEM_TIMER + stm \high,\low,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm .macro REENABLE_IRQS - mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) - ni __SF_EMPTY(%r15),0xbf - ssm __SF_EMPTY(%r15) + st %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm .section .kprobes.text, "ax" @@ -197,24 +146,20 @@ STACK_SIZE = 1 << STACK_SHIFT * Returns: * gpr2 = prev */ - .globl __switch_to -__switch_to: - basr %r1,0 -0: l %r4,__THREAD_info(%r2) # get thread_info of prev - l %r5,__THREAD_info(%r3) # get thread_info of next - tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - bz 1f-0b(%r1) - ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next -1: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task +ENTRY(__switch_to) + stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task st %r15,__THREAD_ksp(%r2) # store kernel stack of prev - l %r15,__THREAD_ksp(%r3) # load kernel stack of next - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + l %r4,__THREAD_info(%r2) # get thread_info of prev + l %r5,__THREAD_info(%r3) # get thread_info of next + lr %r15,%r5 + ahi %r15,STACK_INIT # end of kernel stack of next st %r3,__LC_CURRENT # store task struct of next st %r5,__LC_THREAD_INFO # store thread info of next - ahi %r5,STACK_SIZE # end of kernel stack of next - st %r5,__LC_KERNEL_STACK # store end of kernel stack + st %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + l %r15,__THREAD_ksp(%r3) # load kernel stack of next + lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -223,73 +168,79 @@ __critical_start: * are executed with interrupts enabled. */ - .globl system_call -system_call: +ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct +sysc_stm: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lhi %r14,_PIF_SYSCALL +sysc_per: + l %r15,__LC_KERNEL_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + st %r14,__PT_FLAGS(%r11) sysc_do_svc: - xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 - bnz BASED(sysc_nr_ok) # svc number > 0 + l %r10,__TI_sysc_table(%r12) # 31 bit system call table + lh %r8,__PT_INT_CODE+2(%r11) + sla %r8,2 # shift and test for svc0 + jnz sysc_nr_ok # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) - bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) - lr %r7,%r1 # copy svc number to %r7 + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + lr %r8,%r1 + sla %r8,2 sysc_nr_ok: - sll %r7,2 # svc number *4 - l %r10,BASED(.Lsysc_table) - tm __TI_flags+2(%r12),_TIF_SYSCALL - mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r8,0(%r7,%r10) # get system call addr. - bnz BASED(sysc_tracesys) - basr %r14,%r8 # call sys_xxxx - st %r2,SP_R2(%r15) # store return value (change R2 on stack) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + st %r2,__PT_ORIG_GPR2(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r9,0(%r8,%r10) # get system call addr. + tm __TI_flags+3(%r12),_TIF_TRACE + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + st %r2,__PT_R2(%r11) # store return value sysc_return: LOCKDEP_SYS_EXIT sysc_tif: - tm __TI_flags+3(%r12),_TIF_WORK_SVC - bnz BASED(sysc_work) # there is work to do (signals etc.) + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __PT_FLAGS+3(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+3(%r12),_TIF_WORK + jnz sysc_work # check for thread work + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz sysc_work sysc_restore: - RESTORE_ALL __LC_RETURN_PSW,1 + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - -# # One of the work bits is on. Find out which one. # -sysc_work_tif: - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bo BASED(sysc_mcck_pending) +sysc_work: + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING + jo sysc_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bo BASED(sysc_reschedule) + jo sysc_reschedule + tm __PT_FLAGS+3(%r11),_PIF_PER_TRAP + jo sysc_singlestep tm __TI_flags+3(%r12),_TIF_SIGPENDING - bo BASED(sysc_sigpending) + jo sysc_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) # beware of critical section cleanup + jo sysc_notify_resume + tm __LC_CPU_FLAGS+3,_CIF_ASCE + jo sysc_uaccess + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule @@ -297,331 +248,265 @@ sysc_work_tif: sysc_reschedule: l %r1,BASED(.Lschedule) la %r14,BASED(sysc_return) - br %r1 # call scheduler + br %r1 # call schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) + l %r1,BASED(.Lhandle_mcck) la %r14,BASED(sysc_return) br %r1 # TIF bit will be cleared by handler # +# _CIF_ASCE is set, load user space asce +# +sysc_uaccess: + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE + lctl %c1,%c1,__LC_USER_ASCE # load primary asce + j sysc_return + +# # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) + tm __PT_FLAGS+3(%r11),_PIF_SYSCALL + jno sysc_return + lm %r2,%r7,__PT_R2(%r11) # load svc arguments + l %r10,__TI_sysc_table(%r12) # 31 bit system call table + xr %r8,%r8 # svc 0 returns -ENOSYS + clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) + jnl sysc_nr_ok # invalid svc number -> do svc 0 + lh %r8,__PT_INT_CODE+2(%r11) # load new svc number + sla %r8,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_notify_resume) la %r14,BASED(sysc_return) br %r1 # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument - lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - b BASED(sysc_nr_ok) # restart svc - # -# _TIF_PER_TRAP is set, call do_per_trap +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_return) # load adr. of system return - br %r1 # branch to do_per_trap + ni __PT_FLAGS+3(%r11),255-_PIF_PER_TRAP + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_per_trap) + la %r14,BASED(sysc_return) + br %r1 # call do_per_trap # # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before # and after the system call # sysc_tracesys: - l %r1,BASED(.Ltrace_entry) - la %r2,SP_PTREGS(%r15) # load pt_regs + l %r1,BASED(.Ltrace_enter) + lr %r2,%r11 # pass pointer to pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) - st %r0,SP_R2(%r15) - basr %r14,%r1 + icm %r0,3,__PT_INT_CODE+2(%r11) + st %r0,__PT_R2(%r11) + basr %r14,%r1 # call do_syscall_trace_enter cl %r2,BASED(.Lnr_syscalls) - bnl BASED(sysc_tracenogo) - lr %r7,%r2 - sll %r7,2 # svc number *4 - l %r8,0(%r7,%r10) + jnl sysc_tracenogo + lr %r8,%r2 + sll %r8,2 + l %r9,0(%r8,%r10) sysc_tracego: - lm %r3,%r6,SP_R3(%r15) - mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - st %r2,SP_R2(%r15) # store return value + lm %r3,%r7,__PT_R3(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + st %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_SYSCALL - bz BASED(sysc_return) + tm __TI_flags+3(%r12),_TIF_TRACE + jz sysc_return l %r1,BASED(.Ltrace_exit) - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs la %r14,BASED(sysc_return) - br %r1 + br %r1 # call do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # - .globl ret_from_fork -ret_from_fork: +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + l %r12,__LC_THREAD_INFO l %r13,__LC_SVC_NEW_PSW+4 - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - bo BASED(0f) - st %r15,SP_R15(%r15) # store stack pointer for new kthread -0: l %r1,BASED(.Lschedtail) - basr %r14,%r1 + l %r1,BASED(.Lschedule_tail) + basr %r14,%r1 # call schedule_tail TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_tracenogo) - -# -# kernel_execve function needs to deal with pt_regs that is not -# at the usual place -# - .globl kernel_execve -kernel_execve: - stm %r12,%r15,48(%r15) - lr %r14,%r15 - l %r13,__LC_SVC_NEW_PSW+4 - s %r15,BASED(.Lc_spsize) - st %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) - xc 0(__PT_SIZE,%r12),0(%r12) - l %r1,BASED(.Ldo_execve) - lr %r5,%r12 - basr %r14,%r1 - ltr %r2,%r2 - be BASED(0f) - a %r15,BASED(.Lc_spsize) - lm %r12,%r15,48(%r15) - br %r14 - # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts - l %r15,__LC_KERNEL_STACK # load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs - l %r12,__LC_THREAD_INFO - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - l %r1,BASED(.Lexecve_tail) - basr %r14,%r1 - b BASED(sysc_return) + ssm __LC_SVC_NEW_PSW # reenable interrupts + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jne sysc_tracenogo + # it's a kernel thread + lm %r9,%r10,__PT_R9(%r11) # load gprs +ENTRY(kernel_thread_starter) + la %r2,0(%r10) + basr %r14,%r9 + j sysc_tracenogo /* * Program check handler routine */ - .globl pgm_check_handler -pgm_check_handler: -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ +ENTRY(pgm_check_handler) stpt __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - bnz BASED(pgm_per) # got per exception -> special case - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) - mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: - l %r3,__LC_PGM_ILC # load program interruption code - l %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - la %r8,0x7f - nr %r8,%r3 - sll %r8,2 - l %r1,BASED(.Ljump_table) - l %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to interrupt-handler -pgm_exit: - b BASED(sysc_return) - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - bnz BASED(pgm_per_std) # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(8),__LC_SVC_NEW_PSW - be BASED(pgm_svcper) -# no interesting special case, ignore PER event - lm %r12,%r15,__LC_SAVE_AREA - lpsw 0x28 - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime2) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_PGM_OLD_PSW + tmh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + l %r15,__LC_KERNEL_STACK +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f l %r1,__TI_task(%r12) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - bz BASED(kernel_per) - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + tmh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - l %r3,__LC_PGM_ILC # load program interruption code - l %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - la %r8,0x7f - nr %r8,%r3 # clear per-event-bit and ilc - be BASED(pgm_exit2) # only per or per+check ? - sll %r8,2 + mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE + mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) l %r1,BASED(.Ljump_table) - l %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area + la %r10,0x7f + n %r10,__PT_INT_CODE(%r11) + je sysc_return + sll %r10,2 + l %r1,0(%r10,%r1) # load address of handler routine + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # branch to interrupt-handler -pgm_exit2: - b BASED(sysc_return) + j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# PER event in supervisor state, must be kprobes # -pgm_svcper: - SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - l %r8,__TI_task(%r12) - mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE - mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_svc) +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_per_trap) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_per_trap + j sysc_return # -# per was called from kernel, must be kprobes +# single stepped system call # -kernel_per: - REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - basr %r14,%r1 # branch to do_single_step - b BASED(pgm_exit) +pgm_svcper: + mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lhi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine */ - .globl io_int_handler -io_int_handler: +ENTRY(io_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_SAVE_AREA+16 - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(io_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_IO_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz io_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +io_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to standard irq handler + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: + basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: - tm __TI_flags+3(%r12),_TIF_WORK_INT - bnz BASED(io_work) # there is work to do (signals etc.) + tm __TI_flags+3(%r12),_TIF_WORK + jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz io_work io_restore: - RESTORE_ALL __LC_RETURN_PSW,0 + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW io_done: # # There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK_INT work +# 1) if we return to user space we can do all _TIF_WORK work # 2) if we return to kernel code and preemptive scheduling is enabled check # the preemption counter and if it is zero call preempt_schedule_irq # Before any work can be done, a switch to the kernel stack is required. # io_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bo BASED(io_work_user) # yes -> do resched & signal + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling icm %r0,15,__TI_precount(%r12) - bnz BASED(io_restore) # preemption disabled + jnz io_restore # preemption disabled tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bno BASED(io_restore) + jno io_restore # switch to kernel stack - l %r1,SP_R15(%r15) - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + l %r1,__PT_R15(%r11) + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 # TRACE_IRQS_ON already done at io_return, call # TRACE_IRQS_OFF to keep things symmetrical TRACE_IRQS_OFF - l %r1,BASED(.Lpreempt_schedule_irq) + l %r1,BASED(.Lpreempt_irq) basr %r14,%r1 # call preempt_schedule_irq - b BASED(io_return) + j io_return #else - b BASED(io_restore) + j io_restore #endif # @@ -629,36 +514,44 @@ io_work: # io_work_user: l %r1,__LC_KERNEL_STACK - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING # io_work_tif: - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bo BASED(io_mcck_pending) + tm __LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING + jo io_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bo BASED(io_reschedule) + jo io_reschedule tm __TI_flags+3(%r12),_TIF_SIGPENDING - bo BASED(io_sigpending) + jo io_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - bo BASED(io_notify_resume) - b BASED(io_return) # beware of critical section cleanup + jo io_notify_resume + tm __LC_CPU_FLAGS+3,_CIF_ASCE + jo io_uaccess + j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Ls390_handle_mcck) + l %r1,BASED(.Lhandle_mcck) basr %r14,%r1 # TIF bit will be cleared by handler TRACE_IRQS_OFF - b BASED(io_return) + j io_return + +# +# _CIF_ASCE is set, load user space asce +# +io_uaccess: + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE + lctl %c1,%c1,__LC_USER_ASCE # load primary asce + j io_return # # _TIF_NEED_RESCHED is set, call schedule @@ -666,63 +559,82 @@ io_mcck_pending: io_reschedule: # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Lschedule) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts basr %r14,%r1 # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_SIGPENDING is set, call do_signal # io_sigpending: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_SIGPENDING is set, call do_signal # io_notify_resume: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_notify_resume) - basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return /* * External interrupt handler routine */ - .globl ext_int_handler -ext_int_handler: +ENTRY(ext_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_SAVE_AREA+16 - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(ext_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_EXT_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +ext_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code - l %r4,__LC_EXT_PARAMS # get external parameters - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 - b BASED(io_return) + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ + j io_return + +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + st %r3,__SF_EMPTY(%r15) + basr %r1,0 + la %r1,psw_idle_lpsw+4-.(%r1) + st %r1,__SF_EMPTY+4(%r15) + oi __SF_EMPTY+4(%r15),0x80 + stck __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) +psw_idle_lpsw: + lpsw __SF_EMPTY(%r15) + br %r14 +psw_idle_end: __critical_end: @@ -730,133 +642,106 @@ __critical_end: * Machine check handler routines */ - .globl mcck_int_handler -mcck_int_handler: +ENTRY(mcck_int_handler) stck __LC_MCCK_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+32 - la %r12,__LC_MCK_OLD_PSW + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? - bo BASED(mcck_int_main) # yes -> rest of mcck code invalid - mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA + jo mcck_panic # yes -> rest of mcck code invalid + la %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - bo BASED(1f) + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - bl BASED(0f) + jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - bl BASED(0f) + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - bl BASED(0f) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) +2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - bno BASED(mcck_int_main) # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - bnz BASED(mcck_int_main) # from user -> load async stack - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) - bhe BASED(mcck_int_main) - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) - bl BASED(mcck_int_main) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 -mcck_int_main: - l %r14,__LC_PANIC_STACK # are we already on the panic stack? - slr %r14,%r15 - sra %r14,PAGE_SHIFT - be BASED(0f) - l %r15,__LC_PANIC_STACK # load panic stack -0: s %r15,BASED(.Lc_spsize) # make room for registers & psw - CREATE_STACK_FRAME __LC_SAVE_AREA+32 - mvc SP_PSW(8,%r15),0(%r12) - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - bno BASED(mcck_no_vtime) # no -> skip cleanup critical - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(mcck_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER -mcck_no_vtime: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(mcck_return) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER +mcck_skip: + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 + stm %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_machine_check) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno mcck_return l %r1,__LC_KERNEL_STACK # switch to kernel stack - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r15) lr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bno BASED(mcck_return) + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING + jno mcck_return TRACE_IRQS_OFF - l %r1,BASED(.Ls390_handle_mcck) - basr %r14,%r1 # call machine check handler + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # call s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit + mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? - bno BASED(0f) - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 + jno 0f + lm %r0,%r15,__PT_R0(%r11) stpt __LC_EXIT_TIMER - lpsw __LC_RETURN_MCCK_PSW # back to caller -0: lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 - lpsw __LC_RETURN_MCCK_PSW # back to caller + lpsw __LC_RETURN_MCCK_PSW +0: lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_MCCK_PSW - RESTORE_ALL __LC_RETURN_MCCK_PSW,0 - -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - spt restart_vtime-restart_base(%r1) - stck __LC_LAST_UPDATE_CLOCK - mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) - mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - l %r15,__LC_SAVE_AREA+60 # load ksp - lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs - lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone - l %r1,__LC_THREAD_INFO - mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) - mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) - xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - basr %r14,0 - l %r14,restart_addr-.(%r14) - br %r14 # branch to start_secondary -restart_addr: - .long start_secondary - .align 8 -restart_vtime: - .long 0x7fffffff,0xffffffff - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - lpsw restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000 -restart_go: -#endif +mcck_panic: + l %r14,__LC_PANIC_STACK + slr %r14,%r15 + sra %r14,PAGE_SHIFT + jz 0f + l %r15,__LC_PANIC_STACK + j mcck_skip +0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + st %r15,__LC_SAVE_AREA_RESTART + l %r15,__LC_RESTART_STACK + ahi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stm %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw + ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + l %r1,__LC_RESTART_FN # load fn, parm & source cpu + l %r2,__LC_RESTART_DATA + l %r3,__LC_RESTART_SOURCE + ltr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + lh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b .section .kprobes.text, "ax" @@ -868,213 +753,210 @@ restart_go: */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - sl %r15,BASED(.Lc_spsize) - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - ch %r12,BASED(.L0x020) # old psw addr == __LC_SVC_OLD_PSW ? - be BASED(0f) - ch %r12,BASED(.L0x028) # old psw addr == __LC_PGM_OLD_PSW ? - be BASED(0f) - la %r1,__LC_SAVE_AREA+16 -0: mvc SP_R12(16,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear back chain - l %r1,BASED(1f) # branch to kernel_stack_overflow - la %r2,SP_PTREGS(%r15) # load pt_regs - br %r1 + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(32,%r11),0(%r14) + l %r1,BASED(1f) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + lr %r2,%r11 # pass pointer to pt_regs + br %r1 # branch to kernel_stack_overflow 1: .long kernel_stack_overflow #endif -cleanup_table_system_call: - .long system_call + 0x80000000, sysc_do_svc + 0x80000000 -cleanup_table_sysc_tif: - .long sysc_tif + 0x80000000, sysc_restore + 0x80000000 -cleanup_table_sysc_restore: - .long sysc_restore + 0x80000000, sysc_done + 0x80000000 -cleanup_table_io_tif: - .long io_tif + 0x80000000, io_restore + 0x80000000 -cleanup_table_io_restore: - .long io_restore + 0x80000000, io_done + 0x80000000 +cleanup_table: + .long system_call + 0x80000000 + .long sysc_do_svc + 0x80000000 + .long sysc_tif + 0x80000000 + .long sysc_restore + 0x80000000 + .long sysc_done + 0x80000000 + .long io_tif + 0x80000000 + .long io_restore + 0x80000000 + .long io_done + 0x80000000 + .long psw_idle + 0x80000000 + .long psw_idle_end + 0x80000000 cleanup_critical: - clc 4(4,%r12),BASED(cleanup_table_system_call) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_system_call+4) - bl BASED(cleanup_system_call) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_tif) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_tif+4) - bl BASED(cleanup_sysc_tif) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_restore) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_restore+4) - bl BASED(cleanup_sysc_restore) -0: - clc 4(4,%r12),BASED(cleanup_table_io_tif) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_tif+4) - bl BASED(cleanup_io_tif) -0: - clc 4(4,%r12),BASED(cleanup_table_io_restore) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_restore+4) - bl BASED(cleanup_io_restore) -0: - br %r14 + cl %r9,BASED(cleanup_table) # system_call + jl 0f + cl %r9,BASED(cleanup_table+4) # sysc_do_svc + jl cleanup_system_call + cl %r9,BASED(cleanup_table+8) # sysc_tif + jl 0f + cl %r9,BASED(cleanup_table+12) # sysc_restore + jl cleanup_sysc_tif + cl %r9,BASED(cleanup_table+16) # sysc_done + jl cleanup_sysc_restore + cl %r9,BASED(cleanup_table+20) # io_tif + jl 0f + cl %r9,BASED(cleanup_table+24) # io_restore + jl cleanup_io_tif + cl %r9,BASED(cleanup_table+28) # io_done + jl cleanup_io_restore + cl %r9,BASED(cleanup_table+32) # psw_idle + jl 0f + cl %r9,BASED(cleanup_table+36) # psw_idle_end + jl cleanup_idle +0: br %r14 cleanup_system_call: - mvc __LC_RETURN_PSW(8),0(%r12) - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) - bh BASED(0f) - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) + # check if stpt has been executed + cl %r9,BASED(cleanup_system_call_insn) + jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: c %r12,BASED(.Lmck_old_psw) - la %r12,__LC_SAVE_AREA+32 - be BASED(0f) - la %r12,__LC_SAVE_AREA+16 -0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) - bhe BASED(cleanup_vtime) - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) - bh BASED(0f) - mvc __LC_SAVE_AREA(16),0(%r12) -0: st %r13,4(%r12) - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - st %r15,12(%r12) - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - mvc 0(4,%r12),__LC_THREAD_INFO -cleanup_vtime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) - bhe BASED(cleanup_stime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16) - bh BASED(cleanup_update) - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stm has been executed + cl %r9,BASED(cleanup_system_call_insn+4) + jh 0f + mvc __LC_SAVE_AREA_SYNC(32),0(%r11) +0: # set up saved registers r12, and r13 + st %r12,16(%r11) # r12 thread-info pointer + st %r13,20(%r11) # r13 literal-pool pointer + # check if the user time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+8) + jh 0f + l %r10,__LC_EXIT_TIMER + l %r15,__LC_EXIT_TIMER+4 + SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER + ADD64 %r10,%r15,__LC_USER_TIMER + st %r10,__LC_USER_TIMER + st %r15,__LC_USER_TIMER+4 +0: # check if the system time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+12) + jh 0f + l %r10,__LC_LAST_UPDATE_TIMER + l %r15,__LC_LAST_UPDATE_TIMER+4 + SUB64 %r10,%r15,__LC_EXIT_TIMER + ADD64 %r10,%r15,__LC_SYSTEM_TIMER + st %r10,__LC_SYSTEM_TIMER + st %r15,__LC_SYSTEM_TIMER+4 +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) - la %r12,__LC_RETURN_PSW + # set up saved register 11 + l %r15,__LC_KERNEL_STACK + la %r9,STACK_FRAME_OVERHEAD(%r15) + st %r9,12(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(4,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+3(%r9),_PIF_SYSCALL + # setup saved register 15 + st %r15,28(%r11) # r15 stack pointer + # set new psw address and exit + l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 br %r14 cleanup_system_call_insn: - .long sysc_saveall + 0x80000000 .long system_call + 0x80000000 - .long sysc_vtime + 0x80000000 - .long sysc_stime + 0x80000000 - .long sysc_update + 0x80000000 + .long sysc_stm + 0x80000000 + .long sysc_vtime + 0x80000000 + 36 + .long sysc_vtime + 0x80000000 + 76 cleanup_sysc_tif: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_tif) - la %r12,__LC_RETURN_PSW + l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 br %r14 cleanup_sysc_restore: - clc 4(4,%r12),BASED(cleanup_sysc_restore_insn) - be BASED(2f) - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc 4(4,%r12),BASED(cleanup_sysc_restore_insn+4) - be BASED(2f) - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - la %r12,__LC_SAVE_AREA+32 - be BASED(1f) - la %r12,__LC_SAVE_AREA+16 -1: mvc 0(16,%r12),SP_R12(%r15) - lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW + cl %r9,BASED(cleanup_sysc_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .long sysc_done - 4 + 0x80000000 - .long sysc_done - 8 + 0x80000000 cleanup_io_tif: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_tif) - la %r12,__LC_RETURN_PSW + l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 br %r14 cleanup_io_restore: - clc 4(4,%r12),BASED(cleanup_io_restore_insn) - be BASED(1f) - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_io_restore_insn+4) - be BASED(1f) - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -1: la %r12,__LC_RETURN_PSW + cl %r9,BASED(cleanup_io_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 - .long io_done - 8 + 0x80000000 + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER +0: # check if stck has been executed + cl %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) + mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3) +1: # account system time going idle + lm %r9,%r10,__LC_STEAL_TIMER + ADD64 %r9,%r10,__CLOCK_IDLE_ENTER(%r2) + SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK + stm %r9,%r10,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + lm %r9,%r10,__LC_SYSTEM_TIMER + ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER + SUB64 %r9,%r10,__TIMER_IDLE_ENTER(%r2) + stm %r9,%r10,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + # prepare return psw + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits + l %r9,24(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .long psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: + .long 0xfcfdffff /* * Integer constants */ - .align 4 -.Lc_spsize: .long SP_SIZE -.Lc_overhead: .long STACK_FRAME_OVERHEAD -.Lnr_syscalls: .long NR_syscalls -.L0x018: .short 0x018 -.L0x020: .short 0x020 -.L0x028: .short 0x028 -.L0x030: .short 0x030 -.L0x038: .short 0x038 -.Lc_1: .long 1 + .align 4 +.Lnr_syscalls: + .long NR_syscalls +.Lvtimer_max: + .quad 0x7fffffffffffffff /* * Symbol constants */ -.Ls390_mcck: .long s390_do_machine_check -.Ls390_handle_mcck: - .long s390_handle_mcck -.Lmck_old_psw: .long __LC_MCK_OLD_PSW -.Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint -.Ldo_signal: .long do_signal -.Ldo_notify_resume: - .long do_notify_resume -.Lhandle_per: .long do_per_trap -.Ldo_execve: .long do_execve -.Lexecve_tail: .long execve_tail -.Ljump_table: .long pgm_check_table -.Lschedule: .long schedule +.Ldo_machine_check: .long s390_do_machine_check +.Lhandle_mcck: .long s390_handle_mcck +.Ldo_IRQ: .long do_IRQ +.Ldo_signal: .long do_signal +.Ldo_notify_resume: .long do_notify_resume +.Ldo_per_trap: .long do_per_trap +.Ljump_table: .long pgm_check_table +.Lschedule: .long schedule #ifdef CONFIG_PREEMPT -.Lpreempt_schedule_irq: - .long preempt_schedule_irq +.Lpreempt_irq: .long preempt_schedule_irq #endif -.Ltrace_entry: .long do_syscall_trace_enter -.Ltrace_exit: .long do_syscall_trace_exit -.Lschedtail: .long schedule_tail -.Lsysc_table: .long sys_call_table +.Ltrace_enter: .long do_syscall_trace_enter +.Ltrace_exit: .long do_syscall_trace_exit +.Lschedule_tail: .long schedule_tail +.Lsysc_per: .long sysc_per + 0x80000000 #ifdef CONFIG_TRACE_IRQFLAGS -.Ltrace_irq_on_caller: - .long trace_hardirqs_on_caller -.Ltrace_irq_off_caller: - .long trace_hardirqs_off_caller +.Lhardirqs_on: .long trace_hardirqs_on_caller +.Lhardirqs_off: .long trace_hardirqs_off_caller #endif #ifdef CONFIG_LOCKDEP -.Llockdep_sys_exit: - .long lockdep_sys_exit +.Llockdep_sys_exit: .long lockdep_sys_exit #endif -.Lcritical_start: - .long __critical_start + 0x80000000 -.Lcritical_end: - .long __critical_end + 0x80000000 -.Lcleanup_critical: - .long cleanup_critical +.Lcritical_start: .long __critical_start + 0x80000000 +.Lcritical_length: .long __critical_end - __critical_start .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esa diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 17a6f83a2d6..6ac78192455 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,53 +4,70 @@ #include <linux/types.h> #include <linux/signal.h> #include <asm/ptrace.h> +#include <asm/cputime.h> -typedef void pgm_check_handler_t(struct pt_regs *, long, unsigned long); -extern pgm_check_handler_t *pgm_check_table[128]; -pgm_check_handler_t do_protection_exception; -pgm_check_handler_t do_dat_exception; +extern void *restart_stack; +extern unsigned long suspend_zero_pages; -extern int sysctl_userprocess_debug; +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, unsigned long); + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); + +void addressing_exception(struct pt_regs *regs); +void data_exception(struct pt_regs *regs); +void default_trap_handler(struct pt_regs *regs); +void divide_exception(struct pt_regs *regs); +void execute_exception(struct pt_regs *regs); +void hfp_divide_exception(struct pt_regs *regs); +void hfp_overflow_exception(struct pt_regs *regs); +void hfp_significance_exception(struct pt_regs *regs); +void hfp_sqrt_exception(struct pt_regs *regs); +void hfp_underflow_exception(struct pt_regs *regs); +void illegal_op(struct pt_regs *regs); +void operand_exception(struct pt_regs *regs); +void overflow_exception(struct pt_regs *regs); +void privileged_op(struct pt_regs *regs); +void space_switch_exception(struct pt_regs *regs); +void special_op_exception(struct pt_regs *regs); +void specification_exception(struct pt_regs *regs); +void transaction_exception(struct pt_regs *regs); +void translation_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); -int handle_signal32(unsigned long sig, struct k_sigaction *ka, +void handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); -void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); -int __cpuinit start_secondary(void *cpuvoid); +void __init init_IRQ(void); +void do_IRQ(struct pt_regs *regs, int irq); +void do_restart(void); void __init startup_init(void); -void die(const char * str, struct pt_regs * regs, long err); +void die(struct pt_regs *regs, const char *str); +int setup_profiling_timer(unsigned int multiplier); +void __init time_init(void); +int pfn_is_nosave(unsigned long); +void s390_early_resume(void); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; -long sys_mmap2(struct s390_mmap_arg_struct __user *arg); -long sys_s390_ipc(uint call, int first, unsigned long second, - unsigned long third, void __user *ptr); long sys_s390_personality(unsigned int personality); -long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low, - size_t len, int advice); -long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); -long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, - u32 len_low); -long sys_fork(void); -long sys_clone(unsigned long newsp, unsigned long clone_flags, - int __user *parent_tidptr, int __user *child_tidptr); -long sys_vfork(void); -void execve_tail(void); -long sys_execve(const char __user *name, const char __user *const __user *argv, - const char __user *const __user *envp); -long sys_sigsuspend(int history0, int history1, old_sigset_t mask); -long sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact); -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss); -long sys_sigreturn(void); -long sys_rt_sigreturn(void); -long sys32_sigreturn(void); -long sys32_rt_sigreturn(void); +long sys_s390_runtime_instr(int command, int signum); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9d3603d6c51..f2e674c702e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1,16 +1,16 @@ /* - * arch/s390/kernel/entry64.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/linkage.h> #include <linux/init.h> +#include <linux/linkage.h> +#include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ptrace.h> @@ -18,183 +18,152 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/page.h> - -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 64 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 72 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 80 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 88 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 96 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +#include <asm/sigp.h> +#include <asm/irq.h> + +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 8 +__PT_R2 = __PT_GPRS + 16 +__PT_R3 = __PT_GPRS + 24 +__PT_R4 = __PT_GPRS + 32 +__PT_R5 = __PT_GPRS + 40 +__PT_R6 = __PT_GPRS + 48 +__PT_R7 = __PT_GPRS + 56 +__PT_R8 = __PT_GPRS + 64 +__PT_R9 = __PT_GPRS + 72 +__PT_R10 = __PT_GPRS + 80 +__PT_R11 = __PT_GPRS + 88 +__PT_R12 = __PT_GPRS + 96 +__PT_R13 = __PT_GPRS + 104 +__PT_R14 = __PT_GPRS + 112 +__PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) #define BASED(name) name-system_call(%r13) - .macro HANDLE_SIE_INTERCEPT -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - lg %r3,__LC_SIE_HOOK - ltgr %r3,%r3 - jz 0f - basr %r14,%r3 -0: -#endif - .endm - -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 brasl %r14,trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 brasl %r14,trace_hardirqs_off_caller - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 brasl %r14,lockdep_sys_exit -0: - .endm -#else -#define LOCKDEP_SYS_EXIT #endif - - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lg %r10,\lc_from - slg %r10,\lc_to - alg %r10,\lc_sum - stg %r10,\lc_sum .endm -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ + .macro LPP newpp +#if IS_ENABLED(CONFIG_KVM) + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP + jz .+8 + .insn s,0xb2800000,\newpp +#endif + .endm - .macro SAVE_ALL_SVC psworg,savearea - stmg %r11,%r15,\savearea - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r11,__LC_LAST_BREAK + .macro HANDLE_SIE_INTERCEPT scratch,reason +#if IS_ENABLED(CONFIG_KVM) + tmhh %r8,0x0001 # interrupting from user ? + jnz .+62 + lgr \scratch,%r9 + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 + # Some program interrupts are suppressing (e.g. protection). + # We must also check the instruction after SIE in that case. + # do_protection_exception will rewind to rewind_pad + jh .+42 + .else + jhe .+42 + .endif + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason +#endif .endm - .macro SAVE_ALL_PGM psworg,savearea - stmg %r11,%r15,\savearea - tm \psworg+1,0x01 # test problem state bit + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - jnz 1f - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jnz 2f - la %r12,\psworg - j stack_overflow -#else - jz 2f + tml %r15,\stacksize - CONFIG_STACK_GUARD + lghi %r14,\savearea + jz stack_overflow #endif -1: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp -2: aghi %r15,-SP_SIZE # make room for registers & psw - larl %r13,system_call - lg %r11,__LC_LAST_BREAK .endm - .macro SAVE_ALL_ASYNC psworg,savearea - stmg %r11,%r15,\savearea - larl %r13,system_call - lg %r11,__LC_LAST_BREAK - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - jnz 1f # from user -> load kernel stack - clc \psworg+8(8),BASED(.Lcritical_end) + .macro SWITCH_ASYNC savearea,stack,shift + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f + lgr %r14,%r9 + slg %r14,BASED(.Lcritical_start) + clg %r14,BASED(.Lcritical_length) jhe 0f - clc \psworg+8(8),BASED(.Lcritical_start) - jl 0f + lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical - tm 1(%r12),0x01 # retest problem state after cleanup + tmhh %r8,0x0001 # retest problem state after cleanup jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ? +0: lg %r14,\stack # are we already on the target stack? slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT -#ifdef CONFIG_CHECK_STACK + srag %r14,%r14,\shift jnz 1f - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jnz 2f - j stack_overflow -#else - jz 2f -#endif -1: lg %r15,__LC_ASYNC_STACK # load async stack -2: aghi %r15,-SP_SIZE # make room for registers & psw + CHECK_STACK 1<<\shift,\savearea + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: lg %r15,\stack # load target stack +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro CREATE_STACK_FRAME savearea - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - mvc SP_R11(40,%r15),\savearea # move %r11-%r15 to stack - stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack + .macro UPDATE_VTIME scratch,enter_timer + lg \scratch,__LC_EXIT_TIMER + slg \scratch,\enter_timer + alg \scratch,__LC_USER_TIMER + stg \scratch,__LC_USER_TIMER + lg \scratch,__LC_LAST_UPDATE_TIMER + slg \scratch,__LC_EXIT_TIMER + alg \scratch,__LC_SYSTEM_TIMER + stg \scratch,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(16),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user - stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user - lpswe \psworg # back to caller + .macro LAST_BREAK scratch + srag \scratch,%r10,23 + jz .+10 + stg %r10,__TI_last_break(%r12) .endm - .macro LAST_BREAK - srag %r10,%r11,23 - jz 0f - stg %r11,__TI_last_break(%r12) -0: + .macro REENABLE_IRQS + stg %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm - .macro REENABLE_IRQS - mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) - ni __SF_EMPTY(%r15),0xbf - ssm __SF_EMPTY(%r15) + .macro STCK savearea +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + .insn s,0xb27c0000,\savearea # store clock fast +#else + .insn s,0xb2050000,\savearea # store clock +#endif .endm .section .kprobes.text, "ax" @@ -206,23 +175,20 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ * Returns: * gpr2 = prev */ - .globl __switch_to -__switch_to: +ENTRY(__switch_to) + stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev lg %r4,__THREAD_info(%r2) # get thread_info of prev lg %r5,__THREAD_info(%r3) # get thread_info of next - tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? - jz 0f - ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next -0: stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev - lg %r15,__THREAD_ksp(%r3) # load kernel stack of next - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + lgr %r15,%r5 + aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r5,__LC_THREAD_INFO # store thread info of next - aghi %r5,STACK_SIZE # end of kernel stack of next - stg %r5,__LC_KERNEL_STACK # store end of kernel stack + stg %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r3) # load kernel stack of next + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -231,79 +197,82 @@ __critical_start: * are executed with interrupts enabled. */ - .globl system_call -system_call: +ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct +sysc_stmg: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + lghi %r14,_PIF_SYSCALL +sysc_per: + lg %r15,__LC_KERNEL_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK + UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r13 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 # shift and test for svc 0 + lg %r10,__TI_sysc_table(%r12) # address of system call table + llgh %r8,__PT_INT_CODE+2(%r11) + slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 + llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) - slag %r7,%r1,2 # shift and test for svc 0 + sth %r1,__PT_INT_CODE+2(%r11) + slag %r8,%r1,2 sysc_nr_ok: - larl %r10,sys_call_table -#ifdef CONFIG_COMPAT - tm __TI_flags+5(%r12),(_TIF_31BIT>>16) # running in 31 bit mode ? - jno sysc_noemu - larl %r10,sys_call_table_emu # use 31 bit emulation system calls -sysc_noemu: -#endif - tm __TI_flags+6(%r12),_TIF_SYSCALL - mvc SP_ARGS(8,%r15),SP_R7(%r15) - lgf %r8,0(%r7,%r10) # load address of system call routine + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stg %r2,__PT_ORIG_GPR2(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lgf %r9,0(%r8,%r10) # get system call add. + tm __TI_flags+7(%r12),_TIF_TRACE jnz sysc_tracesys - basr %r14,%r8 # call sys_xxxx - stg %r2,SP_R2(%r15) # store return value (change R2 on stack) + basr %r14,%r9 # call sys_xxxx + stg %r2,__PT_R2(%r11) # store return value sysc_return: LOCKDEP_SYS_EXIT sysc_tif: - tm __TI_flags+7(%r12),_TIF_WORK_SVC - jnz sysc_work # there is work to do (signals etc.) + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __PT_FLAGS+7(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+7(%r12),_TIF_WORK + jnz sysc_work # check for work + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz sysc_work sysc_restore: - RESTORE_ALL __LC_RETURN_PSW,1 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore - -# # One of the work bits is on. Find out which one. # -sysc_work_tif: - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING +sysc_work: + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule + tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP + jo sysc_singlestep tm __TI_flags+7(%r12),_TIF_SIGPENDING jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep + tm __LC_CPU_FLAGS+7,_CIF_ASCE + jo sysc_uaccess j sysc_return # beware of critical section cleanup # @@ -311,56 +280,55 @@ sysc_work_tif: # sysc_reschedule: larl %r14,sysc_return - jg schedule # return point is sysc_return + jg schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: larl %r14,sysc_return jg s390_handle_mcck # TIF bit will be cleared by handler # +# _CIF_ASCE is set, load user space asce +# +sysc_uaccess: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + j sysc_return + +# # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - j sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + tm __PT_FLAGS+7(%r11),_PIF_SYSCALL + jno sysc_return + lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lg %r10,__TI_sysc_table(%r12) # address of system call table + lghi %r8,0 # svc 0 returns -ENOSYS + llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r8,%r1,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs larl %r14,sysc_return - jg do_notify_resume # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 - j sysc_nr_ok # restart svc + jg do_notify_resume # -# _TIF_PER_TRAP is set, call do_per_trap +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return # load adr. of system return + ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return jg do_per_trap # @@ -368,240 +336,184 @@ sysc_singlestep: # and after the system call # sysc_tracesys: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) - stg %r0,SP_R2(%r15) + llgh %r0,__PT_INT_CODE+2(%r11) + stg %r0,__PT_R2(%r11) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls clgr %r0,%r2 jnh sysc_tracenogo - sllg %r7,%r2,2 # svc number *4 - lgf %r8,0(%r7,%r10) + sllg %r8,%r2,2 + lgf %r9,0(%r8,%r10) sysc_tracego: - lmg %r3,%r6,SP_R3(%r15) - mvc SP_ARGS(8,%r15),SP_R7(%r15) - lg %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - stg %r2,SP_R2(%r15) # store return value + lmg %r3,%r7,__PT_R3(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lg %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + stg %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+7(%r12),_TIF_TRACE jz sysc_return - la %r2,SP_PTREGS(%r15) # load pt_regs - larl %r14,sysc_return # return point is sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return jg do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # - .globl ret_from_fork -ret_from_fork: - lg %r13,__LC_SVC_NEW_PSW+8 - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - jo 0f - stg %r15,SP_R15(%r15) # store stack pointer for new kthread -0: brasl %r14,schedule_tail +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + lg %r12,__LC_THREAD_INFO + brasl %r14,schedule_tail TRACE_IRQS_ON - stosm 24(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jne sysc_tracenogo + # it's a kernel thread + lmg %r9,%r10,__PT_R9(%r11) # load gprs +ENTRY(kernel_thread_starter) + la %r2,0(%r10) + basr %r14,%r9 j sysc_tracenogo -# -# kernel_execve function needs to deal with pt_regs that is not -# at the usual place -# - .globl kernel_execve -kernel_execve: - stmg %r12,%r15,96(%r15) - lgr %r14,%r15 - aghi %r15,-SP_SIZE - stg %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) - xc 0(__PT_SIZE,%r12),0(%r12) - lgr %r5,%r12 - brasl %r14,do_execve - ltgfr %r2,%r2 - je 0f - aghi %r15,SP_SIZE - lmg %r12,%r15,96(%r15) - br %r14 - # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts - lg %r15,__LC_KERNEL_STACK # load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r13,__LC_SVC_NEW_PSW+8 - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs - lg %r12,__LC_THREAD_INFO - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - brasl %r14,execve_tail - j sysc_return - /* * Program check handler routine */ - .globl pgm_check_handler -pgm_check_handler: -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ +ENTRY(pgm_check_handler) stpt __LC_SYNC_ENTER_TIMER - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - jnz pgm_per # got per exception -> special case - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) - mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK -pgm_no_vtime: - HANDLE_SIE_INTERCEPT - stg %r11,SP_ARGS(%r15) - lgf %r3,__LC_PGM_ILC # load program interruption code - lg %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - lghi %r8,0x7f - ngr %r8,%r3 - sll %r8,3 + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_PGM_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,1 + tmhh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r14 + lg %r15,__LC_KERNEL_STACK + lg %r14,__TI_task(%r12) + lghi %r13,__LC_PGM_TDB + tm __LC_PGM_ILC+2,0x02 # check for transaction abort + jz 2f + mvc __THREAD_trap_tdb(256,%r14),0(%r13) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + stg %r10,__PT_ARGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + tmhh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP + mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE + mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area + llgh %r10,__PT_INT_CODE+2(%r11) + nill %r10,0x007f + sll %r10,2 + je sysc_return + lgf %r1,0(%r10,%r1) # load address of handler routine + lgr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # branch to interrupt-handler -pgm_exit: j sysc_return # -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - jnz pgm_per_std # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW - je pgm_svcper -# no interesting special case, ignore PER event - lpswe __LC_PGM_OLD_PSW - -# -# Normal per exception +# PER event in supervisor state, must be kprobes # -pgm_per_std: - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime2 - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK -pgm_no_vtime2: - HANDLE_SIE_INTERCEPT - lg %r1,__TI_task(%r12) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - jz kernel_per - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE - mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - lgf %r3,__LC_PGM_ILC # load program interruption code - lg %r4,__LC_TRANS_EXC_CODE +pgm_kprobe: REENABLE_IRQS - lghi %r8,0x7f - ngr %r8,%r3 # clear per-event-bit and ilc - je pgm_exit2 - sll %r8,3 - larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to interrupt-handler -pgm_exit2: + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_per_trap j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# single stepped system call # pgm_svcper: - SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK - lg %r8,__TI_task(%r12) - mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE - mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - j sysc_do_svc - -# -# per was called from kernel, must be kprobes -# -kernel_per: - REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_per_trap - j pgm_exit + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,sysc_per + stg %r14,__LC_RETURN_PSW+8 + lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine */ - .globl io_int_handler -io_int_handler: - stck __LC_INT_CLOCK +ENTRY(io_int_handler) + STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+40 - CREATE_STACK_FRAME __LC_SAVE_AREA+40 - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz io_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER - LAST_BREAK -io_no_vtime: - HANDLE_SIE_INTERCEPT + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_IO_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,2 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user? + jz io_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +io_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_IRQ # call standard irq handler + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: + lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lghi %r3,THIN_INTERRUPT +io_call: + brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: - tm __TI_flags+7(%r12),_TIF_WORK_INT + tm __TI_flags+7(%r12),_TIF_WORK jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz io_work io_restore: - RESTORE_ALL __LC_RETURN_PSW,0 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW io_done: # # There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK_INT work +# 1) if we return to user space we can do all _TIF_WORK work # 2) if we return to kernel code and kvm is enabled check if we need to # modify the psw to leave SIE # 3) if we return to kernel code and preemptive scheduling is enabled check @@ -609,7 +521,7 @@ io_done: # Before any work can be done, a switch to the kernel stack is required. # io_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling @@ -618,10 +530,11 @@ io_work: tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jno io_restore # switch to kernel stack - lg %r1,SP_R15(%r15) - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lg %r1,__PT_R15(%r11) + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 # TRACE_IRQS_ON already done at io_return, call # TRACE_IRQS_OFF to keep things symmetrical @@ -637,18 +550,16 @@ io_work: # io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING # io_work_tif: - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo io_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo io_reschedule @@ -656,10 +567,12 @@ io_work_tif: jo io_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo io_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_ASCE + jo io_uaccess j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: # TRACE_IRQS_ON already done at io_return @@ -668,13 +581,21 @@ io_mcck_pending: j io_return # +# _CIF_ASCE is set, load user space asce +# +io_uaccess: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + j io_return + +# # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts brasl %r14,schedule # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -683,10 +604,10 @@ io_reschedule: # io_sigpending: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -695,171 +616,168 @@ io_sigpending: # io_notify_resume: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_notify_resume # call do_notify_resume - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return /* * External interrupt handler routine */ - .globl ext_int_handler -ext_int_handler: - stck __LC_INT_CLOCK +ENTRY(ext_int_handler) + STCK __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+40 - CREATE_STACK_FRAME __LC_SAVE_AREA+40 - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz ext_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER - LAST_BREAK -ext_no_vtime: - HANDLE_SIE_INTERCEPT + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_EXT_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,3 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +ext_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - lghi %r1,4096 - la %r2,SP_PTREGS(%r15) # address of register-save area - llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code - llgf %r4,__LC_EXT_PARAMS # get external parameter - lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter - brasl %r14,do_extint + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + stg %r3,__SF_EMPTY(%r15) + larl %r1,psw_idle_lpsw+4 + stg %r1,__SF_EMPTY+8(%r15) + STCK __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) +psw_idle_lpsw: + lpswe __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* * Machine check handler routines */ - .globl mcck_int_handler -mcck_int_handler: - stck __LC_MCCK_CLOCK +ENTRY(mcck_int_handler) + STCK __LC_MCCK_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - stmg %r11,%r15,__LC_SAVE_AREA+80 + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO larl %r13,system_call - lg %r11,__LC_LAST_BREAK - la %r12,__LC_MCK_OLD_PSW + lmg %r8,%r9,__LC_MCK_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_int_main # yes -> rest of mcck code invalid - la %r14,4095 - mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) + jo mcck_panic # yes -> rest of mcck code invalid + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - jo 1f + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - jl 0f + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - jl 0f +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) +2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_int_main # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - jnz mcck_int_main # from user -> load kernel stack - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) - jhe mcck_int_main - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) - jl mcck_int_main - brasl %r14,cleanup_critical -mcck_int_main: - lg %r14,__LC_PANIC_STACK # are we already on the panic stack? - slgr %r14,%r15 - srag %r14,%r14,PAGE_SHIFT - jz 0f - lg %r15,__LC_PANIC_STACK # load panic stack -0: aghi %r15,-SP_SIZE # make room for registers & psw - CREATE_STACK_FRAME __LC_SAVE_AREA+80 - mvc SP_PSW(16,%r15),0(%r12) - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - jno mcck_no_vtime # no -> no timer update - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz mcck_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER - LAST_BREAK -mcck_no_vtime: - la %r2,SP_PTREGS(%r15) # load pt_regs +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER + LAST_BREAK %r14 +mcck_skip: + lghi %r14,__LC_GPREGS_SAVE_AREA+64 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jno mcck_return - HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER -0: lpswe __LC_RETURN_MCCK_PSW # back to caller -mcck_done: + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +0: lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_MCCK_PSW -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - spt restart_vtime-restart_base(%r1) - stck __LC_LAST_UPDATE_CLOCK - mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) - mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - lg %r15,__LC_SAVE_AREA+120 # load ksp - lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs - lghi %r10,__LC_AREGS_SAVE_AREA - lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone - lg %r1,__LC_THREAD_INFO - mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) - mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) - xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - jg start_secondary - .align 8 -restart_vtime: - .long 0x7fffffff,0xffffffff - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - lpswe restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000,0x00000000,0x00000000 -restart_go: -#endif +mcck_panic: + lg %r14,__LC_PANIC_STACK + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + stg %r15,__LC_SAVE_AREA_RESTART + lg %r15,__LC_RESTART_STACK + aghi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stmg %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw + aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lg %r1,__LC_RESTART_FN # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA + lg %r3,__LC_RESTART_SOURCE + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b .section .kprobes.text, "ax" @@ -871,169 +789,243 @@ restart_go: */ stack_overflow: lg %r15,__LC_PANIC_STACK # change to panic stack - aghi %r15,-SP_SIZE - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack - la %r1,__LC_SAVE_AREA - chi %r12,__LC_SVC_OLD_PSW - je 0f - chi %r12,__LC_PGM_OLD_PSW - je 0f - la %r1,__LC_SAVE_AREA+40 -0: mvc SP_R11(40,%r15),0(%r1) # move %r11-%r15 to stack - mvc SP_ARGS(8,%r15),__LC_LAST_BREAK - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain - la %r2,SP_PTREGS(%r15) # load pt_regs + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow #endif -cleanup_table_system_call: - .quad system_call, sysc_do_svc -cleanup_table_sysc_tif: - .quad sysc_tif, sysc_restore -cleanup_table_sysc_restore: - .quad sysc_restore, sysc_done -cleanup_table_io_tif: - .quad io_tif, io_restore -cleanup_table_io_restore: - .quad io_restore, io_done + .align 8 +cleanup_table: + .quad system_call + .quad sysc_do_svc + .quad sysc_tif + .quad sysc_restore + .quad sysc_done + .quad io_tif + .quad io_restore + .quad io_done + .quad psw_idle + .quad psw_idle_end cleanup_critical: - clc 8(8,%r12),BASED(cleanup_table_system_call) + clg %r9,BASED(cleanup_table) # system_call jl 0f - clc 8(8,%r12),BASED(cleanup_table_system_call+8) + clg %r9,BASED(cleanup_table+8) # sysc_do_svc jl cleanup_system_call -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_tif) + clg %r9,BASED(cleanup_table+16) # sysc_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_tif+8) + clg %r9,BASED(cleanup_table+24) # sysc_restore jl cleanup_sysc_tif -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_restore) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_restore+8) + clg %r9,BASED(cleanup_table+32) # sysc_done jl cleanup_sysc_restore -0: - clc 8(8,%r12),BASED(cleanup_table_io_tif) + clg %r9,BASED(cleanup_table+40) # io_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_tif+8) + clg %r9,BASED(cleanup_table+48) # io_restore jl cleanup_io_tif -0: - clc 8(8,%r12),BASED(cleanup_table_io_restore) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_restore+8) + clg %r9,BASED(cleanup_table+56) # io_done jl cleanup_io_restore -0: - br %r14 + clg %r9,BASED(cleanup_table+64) # psw_idle + jl 0f + clg %r9,BASED(cleanup_table+72) # psw_idle_end + jl cleanup_idle +0: br %r14 + cleanup_system_call: - mvc __LC_RETURN_PSW(16),0(%r12) - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) + # check if stpt has been executed + clg %r9,BASED(cleanup_system_call_insn) jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER - cghi %r12,__LC_MCK_OLD_PSW - je 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+80 + cghi %r11,__LC_SAVE_AREA_ASYNC je 0f - la %r12,__LC_SAVE_AREA+40 -0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) - jhe cleanup_vtime - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stmg has been executed + clg %r9,BASED(cleanup_system_call_insn+8) + jh 0f + mvc __LC_SAVE_AREA_SYNC(64),0(%r11) +0: # check if base register setup + TIF bit load has been done + clg %r9,BASED(cleanup_system_call_insn+16) + jhe 0f + # set up saved registers r10 and r12 + stg %r10,16(%r11) # r10 last break + stg %r12,32(%r11) # r12 thread-info pointer +0: # check if the user time update has been done + clg %r9,BASED(cleanup_system_call_insn+24) jh 0f - mvc __LC_SAVE_AREA(40),0(%r12) -0: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - stg %r15,32(%r12) - stg %r11,0(%r12) - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - mvc 8(8,%r12),__LC_THREAD_INFO -cleanup_vtime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) - jhe cleanup_stime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32) - jh cleanup_update - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + lg %r15,__LC_EXIT_TIMER + slg %r15,__LC_SYNC_ENTER_TIMER + alg %r15,__LC_USER_TIMER + stg %r15,__LC_USER_TIMER +0: # check if the system time update has been done + clg %r9,BASED(cleanup_system_call_insn+32) + jh 0f + lg %r15,__LC_LAST_UPDATE_TIMER + slg %r15,__LC_EXIT_TIMER + alg %r15,__LC_SYSTEM_TIMER + stg %r15,__LC_SYSTEM_TIMER +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - srag %r12,%r11,23 - lg %r12,__LC_THREAD_INFO + # do LAST_BREAK + lg %r9,16(%r11) + srag %r9,%r9,23 jz 0f - stg %r11,__TI_last_break(%r12) -0: mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) - la %r12,__LC_RETURN_PSW + mvc __TI_last_break(8,%r12),16(%r11) +0: # set up saved register r11 + lg %r15,__LC_KERNEL_STACK + la %r9,STACK_FRAME_OVERHEAD(%r15) + stg %r9,24(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL + # setup saved register r15 + stg %r15,56(%r11) # r15 stack pointer + # set new psw address and exit + larl %r9,sysc_do_svc br %r14 cleanup_system_call_insn: - .quad sysc_saveall .quad system_call - .quad sysc_vtime - .quad sysc_stime - .quad sysc_update + .quad sysc_stmg + .quad sysc_per + .quad sysc_vtime+18 + .quad sysc_vtime+42 cleanup_sysc_tif: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_tif) - la %r12,__LC_RETURN_PSW + larl %r9,sysc_tif br %r14 cleanup_sysc_restore: - clc 8(8,%r12),BASED(cleanup_sysc_restore_insn) - je 2f - clc 8(8,%r12),BASED(cleanup_sysc_restore_insn+8) - jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - cghi %r12,__LC_MCK_OLD_PSW + clg %r9,BASED(cleanup_sysc_restore_insn) je 0f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+80 - je 1f - la %r12,__LC_SAVE_AREA+40 -1: mvc 0(40,%r12),SP_R11(%r15) - lmg %r0,%r10,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW + lg %r9,24(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .quad sysc_done - 4 - .quad sysc_done - 16 cleanup_io_tif: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_tif) - la %r12,__LC_RETURN_PSW + larl %r9,io_tif br %r14 cleanup_io_restore: - clc 8(8,%r12),BASED(cleanup_io_restore_insn) - je 1f - clc 8(8,%r12),BASED(cleanup_io_restore_insn+8) - jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - mvc __LC_SAVE_AREA+80(40),SP_R11(%r15) - lmg %r0,%r10,SP_R0(%r15) - lg %r15,SP_R15(%r15) -1: la %r12,__LC_RETURN_PSW + clg %r9,BASED(cleanup_io_restore_insn) + je 0f + lg %r9,24(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .quad io_done - 4 - .quad io_done - 16 + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER +0: # check if stck & stpt have been executed + clg %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) + mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) +1: # account system time going idle + lg %r9,__LC_STEAL_TIMER + alg %r9,__CLOCK_IDLE_ENTER(%r2) + slg %r9,__LC_LAST_UPDATE_CLOCK + stg %r9,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + lg %r9,__LC_SYSTEM_TIMER + alg %r9,__LC_LAST_UPDATE_TIMER + slg %r9,__TIMER_IDLE_ENTER(%r2) + stg %r9,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + # prepare return psw + nihh %r8,0xfcfd # clear irq & wait state bits + lg %r9,48(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .quad psw_idle_lpsw /* * Integer constants */ - .align 4 + .align 8 .Lcritical_start: - .quad __critical_start -.Lcritical_end: - .quad __critical_end + .quad __critical_start +.Lcritical_length: + .quad __critical_end - __critical_start + + +#if IS_ENABLED(CONFIG_KVM) +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ +ENTRY(sie64a) + stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + stg %r2,__SF_EMPTY(%r15) # save control block pointer + stg %r3,__SF_EMPTY+8(%r15) # save guest register save area + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: + lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done + LPP __SF_EMPTY(%r15) # set guest id + sie 0(%r14) +sie_done: + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions between sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: + lg %r14,__SF_EMPTY+8(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code + br %r14 +sie_fault: + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit + + .align 8 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap + + EX_TABLE(rewind_pad,sie_fault) + EX_TABLE(sie_exit,sie_fault) +#endif .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esame @@ -1045,6 +1037,7 @@ sys_call_table: #ifdef CONFIG_COMPAT #define SYSCALL(esa,esame,emu) .long emu + .globl sys_call_table_emu sys_call_table_emu: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 78bdf0e5dff..54d6493c4a5 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -15,12 +15,7 @@ #include <linux/kprobes.h> #include <trace/syscall.h> #include <asm/asm-offsets.h> - -#ifdef CONFIG_64BIT -#define MCOUNT_OFFSET_RET 12 -#else -#define MCOUNT_OFFSET_RET 22 -#endif +#include "entry.h" #ifdef CONFIG_DYNAMIC_FTRACE @@ -135,9 +130,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return 0; } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { - *(unsigned long *) data = 0; return 0; } @@ -155,14 +149,14 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent, if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) - goto out; - trace.func = (ip & PSW_ADDR_INSN) - MCOUNT_OFFSET_RET; + ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE; + trace.func = ip; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to. */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + if (!ftrace_graph_entry(&trace)) + goto out; + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) goto out; - } parent = (unsigned long) return_to_handler; out: return parent; @@ -182,7 +176,7 @@ int ftrace_enable_ftrace_graph_caller(void) offset = ((void *) prepare_ftrace_return - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write(ftrace_graph_caller + 2, + return probe_kernel_write((void *) ftrace_graph_caller + 2, &offset, sizeof(offset)); } @@ -190,7 +184,7 @@ int ftrace_disable_ftrace_graph_caller(void) { static unsigned short offset = 0x0002; - return probe_kernel_write(ftrace_graph_caller + 2, + return probe_kernel_write((void *) ftrace_graph_caller + 2, &offset, sizeof(offset)); } diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7061398341d..e88d35d7495 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -22,6 +22,7 @@ */ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> @@ -33,125 +34,7 @@ #endif __HEAD -#ifndef CONFIG_IPL - .org 0 - .long 0x00080000,0x80000000+startup # Just a restart PSW -#else -#ifdef CONFIG_IPL_TAPE -#define IPL_BS 1024 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x27000000,0x60000001 # by ipl to addresses 0-23. - .long 0x02000000,0x20000000+IPL_BS # (a PSW and two CCWs). - .long 0x00000000,0x00000000 # external old psw - .long 0x00000000,0x00000000 # svc old psw - .long 0x00000000,0x00000000 # program check old psw - .long 0x00000000,0x00000000 # machine check old psw - .long 0x00000000,0x00000000 # io old psw - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x000a0000,0x00000058 # external new psw - .long 0x000a0000,0x00000060 # svc new psw - .long 0x000a0000,0x00000068 # program check new psw - .long 0x000a0000,0x00000070 # machine check new psw - .long 0x00080000,0x80000000+.Lioint # io new psw - - .org 0x100 -# -# subroutine for loading from tape -# Parameters: -# R1 = device number -# R2 = load address -.Lloader: - st %r14,.Lldret - la %r3,.Lorbread # r3 = address of orb - la %r5,.Lirb # r5 = address of irb - st %r2,.Lccwread+4 # initialize CCW data addresses - lctl %c6,%c6,.Lcr6 - slr %r2,%r2 -.Lldlp: - la %r6,3 # 3 retries -.Lssch: - ssch 0(%r3) # load chunk of IPL_BS bytes - bnz .Llderr -.Lw4end: - bas %r14,.Lwait4io - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lrecov - slr %r7,%r7 - icm %r7,3,10(%r5) # get residual count - lcr %r7,%r7 - la %r7,IPL_BS(%r7) # IPL_BS-residual=#bytes read - ar %r2,%r7 # add to total size - tm 8(%r5),0x01 # found a tape mark ? - bnz .Ldone - l %r0,.Lccwread+4 # update CCW data addresses - ar %r0,%r7 - st %r0,.Lccwread+4 - b .Lldlp -.Ldone: - l %r14,.Lldret - br %r14 # r2 contains the total size -.Lrecov: - bas %r14,.Lsense # do the sensing - bct %r6,.Lssch # dec. retry count & branch - b .Llderr -# -# Sense subroutine -# -.Lsense: - st %r14,.Lsnsret - la %r7,.Lorbsense - ssch 0(%r7) # start sense command - bnz .Llderr - bas %r14,.Lwait4io - l %r14,.Lsnsret - tm 8(%r5),0x82 # do we have a problem ? - bnz .Llderr - br %r14 -# -# Wait for interrupt subroutine -# -.Lwait4io: - lpsw .Lwaitpsw -.Lioint: - c %r1,0xb8 # compare subchannel number - bne .Lwait4io - tsch 0(%r5) - slr %r0,%r0 - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lwtexit - tm 8(%r5),0x04 # got device end ? - bz .Lwait4io -.Lwtexit: - br %r14 -.Llderr: - lpsw .Lcrash - .align 8 -.Lorbread: - .long 0x00000000,0x0080ff00,.Lccwread - .align 8 -.Lorbsense: - .long 0x00000000,0x0080ff00,.Lccwsense - .align 8 -.Lccwread: - .long 0x02200000+IPL_BS,0x00000000 -.Lccwsense: - .long 0x04200001,0x00000000 -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 -.Lldret:.long 0 -.Lsnsret: .long 0 -#endif /* CONFIG_IPL_TAPE */ - -#ifdef CONFIG_IPL_VM #define IPL_BS 0x730 .org 0 .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded @@ -169,7 +52,7 @@ __HEAD .long 0x02000370,0x60000050 # the channel program the PSW .long 0x020003c0,0x60000050 # at location 0 is loaded. .long 0x02000410,0x60000050 # Initial processing starts - .long 0x02000460,0x60000050 # at 0xf0 = iplstart. + .long 0x02000460,0x60000050 # at 0x200 = iplstart. .long 0x020004b0,0x60000050 .long 0x02000500,0x60000050 .long 0x02000550,0x60000050 @@ -179,11 +62,54 @@ __HEAD .long 0x02000690,0x60000050 .long 0x020006e0,0x20000050 - .org 0xf0 + .org 0x200 +# +# subroutine to set architecture mode +# +.Lsetmode: +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +#endif + br %r14 + +# +# subroutine to wait for end I/O +# +.Lirqwait: +#ifdef CONFIG_64BIT + mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + br %r14 + .align 8 +.Lnewpsw: + .quad 0x0000000080000000,.Lioint +#else + mvc 0x78(8),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + br %r14 + .align 8 +.Lnewpsw: + .long 0x00080000,0x80000000+.Lioint +#endif +.Lwaitpsw: + .long 0x020a0000,0x80000000+.Lioint + # # subroutine for loading cards from the reader # .Lloader: + la %r4,0(%r14) la %r3,.Lorb # r2 = address of orb into r2 la %r5,.Lirb # r4 = address of irb la %r6,.Lccws @@ -200,9 +126,7 @@ __HEAD ssch 0(%r3) # load chunk of 1600 bytes bnz .Llderr .Lwait4irq: - mvc 0x78(8),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw -.Lioint: + bas %r14,.Lirqwait c %r1,0xb8 # compare subchannel number bne .Lwait4irq tsch 0(%r5) @@ -221,7 +145,7 @@ __HEAD sr %r0,%r3 # #ccws*80-residual=#bytes read ar %r2,%r0 - br %r14 # r2 contains the total size + br %r4 # r2 contains the total size .Lcont: ahi %r2,0x640 # add 0x640 to total size @@ -245,19 +169,15 @@ __HEAD .Lloadp:.long 0,0 .align 8 .Lcrash:.long 0x000a0000,0x00000000 -.Lnewpsw: - .long 0x00080000,0x80000000+.Lioint -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint .align 8 .Lccws: .rept 19 .long 0x02600050,0x00000000 .endr .long 0x02200050,0x00000000 -#endif /* CONFIG_IPL_VM */ iplstart: + bas %r14,.Lsetmode # Immediately switch to 64 bit mode lh %r1,0xb8 # test if subchannel number bct %r1,.Lnoload # is valid l %r1,0xb8 # load ipl subchannel number @@ -324,12 +244,11 @@ iplstart: clc 0(3,%r2),.L_eof bz .Lagain2 -#ifdef CONFIG_IPL_VM # # reset files in VM reader # - stidp __LC_SAVE_AREA # store cpuid - tm __LC_SAVE_AREA,0xff # running VM ? + stidp .Lcpuid # store cpuid + tm .Lcpuid,0xff # running VM ? bno .Lnoreset la %r2,.Lreset lhi %r3,26 @@ -341,24 +260,14 @@ iplstart: tm 31(%r5),0xff # bits is set in the schib bz .Lnoreset .Lwaitforirq: - mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw -.Lwaitrdrirq: - lpsw .Lrdrwaitpsw -.Lrdrint: + bas %r14,.Lirqwait # wait for IO interrupt c %r1,0xb8 # compare subchannel number - bne .Lwaitrdrirq + bne .Lwaitforirq la %r5,.Lirb tsch 0(%r5) .Lnoreset: b .Lnoload - .align 8 -.Lrdrnewpsw: - .long 0x00080000,0x80000000+.Lrdrint -.Lrdrwaitpsw: - .long 0x020a0000,0x80000000+.Lrdrint -#endif - # # everything loaded, go for it # @@ -374,8 +283,8 @@ iplstart: .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" .L_eof: .long 0xc5d6c600 /* C'EOF' */ .L_hdr: .long 0xc8c4d900 /* C'HDR' */ - -#endif /* CONFIG_IPL */ + .align 8 +.Lcpuid:.fill 8,1,0 # # SALIPL loader support. Based on a patch by Rob van der Heij. @@ -383,9 +292,9 @@ iplstart: # doesn't need a builtin ipl record. # .org 0x800 - .globl start -start: +ENTRY(start) stm %r0,%r15,0x07b0 # store registers + bas %r14,.Lsetmode # Immediately switch to 64 bit mode basr %r12,%r0 .base: l %r11,.parm @@ -448,37 +357,71 @@ start: # or linload or SALIPL # .org 0x10000 - .globl startup -startup: +ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +#endif basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 + xc 0xe00(256),0xe00 stck __LC_LAST_UPDATE_CLOCK - spt 5f-.LPG0(%r13) - mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) + spt 6f-.LPG0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) + xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST #ifndef CONFIG_MARCH_G5 # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST - stfl __LC_STFL_FAC_LIST # store facility list + .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list tm __LC_STFL_FAC_LIST,0x01 # stfle available ? jz 0f - la %r0,0 + la %r0,1 .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended -0: l %r0,__LC_STFL_FAC_LIST - n %r0,2f+8-.LPG0(%r13) - cl %r0,2f+8-.LPG0(%r13) - jne 1f - l %r0,__LC_STFL_FAC_LIST+4 - n %r0,2f+12-.LPG0(%r13) - cl %r0,2f+12-.LPG0(%r13) - je 3f -1: l %r15,.Lstack-.LPG0(%r13) + # verify if all required facilities are supported by the machine +0: la %r1,__LC_STFL_FAC_LIST + la %r2,3f+8-.LPG0(%r13) + l %r3,0(%r2) +1: l %r0,0(%r1) + n %r0,4(%r2) + cl %r0,4(%r2) + jne 2f + la %r1,4(%r1) + la %r2,4(%r2) + ahi %r3,-1 + jnz 1b + j 4f +2: l %r15,.Lstack-.LPG0(%r13) ahi %r15,-96 la %r2,.Lals_string-.LPG0(%r13) l %r3,.Lsclp_print-.LPG0(%r13) basr %r14,%r3 - lpsw 2f-.LPG0(%r13) # machine type not good enough, crash + lpsw 3f-.LPG0(%r13) # machine type not good enough, crash .Lals_string: .asciz "The Linux kernel requires more recent processor hardware" .Lsclp_print: @@ -486,54 +429,60 @@ startup: .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) .align 16 -2: .long 0x000a0000,0x8badcccc +3: .long 0x000a0000,0x8badcccc + +# List of facilities that are required. If not all facilities are present +# the kernel will crash. Format is number of facility words with bits set, +# followed by the facility words. + #if defined(CONFIG_64BIT) -#if defined(CONFIG_MARCH_Z196) - .long 0xc100efe3, 0xf46c0000 +#if defined(CONFIG_MARCH_ZEC12) + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 +#elif defined(CONFIG_MARCH_Z196) + .long 2, 0xc100eff2, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 0xc100efe3, 0xf0680000 + .long 2, 0xc100eff2, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) - .long 0xc100efc3, 0x00000000 + .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) - .long 0xc0002000, 0x00000000 + .long 1, 0xc0002000 #elif defined(CONFIG_MARCH_Z900) - .long 0xc0000000, 0x00000000 + .long 1, 0xc0000000 #endif #else -#if defined(CONFIG_MARCH_Z196) - .long 0x8100c880, 0x00000000 +#if defined(CONFIG_MARCH_ZEC12) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z196) + .long 1, 0x8100c880 #elif defined(CONFIG_MARCH_Z10) - .long 0x8100c880, 0x00000000 + .long 1, 0x8100c880 #elif defined(CONFIG_MARCH_Z9_109) - .long 0x8100c880, 0x00000000 + .long 1, 0x8100c880 #elif defined(CONFIG_MARCH_Z990) - .long 0x80002000, 0x00000000 + .long 1, 0x80002000 #elif defined(CONFIG_MARCH_Z900) - .long 0x80000000, 0x00000000 + .long 1, 0x80000000 #endif #endif -3: +4: #endif #ifdef CONFIG_64BIT - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode + /* Continue with 64bit startup code in head64.S */ sam64 # switch to 64 bit mode - larl %r13,4f - lmh %r0,%r15,0(%r13) # clear high-order half jg startup_continue -4: .fill 16,4,0x0 #else - mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) - l %r13,4f-.LPG0(%r13) + /* Continue with 31bit startup code in head31.S */ + l %r13,5f-.LPG0(%r13) b 0(%r13) .align 8 -4: .long startup_continue +5: .long startup_continue #endif + .align 8 -5: .long 0x7fffffff,0xffffffff +6: .long 0x7fffffff,0xffffffff + +#include "head_kdump.S" # # params at 10400 (setup.h) @@ -542,6 +491,8 @@ startup: .long 0,0 # IPL_DEVICE .long 0,0 # INITRD_START .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index b8f8dc12610..6dbe80983a2 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head31.S - * - * Copyright (C) IBM Corp. 2005,2010 + * Copyright IBM Corp. 2005, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -11,13 +9,13 @@ */ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> __HEAD - .globl startup_continue -startup_continue: +ENTRY(startup_continue) basr %r13,0 # get base .LPG1: @@ -45,7 +43,7 @@ startup_continue: # virtual and never return ... .align 8 .Lentry:.long 0x00080000,0x80000000 + _stext -.Lctl: .long 0x04b50002 # cr0: various things +.Lctl: .long 0x04b50000 # cr0: various things .long 0 # cr1: primary space segment table .long .Lduct # cr2: dispatchable unit control table .long 0 # cr3: instruction authorization @@ -61,7 +59,6 @@ startup_continue: .long 0 # cr13: home space segment table .long 0xc0000000 # cr14: machine check handling off .long 0 # cr15: linkage stack operations -.Lmchunk:.long memory_chunk .Lbss_bgn: .long __bss_start .Lbss_end: .long _end .Lparmaddr: .long PARMAREA @@ -78,22 +75,18 @@ startup_continue: .Lbase_cc: .long sched_clock_base_cc - .globl _ehead -_ehead: +ENTRY(_ehead) -#ifdef CONFIG_SHARED_KERNEL .org 0x100000 - 0x11000 # head.o ends at 0x11000 -#endif - # # startup-code, running in absolute addressing mode # - .globl _stext -_stext: basr %r13,0 # get base +ENTRY(_stext) + basr %r13,0 # get base .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index cdef6871741..d7c00507568 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head64.S - * - * Copyright (C) IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -11,13 +9,13 @@ */ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> __HEAD - .globl startup_continue -startup_continue: +ENTRY(startup_continue) larl %r1,sched_clock_base_cc mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK larl %r13,.LPG1 # get base @@ -46,7 +44,7 @@ startup_continue: .align 16 .LPG1: .Lentry:.quad 0x0000000180000000,_stext -.Lctl: .quad 0x04350002 # cr0: various things +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization @@ -61,7 +59,7 @@ startup_continue: .quad 0 # cr12: tracing off .quad 0 # cr13: home space segment table .quad 0xc0000000 # cr14: machine check handling off - .quad 0 # cr15: linkage stack operations + .quad .Llinkage_stack # cr15: linkage stack operations .Lpcmsk:.quad 0x0000000180000000 .L4malign:.quad 0xffffffffffc00000 .Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 @@ -69,29 +67,28 @@ startup_continue: .Lparmaddr: .quad PARMAREA .align 64 -.Lduct: .long 0,0,0,0,.Lduald,0,0,0 +.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 .long 0,0,0,0,0,0,0,0 +.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 .align 128 .Lduald:.rept 8 .long 0x80000000,0,0,0 # invalid access-list entries .endr +.Llinkage_stack: + .long 0,0,0x89000000,0,0,0,0x8a000000,0 - .globl _ehead -_ehead: +ENTRY(_ehead) -#ifdef CONFIG_SHARED_KERNEL .org 0x100000 - 0x11000 # head.o ends at 0x11000 -#endif - # # startup-code, running in absolute addressing mode # - .globl _stext -_stext: basr %r13,0 # get base +ENTRY(_stext) + basr %r13,0 # get base .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000000..085a95eb315 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,108 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <asm/sigp.h> + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +.align 8 +.Lrestart_psw: + .quad 0x0000000080000000,0x0000000000000000 + startup +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c deleted file mode 100644 index 4d1c9fb0b54..00000000000 --- a/arch/s390/kernel/init_task.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * arch/s390/kernel/init_task.c - * - * S390 version - * - * Derived from "arch/i386/kernel/init_task.c" - */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index a689070be28..633ca750453 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,8 +1,7 @@ /* - * arch/s390/kernel/ipl.c * ipl/reipl/dump support for Linux on s390. * - * Copyright IBM Corp. 2005,2007 + * Copyright IBM Corp. 2005, 2012 * Author(s): Michael Holzheu <holzheu@de.ibm.com> * Heiko Carstens <heiko.carstens@de.ibm.com> * Volker Sameske <sameske@de.ibm.com> @@ -16,6 +15,8 @@ #include <linux/ctype.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/crash_dump.h> +#include <linux/debug_locks.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -24,8 +25,10 @@ #include <asm/ebcdic.h> #include <asm/reset.h> #include <asm/sclp.h> -#include <asm/sigp.h> #include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -45,11 +48,13 @@ * - halt * - power off * - reipl + * - restart */ #define ON_PANIC_STR "on_panic" #define ON_HALT_STR "on_halt" #define ON_POFF_STR "on_poff" #define ON_REIPL_STR "on_reboot" +#define ON_RESTART_STR "on_restart" struct shutdown_action; struct shutdown_trigger { @@ -273,8 +278,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { int i; size_t len; @@ -336,8 +341,8 @@ static size_t scpdata_length(const char* buf, size_t count) return count; } -size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { size_t count; size_t i; @@ -567,7 +572,7 @@ static void __ipl_run(void *unused) static void ipl_run(struct shutdown_trigger *trigger) { - smp_switch_to_ipl_cpu(__ipl_run, NULL); + smp_call_ipl_cpu(__ipl_run, NULL); } static int __init ipl_init(void) @@ -749,9 +754,9 @@ static struct bin_attribute sys_reipl_fcp_scp_data_attr = { .write = reipl_fcp_scpdata_write, }; -DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", reipl_block_fcp->ipl_info.fcp.bootprog); @@ -946,6 +951,13 @@ static struct attribute_group reipl_nss_attr_group = { .attrs = reipl_nss_attrs, }; +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ + reipl_block_actual = reipl_block; + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block->hdr.len); +} + /* reipl type */ static int reipl_set_type(enum ipl_type type) @@ -961,7 +973,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_CCW_VM; else reipl_method = REIPL_METHOD_CCW_CIO; - reipl_block_actual = reipl_block_ccw; + set_reipl_block_actual(reipl_block_ccw); break; case IPL_TYPE_FCP: if (diag308_set_works) @@ -970,7 +982,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_FCP_RO_VM; else reipl_method = REIPL_METHOD_FCP_RO_DIAG; - reipl_block_actual = reipl_block_fcp; + set_reipl_block_actual(reipl_block_fcp); break; case IPL_TYPE_FCP_DUMP: reipl_method = REIPL_METHOD_FCP_DUMP; @@ -980,7 +992,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_NSS_DIAG; else reipl_method = REIPL_METHOD_NSS; - reipl_block_actual = reipl_block_nss; + set_reipl_block_actual(reipl_block_nss); break; case IPL_TYPE_UNKNOWN: reipl_method = REIPL_METHOD_DEFAULT; @@ -1097,7 +1109,7 @@ static void __reipl_run(void *unused) static void reipl_run(struct shutdown_trigger *trigger) { - smp_switch_to_ipl_cpu(__reipl_run, NULL); + smp_call_ipl_cpu(__reipl_run, NULL); } static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) @@ -1218,7 +1230,7 @@ static int __init reipl_fcp_init(void) /* sysfs: create fcp kset for mixing attr group and bin attrs */ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, &reipl_kset->kobj); - if (!reipl_kset) { + if (!reipl_fcp_kset) { free_page((unsigned long) reipl_block_fcp); return -ENOMEM; } @@ -1252,6 +1264,29 @@ static int __init reipl_fcp_init(void) return 0; } +static int __init reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } +out: + return reipl_set_type(reipl_type); +} + static int __init reipl_init(void) { int rc; @@ -1273,10 +1308,7 @@ static int __init reipl_init(void) rc = reipl_nss_init(); if (rc) return rc; - rc = reipl_set_type(ipl_info.type); - if (rc) - return rc; - return 0; + return reipl_type_init(); } static struct shutdown_action __refdata reipl_action = { @@ -1291,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = { /* FCP dump device attributes */ -DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", dump_block_fcp->ipl_info.fcp.bootprog); @@ -1382,6 +1414,16 @@ static struct kobj_attribute dump_type_attr = static struct kset *dump_kset; +static void diag308_dump(void *dump_block) +{ + diag308(DIAG308_SET, dump_block); + while (1) { + if (diag308(DIAG308_DUMP, NULL) != 0x302) + break; + udelay_simple(USEC_PER_SEC); + } +} + static void __dump_run(void *unused) { struct ccw_dev_id devid; @@ -1400,12 +1442,10 @@ static void __dump_run(void *unused) __cpcmd(buf, NULL, 0, NULL); break; case DUMP_METHOD_CCW_DIAG: - diag308(DIAG308_SET, dump_block_ccw); - diag308(DIAG308_DUMP, NULL); + diag308_dump(dump_block_ccw); break; case DUMP_METHOD_FCP_DIAG: - diag308(DIAG308_SET, dump_block_fcp); - diag308(DIAG308_DUMP, NULL); + diag308_dump(dump_block_fcp); break; default: break; @@ -1417,7 +1457,7 @@ static void dump_run(struct shutdown_trigger *trigger) if (dump_method == DUMP_METHOD_NONE) return; smp_send_stop(); - smp_switch_to_ipl_cpu(__dump_run, NULL); + smp_call_ipl_cpu(__dump_run, NULL); } static int __init dump_ccw_init(void) @@ -1495,30 +1535,12 @@ static struct shutdown_action __refdata dump_action = { static void dump_reipl_run(struct shutdown_trigger *trigger) { - preempt_disable(); - /* - * Bypass dynamic address translation (DAT) when storing IPL parameter - * information block address and checksum into the prefix area - * (corresponding to absolute addresses 0-8191). - * When enhanced DAT applies and the STE format control in one, - * the absolute address is formed without prefixing. In this case a - * normal store (stg/st) into the prefix area would no more match to - * absolute addresses 0-8191. - */ -#ifdef CONFIG_64BIT - asm volatile("sturg %0,%1" - :: "a" ((unsigned long) reipl_block_actual), - "a" (&lowcore_ptr[smp_processor_id()]->ipib)); -#else - asm volatile("stura %0,%1" - :: "a" ((unsigned long) reipl_block_actual), - "a" (&lowcore_ptr[smp_processor_id()]->ipib)); -#endif - asm volatile("stura %0,%1" - :: "a" (csum_partial(reipl_block_actual, - reipl_block_actual->hdr.len, 0)), - "a" (&lowcore_ptr[smp_processor_id()]->ipib_checksum)); - preempt_enable(); + unsigned long ipib = (unsigned long) reipl_block_actual; + unsigned int csum; + + csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); + mem_assign_absolute(S390_lowcore.ipib, ipib); + mem_assign_absolute(S390_lowcore.ipib_checksum, csum); dump_run(trigger); } @@ -1544,17 +1566,20 @@ static char vmcmd_on_reboot[128]; static char vmcmd_on_panic[128]; static char vmcmd_on_halt[128]; static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); static struct attribute *vmcmd_attrs[] = { &sys_vmcmd_on_reboot_attr.attr, &sys_vmcmd_on_panic_attr.attr, &sys_vmcmd_on_halt_attr.attr, &sys_vmcmd_on_poff_attr.attr, + &sys_vmcmd_on_restart_attr.attr, NULL, }; @@ -1566,7 +1591,7 @@ static struct kset *vmcmd_kset; static void vmcmd_run(struct shutdown_trigger *trigger) { - char *cmd, *next_cmd; + char *cmd; if (strcmp(trigger->name, ON_REIPL_STR) == 0) cmd = vmcmd_on_reboot; @@ -1576,20 +1601,14 @@ static void vmcmd_run(struct shutdown_trigger *trigger) cmd = vmcmd_on_halt; else if (strcmp(trigger->name, ON_POFF_STR) == 0) cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; else return; if (strlen(cmd) == 0) return; - do { - next_cmd = strchr(cmd, '\n'); - if (next_cmd) { - next_cmd[0] = 0; - next_cmd += 1; - } - __cpcmd(cmd, NULL, 0, NULL); - cmd = next_cmd; - } while (cmd != NULL); + __cpcmd(cmd, NULL, 0, NULL); } static int vmcmd_init(void) @@ -1611,11 +1630,10 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, static void stop_run(struct shutdown_trigger *trigger) { - if (strcmp(trigger->name, ON_PANIC_STR) == 0) + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) disabled_wait((unsigned long) __builtin_return_address(0)); - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + smp_stop_cpu(); } static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, @@ -1703,10 +1721,51 @@ static struct kobj_attribute on_panic_attr = static void do_panic(void) { + lgr_info_log(); on_panic_trigger.action->fn(&on_panic_trigger); stop_run(&on_panic_trigger); } +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &stop_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} + +static struct kobj_attribute on_restart_attr = + __ATTR(on_restart, 0644, on_restart_show, on_restart_store); + +static void __do_restart(void *ignore) +{ + __arch_local_irq_stosm(0x04); /* enable DAT */ + smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} + +void do_restart(void) +{ + tracing_off(); + debug_locks_off(); + lgr_info_log(); + smp_call_online_cpu(__do_restart, NULL); +} + /* on halt */ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; @@ -1783,7 +1842,9 @@ static void __init shutdown_triggers_init(void) if (sysfs_create_file(&shutdown_actions_kset->kobj, &on_poff_attr.attr)) goto fail; - + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_restart_attr.attr)) + goto fail; return; fail: panic("shutdown_triggers_init failed\n"); @@ -1959,13 +2020,19 @@ static void do_reset_calls(void) { struct reset_call *reset; +#ifdef CONFIG_64BIT + if (diag308_set_works) { + diag308_reset(); + return; + } +#endif list_for_each_entry(reset, &rcall, list) reset->fn(); } u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -1984,15 +2051,19 @@ void s390_reset_system(void) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ea5099c9709..99b0b09646c 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -1,135 +1,307 @@ /* - * Copyright IBM Corp. 2004,2010 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Thomas Spatzier (tspat@de.ibm.com) + * Copyright IBM Corp. 2004, 2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Thomas Spatzier <tspat@de.ibm.com>, * * This file contains interrupt related functions. */ -#include <linux/module.h> -#include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/seq_file.h> -#include <linux/cpu.h> #include <linux/proc_fs.h> #include <linux/profile.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> +#include <asm/irq.h> +#include <asm/hw_irq.h> +#include "entry.h" + +DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); +EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); struct irq_class { char *name; char *desc; }; -static const struct irq_class intrclass_names[] = { - {.name = "EXT" }, - {.name = "I/O" }, - {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "IPI", .desc = "[EXT] Signal Processor" }, - {.name = "TMR", .desc = "[EXT] CPU Timer" }, - {.name = "TAL", .desc = "[EXT] Timing Alert" }, - {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, - {.name = "DSD", .desc = "[EXT] DASD Diag" }, - {.name = "VRT", .desc = "[EXT] Virtio" }, - {.name = "SCP", .desc = "[EXT] Service Call" }, - {.name = "IUC", .desc = "[EXT] IUCV" }, - {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, - {.name = "DAS", .desc = "[I/O] DASD" }, - {.name = "C15", .desc = "[I/O] 3215" }, - {.name = "C70", .desc = "[I/O] 3270" }, - {.name = "TAP", .desc = "[I/O] Tape" }, - {.name = "VMR", .desc = "[I/O] Unit Record Devices" }, - {.name = "LCS", .desc = "[I/O] LCS" }, - {.name = "CLW", .desc = "[I/O] CLAW" }, - {.name = "CTC", .desc = "[I/O] CTC" }, - {.name = "APB", .desc = "[I/O] AP Bus" }, - {.name = "NMI", .desc = "[NMI] Machine Check" }, +/* + * The list of "main" irq classes on s390. This is the list of interrupts + * that appear both in /proc/stat ("intr" line) and /proc/interrupts. + * Historically only external and I/O interrupts have been part of /proc/stat. + * We can't add the split external and I/O sub classes since the first field + * in the "intr" line in /proc/stat is supposed to be the sum of all other + * fields. + * Since the external and I/O interrupt fields are already sums we would end + * up with having a sum which accounts each interrupt twice. + */ +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, }; /* + * The list of split external and I/O interrupts that appear only in + * /proc/interrupts. + * In addition this list contains non external / I/O events like NMIs. + */ +static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { + [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, + [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, + [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, + [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, + [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, + [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, + [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, + [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, + [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, + [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, + [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, + [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, + [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, + [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, + [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, + [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, + [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, + [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, + [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, + [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, + [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, + [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, + [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, +}; + +void __init init_IRQ(void) +{ + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} + +/* * show_interrupts is needed by /proc/interrupts. */ int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *) v, j; + int irq = *(loff_t *) v; + int cpu; get_online_cpus(); - if (i == 0) { + if (irq == 0) { seq_puts(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } - - if (i < NR_IRQS) { - seq_printf(p, "%s: ", intrclass_names[i].name); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - if (intrclass_names[i].desc) - seq_printf(p, " %s", intrclass_names[i].desc); - seq_putc(p, '\n'); - } + if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; + seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_putc(p, '\n'); + goto out; + } + for (irq = 0; irq < NR_ARCH_IRQS; irq++) { + seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); + if (irqclass_sub_desc[irq].desc) + seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + seq_putc(p, '\n'); + } +out: put_online_cpus(); - return 0; + return 0; +} + +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < THIN_INTERRUPT ? THIN_INTERRUPT : from; } /* - * For compatibilty only. S/390 specific setup of interrupts et al. is done - * much later in init_channel_subsystem(). + * Switch to the asynchronous interrupt stack for softirq execution. */ -void __init -init_IRQ(void) +void do_softirq_own_stack(void) { - /* nothing... */ + unsigned long old, new; + + /* Get current stack pointer. */ + asm volatile("la %0,0(15)" : "=a" (old)); + /* Check against async. stack address range. */ + new = S390_lowcore.async_stack; + if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { + /* Need to switch to the async. stack. */ + new -= STACK_FRAME_OVERHEAD; + ((struct stack_frame *) new)->back_chain = old; + asm volatile(" la 15,0(%0)\n" + " basr 14,%2\n" + " la 15,0(%1)\n" + : : "a" (new), "a" (old), + "a" (__do_softirq) + : "0", "1", "2", "3", "4", "5", "14", + "cc", "memory" ); + } else { + /* We are already on the async stack. */ + __do_softirq(); + } } /* - * Switch to the asynchronous interrupt stack for softirq execution. + * ext_int_hash[index] is the list head for all external interrupts that hash + * to this index. */ -asmlinkage void do_softirq(void) +static struct hlist_head ext_int_hash[32] ____cacheline_aligned; + +struct ext_int_info { + ext_int_handler_t handler; + struct hlist_node entry; + struct rcu_head rcu; + u16 code; +}; + +/* ext_int_hash_lock protects the handler lists for external interrupts */ +static DEFINE_SPINLOCK(ext_int_hash_lock); + +static inline int ext_hash(u16 code) +{ + BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash))); + + return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1); +} + +int register_external_irq(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index; + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; + index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_add_head_rcu(&p->entry, &ext_int_hash[index]); + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(register_external_irq); + +int unregister_external_irq(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (p->code == code && p->handler == handler) { + hlist_del_rcu(&p->entry); + kfree_rcu(p, rcu); + } + } + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(unregister_external_irq); + +static irqreturn_t do_ext_interrupt(int irq, void *dummy) { - unsigned long flags, old, new; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); - /* Check against async. stack address range. */ - new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { - /* Need to switch to the async. stack. */ - new -= STACK_FRAME_OVERHEAD; - ((struct stack_frame *) new)->back_chain = old; - - asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" - " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) - : "0", "1", "2", "3", "4", "5", "14", - "cc", "memory" ); - } else - /* We are already on the async stack. */ - __do_softirq(); + struct pt_regs *regs = get_irq_regs(); + struct ext_code ext_code; + struct ext_int_info *p; + int index; + + ext_code = *(struct ext_code *) ®s->int_code; + if (ext_code.code != EXT_IRQ_CLK_COMP) + __get_cpu_var(s390_idle).nohz_delay = 1; + + index = ext_hash(ext_code.code); + rcu_read_lock(); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (unlikely(p->code != ext_code.code)) + continue; + p->handler(ext_code, regs->int_parm, regs->int_parm_long); } + rcu_read_unlock(); + return IRQ_HANDLED; +} - local_irq_restore(flags); +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; + +void __init init_ext_interrupts(void) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_HLIST_HEAD(&ext_int_hash[idx]); + + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) +static DEFINE_SPINLOCK(irq_subclass_lock); +static unsigned char irq_subclass_refcount[64]; + +void irq_subclass_register(enum irq_subclass subclass) { - struct proc_dir_entry *root_irq_dir; + spin_lock(&irq_subclass_lock); + if (!irq_subclass_refcount[subclass]) + ctl_set_bit(0, subclass); + irq_subclass_refcount[subclass]++; + spin_unlock(&irq_subclass_lock); +} +EXPORT_SYMBOL(irq_subclass_register); - root_irq_dir = proc_mkdir("irq", NULL); - create_prof_cpu_mask(root_irq_dir); +void irq_subclass_unregister(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + irq_subclass_refcount[subclass]--; + if (!irq_subclass_refcount[subclass]) + ctl_clear_bit(0, subclass); + spin_unlock(&irq_subclass_lock); } -#endif +EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 00000000000..b987ab2c154 --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,70 @@ +/* + * Jump label s390 support + * + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <linux/jump_label.h> +#include <asm/ipl.h> + +#ifdef HAVE_JUMP_LABEL + +struct insn { + u16 opcode; + s32 offset; +} __packed; + +struct insn_args { + struct jump_entry *entry; + enum jump_label_type type; +}; + +static void __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn insn; + int rc; + + if (type == JUMP_LABEL_ENABLE) { + /* brcl 15,offset */ + insn.opcode = 0xc0f4; + insn.offset = (entry->target - entry->code) >> 1; + } else { + /* brcl 0,0 */ + insn.opcode = 0xc004; + insn.offset = 0; + } + + rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); + WARN_ON_ONCE(rc < 0); +} + +static int __sm_arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + + __jump_label_transform(args->entry, args->type); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + + args.entry = entry; + args.type = type; + + stop_machine(__sm_arch_jump_label_transform, &args, NULL); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __jump_label_transform(entry, type); +} + +#endif diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1d05d669107..bc71a7b95af 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation, 2002, 2006 + * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> */ @@ -26,19 +26,42 @@ #include <linux/stop_machine.h> #include <linux/kdebug.h> #include <linux/uaccess.h> -#include <asm/cacheflush.h> -#include <asm/sections.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hardirq.h> +#include <asm/cacheflush.h> +#include <asm/sections.h> +#include <asm/dis.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = { }; +DEFINE_INSN_CACHE_OPS(dmainsn); + +static void *alloc_dmainsn_page(void) +{ + return (void *)__get_free_page(GFP_KERNEL | GFP_DMA); +} + +static void free_dmainsn_page(void *page) +{ + free_page((unsigned long)page); +} + +struct kprobe_insn_cache kprobe_dmainsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex), + .alloc = alloc_dmainsn_page, + .free = free_dmainsn_page, + .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages), + .insn_size = MAX_INSN_SIZE, +}; + static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) { + if (!is_known_insn((unsigned char *)insn)) + return -EINVAL; switch (insn[0] >> 8) { case 0x0c: /* bassm */ case 0x0b: /* bsm */ @@ -47,6 +70,11 @@ static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) case 0xac: /* stnsm */ case 0xad: /* stosm */ return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } } switch (insn[0]) { case 0x0101: /* pr */ @@ -100,35 +128,160 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) fixup |= FIXUP_RETURN_REGISTER; break; case 0xc0: - if ((insn[0] & 0x0f) == 0x00 || /* larl */ - (insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } break; case 0xe3: /* bctg */ if ((insn[2] & 0xff) == 0x46) fixup = FIXUP_BRANCH_NOT_TAKEN; break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; } return fixup; } +static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) +{ + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} + +static void __kprobes copy_instruction(struct kprobe *p) +{ + s64 disp, new_disp; + u64 addr, new_addr; + + memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + if (!is_insn_relative_long(p->ainsn.insn)) + return; + /* + * For pc-relative instructions in RIL-b or RIL-c format patch the + * RI2 displacement field. We have already made sure that the insn + * slot for the patched instruction is within the same 2GB area + * as the original instruction (either kernel image or module area). + * Therefore the new displacement will always fit. + */ + disp = *(s32 *)&p->ainsn.insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&p->ainsn.insn[1] = new_disp; +} + +static inline int is_kernel_addr(void *addr) +{ + return addr < (void *)_end; +} + +static inline int is_module_addr(void *addr) +{ +#ifdef CONFIG_64BIT + BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); + if (addr < (void *)MODULES_VADDR) + return 0; + if (addr > (void *)MODULES_END) + return 0; +#endif + return 1; +} + +static int __kprobes s390_get_insn_slot(struct kprobe *p) +{ + /* + * Get an insn slot that is within the same 2GB area like the original + * instruction. That way instructions with a 32bit signed displacement + * field can be patched and executed within the insn slot. + */ + p->ainsn.insn = NULL; + if (is_kernel_addr(p->addr)) + p->ainsn.insn = get_dmainsn_slot(); + else if (is_module_addr(p->addr)) + p->ainsn.insn = get_insn_slot(); + return p->ainsn.insn ? 0 : -ENOMEM; +} + +static void __kprobes s390_free_insn_slot(struct kprobe *p) +{ + if (!p->ainsn.insn) + return; + if (is_kernel_addr(p->addr)) + free_dmainsn_slot(p->ainsn.insn, 0); + else + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; +} + int __kprobes arch_prepare_kprobe(struct kprobe *p) { if ((unsigned long) p->addr & 0x01) return -EINVAL; - /* Make sure the probe isn't going on a difficult instruction */ if (is_prohibited_opcode(p->addr)) return -EINVAL; - + if (s390_get_insn_slot(p)) + return -ENOMEM; p->opcode = *p->addr; - memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); - + copy_instruction(p); return 0; } @@ -169,6 +322,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { + s390_free_insn_slot(p); } static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, @@ -354,7 +508,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address; unsigned long trampoline_address; kprobe_opcode_t *correct_ret_addr; @@ -379,7 +533,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, orig_ret_address = 0; correct_ret_addr = NULL; trampoline_address = (unsigned long) &kretprobe_trampoline; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -398,7 +552,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_assert(ri, orig_ret_address, trampoline_address); correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -427,7 +581,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -457,7 +611,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; if (fixup & FIXUP_BRANCH_NOT_TAKEN) { - int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2; + int ilen = insn_length(p->ainsn.insn[0] >> 8); if (ip - (unsigned long) p->ainsn.insn == ilen) ip = (unsigned long) p->addr + ilen; } @@ -526,7 +680,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_SSDONE: /* * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting + * we can also use npre/npostfault count for accounting * these specific fault cases. */ kprobes_inc_nmissed_count(p); @@ -547,7 +701,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) */ entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (entry) { - regs->psw.addr = entry->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; return 1; } @@ -635,7 +789,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 00000000000..6ea6d69339b --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,186 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/facility.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE); +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = (void *) lgr_page; + + if (stsi(si, 1, 1, 1)) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = (void *) lgr_page; + + if (stsi(si, 2, 2, 2)) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + memcpy(&lgr_info->lpar_number, &si->lpar_number, + sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = (void *) lgr_page; + int i; + + if (stsi(si, 3, 2, 2)) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + int level; + + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + level = stsi(NULL, 0, 0, 0); + lgr_info->level = level; + if (level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer = + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) + return -ENOMEM; + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + lgr_timer_set(); + return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index a922d51df6b..719e27b2cf2 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,9 @@ /* - * arch/s390/kernel/machine_kexec.c - * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005, 2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ #include <linux/device.h> @@ -12,27 +11,187 @@ #include <linux/kexec.h> #include <linux/delay.h> #include <linux/reboot.h> +#include <linux/ftrace.h> +#include <linux/debug_locks.h> +#include <linux/suspend.h> #include <asm/cio.h> #include <asm/setup.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/smp.h> #include <asm/reset.h> #include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/elf.h> +#include <asm/asm-offsets.h> +#include <asm/os_info.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Initialize CPU ELF notes + */ +static void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu; + + this_cpu = smp_find_processor_id(stap()); + add_elf_notes(this_cpu); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + add_elf_notes(cpu); + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +/* + * PM notifier callback for kdump + */ +static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + if (crashk_res.start) + crash_map_reserved_pages(); + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + if (crashk_res.start) + crash_unmap_reserved_pages(); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init machine_kdump_pm_init(void) +{ + pm_notifier(machine_kdump_pm_cb, 0); + return 0; +} +arch_initcall(machine_kdump_pm_init); +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + setup_regs(); + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else { + vmem_remove_mapping(crashk_res.start, size); + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); + } +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; /* Can't replace kernel image since it is read-only. */ if (ipl_flags & IPL_NSS_VALID) - return -ENOSYS; + return -EOPNOTSUPP; + + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) @@ -50,27 +209,64 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { } -static void __machine_kexec(void *data) +void machine_crash_shutdown(struct pt_regs *regs) +{ +} + +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { relocate_kernel_t data_mover; struct kimage *image = data; - pfault_fini(); - s390_reset_system(); - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); } +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + __arch_local_irq_stosm(0x04); /* enable DAT */ + pfault_fini(); + tracing_off(); + debug_locks_off(); + if (image->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); + s390_reset_system(__do_machine_kdump, data); + } else { + s390_reset_system(__do_machine_kexec, data); + } + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ void machine_kexec(struct kimage *image) { + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; + tracer_disable(); smp_send_stop(); - smp_switch_to_ipl_cpu(__machine_kexec, image); + smp_call_ipl_cpu(__machine_kexec, image); } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 1e6a5579562..08dcf21cb8d 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -1,25 +1,24 @@ /* - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2009 * * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, * */ +#include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/ftrace.h> .section .kprobes.text, "ax" - .globl ftrace_stub -ftrace_stub: +ENTRY(ftrace_stub) br %r14 - .globl _mcount -_mcount: +ENTRY(_mcount) #ifdef CONFIG_DYNAMIC_FTRACE br %r14 - .globl ftrace_caller -ftrace_caller: +ENTRY(ftrace_caller) #endif stm %r2,%r5,16(%r15) bras %r1,2f @@ -35,14 +34,14 @@ ftrace_caller: la %r2,0(%r14) st %r0,__SF_BACKCHAIN(%r15) la %r3,0(%r3) + ahi %r2,-MCOUNT_INSN_SIZE l %r14,0b-0b(%r1) l %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER l %r2,100(%r15) l %r3,152(%r15) - .globl ftrace_graph_caller -ftrace_graph_caller: +ENTRY(ftrace_graph_caller) # The bras instruction gets runtime patched to call prepare_ftrace_return. # See ftrace_enable_ftrace_graph_caller. The patched instruction is: # bras %r14,prepare_ftrace_return @@ -56,8 +55,7 @@ ftrace_graph_caller: #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: +ENTRY(return_to_handler) stm %r2,%r5,16(%r15) st %r14,56(%r15) lr %r0,%r15 diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index e73667286ac..1c52eae3396 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -1,25 +1,24 @@ /* - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2009 * * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, * */ +#include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/ftrace.h> .section .kprobes.text, "ax" - .globl ftrace_stub -ftrace_stub: +ENTRY(ftrace_stub) br %r14 - .globl _mcount -_mcount: +ENTRY(_mcount) #ifdef CONFIG_DYNAMIC_FTRACE br %r14 - .globl ftrace_caller -ftrace_caller: +ENTRY(ftrace_caller) #endif larl %r1,function_trace_stop icm %r1,0xf,0(%r1) @@ -31,14 +30,14 @@ ftrace_caller: stg %r1,__SF_BACKCHAIN(%r15) lgr %r2,%r14 lg %r3,168(%r15) + aghi %r2,-MCOUNT_INSN_SIZE larl %r14,ftrace_trace_function lg %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER lg %r2,168(%r15) lg %r3,272(%r15) - .globl ftrace_graph_caller -ftrace_graph_caller: +ENTRY(ftrace_graph_caller) # The bras instruction gets runtime patched to call prepare_ftrace_return. # See ftrace_enable_ftrace_graph_caller. The patched instruction is: # bras %r14,prepare_ftrace_return @@ -52,8 +51,7 @@ ftrace_graph_caller: #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: +ENTRY(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 aghi %r15,-160 diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c deleted file mode 100644 index 0fbe4e32f7b..00000000000 --- a/arch/s390/kernel/mem_detect.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/ipl.h> -#include <asm/sclp.h> -#include <asm/setup.h> - -#define ADDR2G (1ULL << 31) - -static void find_memory_chunks(struct mem_chunk chunk[]) -{ - unsigned long long memsize, rnmax, rzm; - unsigned long addr = 0, size; - int i = 0, type; - - rzm = sclp_get_rzm(); - rnmax = sclp_get_rnmax(); - memsize = rzm * rnmax; - if (!rzm) - rzm = 1ULL << 17; - if (sizeof(long) == 4) { - rzm = min(ADDR2G, rzm); - memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; - } - do { - size = 0; - type = tprot(addr); - do { - size += rzm; - if (memsize && addr + size >= memsize) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - chunk[i].addr = addr; - chunk[i].size = size; - chunk[i].type = type; - i++; - } - addr += size; - } while (addr < memsize && i < MEMORY_CHUNKS); -} - -void detect_memory_layout(struct mem_chunk chunk[]) -{ - unsigned long flags, cr0; - - memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - /* Disable IRQs, DAT and low address protection so tprot does the - * right thing and we don't get scheduled away with low address - * protection disabled. - */ - flags = __arch_local_irq_stnsm(0xf8); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); - find_memory_chunks(chunk); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} -EXPORT_SYMBOL(detect_memory_layout); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index f7167ee4604..b89b59158b9 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/module.c - Kernel module help for s390. + * Kernel module help for s390. * * S390 version - * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 2002, 2003 * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -45,12 +44,16 @@ #define PLT_ENTRY_SIZE 20 #endif /* CONFIG_64BIT */ +#ifdef CONFIG_64BIT void *module_alloc(unsigned long size) { - if (size == 0) + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return vmalloc(size); + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); } +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) @@ -62,8 +65,7 @@ void module_free(struct module *mod, void *module_region) vfree(module_region); } -static void -check_rela(Elf_Rela *rela, struct module *me) +static void check_rela(Elf_Rela *rela, struct module *me) { struct mod_arch_syminfo *info; @@ -112,9 +114,8 @@ check_rela(Elf_Rela *rela, struct module *me) * Account for GOT and PLT relocations. We can't add sections for * got and plt but we can increase the core module size. */ -int -module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, - char *secstrings, struct module *me) +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *me) { Elf_Shdr *symtab; Elf_Sym *symbols; @@ -176,22 +177,52 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, return 0; } -int -apply_relocate(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, - unsigned int relsec, struct module *me) +static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, + int sign, int bits, int shift) { - printk(KERN_ERR "module %s: RELOCATION unsupported\n", - me->name); - return -ENOEXEC; + unsigned long umax; + long min, max; + + if (val & ((1UL << shift) - 1)) + return -ENOEXEC; + if (sign) { + val = (Elf_Addr)(((long) val) >> shift); + min = -(1L << (bits - 1)); + max = (1L << (bits - 1)) - 1; + if ((long) val < min || (long) val > max) + return -ENOEXEC; + } else { + val >>= shift; + umax = ((1UL << (bits - 1)) << 1) - 1; + if ((unsigned long) val > umax) + return -ENOEXEC; + } + + if (bits == 8) + *(unsigned char *) loc = val; + else if (bits == 12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (bits == 16) + *(unsigned short *) loc = val; + else if (bits == 20) + *(unsigned int *) loc = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | + (*(unsigned int *) loc & 0xf00000ff); + else if (bits == 32) + *(unsigned int *) loc = val; + else if (bits == 64) + *(unsigned long *) loc = val; + return 0; } -static int -apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, - struct module *me) +static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, + const char *strtab, struct module *me) { struct mod_arch_syminfo *info; Elf_Addr loc, val; int r_type, r_sym; + int rc = -ENOEXEC; /* This is where to make the change */ loc = base + rela->r_offset; @@ -203,6 +234,9 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = symtab[r_sym].st_value; switch (r_type) { + case R_390_NONE: /* No relocation. */ + rc = 0; + break; case R_390_8: /* Direct 8 bit. */ case R_390_12: /* Direct 12 bit. */ case R_390_16: /* Direct 16 bit. */ @@ -211,20 +245,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_64: /* Direct 64 bit. */ val += rela->r_addend; if (r_type == R_390_8) - *(unsigned char *) loc = val; + rc = apply_rela_bits(loc, val, 0, 8, 0); else if (r_type == R_390_12) - *(unsigned short *) loc = (val & 0xfff) | - (*(unsigned short *) loc & 0xf000); + rc = apply_rela_bits(loc, val, 0, 12, 0); else if (r_type == R_390_16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_20) - *(unsigned int *) loc = - (*(unsigned int *) loc & 0xf00000ff) | - (val & 0xfff) << 16 | (val & 0xff000) >> 4; + rc = apply_rela_bits(loc, val, 1, 20, 0); else if (r_type == R_390_32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_PC16: /* PC relative 16 bit. */ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ @@ -233,15 +264,15 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PC64: /* PC relative 64 bit. */ val += rela->r_addend - loc; if (r_type == R_390_PC16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 1, 16, 0); else if (r_type == R_390_PC16DBL) - *(unsigned short *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 16, 1); else if (r_type == R_390_PC32DBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); else if (r_type == R_390_PC32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 1, 32, 0); else if (r_type == R_390_PC64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 1, 64, 0); break; case R_390_GOT12: /* 12 bit GOT offset. */ case R_390_GOT16: /* 16 bit GOT offset. */ @@ -266,26 +297,24 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = info->got_offset + rela->r_addend; if (r_type == R_390_GOT12 || r_type == R_390_GOTPLT12) - *(unsigned short *) loc = (val & 0xfff) | - (*(unsigned short *) loc & 0xf000); + rc = apply_rela_bits(loc, val, 0, 12, 0); else if (r_type == R_390_GOT16 || r_type == R_390_GOTPLT16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOT20 || r_type == R_390_GOTPLT20) - *(unsigned int *) loc = - (*(unsigned int *) loc & 0xf00000ff) | - (val & 0xfff) << 16 | (val & 0xff000) >> 4; + rc = apply_rela_bits(loc, val, 1, 20, 0); else if (r_type == R_390_GOT32 || r_type == R_390_GOTPLT32) - *(unsigned int *) loc = val; - else if (r_type == R_390_GOTENT || - r_type == R_390_GOTPLTENT) - *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) { + val += (Elf_Addr) me->module_core - loc; + rc = apply_rela_bits(loc, val, 1, 32, 1); + } break; case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ @@ -327,17 +356,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val += rela->r_addend - loc; } if (r_type == R_390_PLT16DBL) - *(unsigned short *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 16, 1); else if (r_type == R_390_PLTOFF16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_PLT32DBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); else if (r_type == R_390_PLT32 || r_type == R_390_PLTOFF32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_PLT64 || r_type == R_390_PLTOFF64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_GOTOFF16: /* 16 bit offset to GOT. */ case R_390_GOTOFF32: /* 32 bit offset to GOT. */ @@ -345,20 +374,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = val + rela->r_addend - ((Elf_Addr) me->module_core + me->arch.got_offset); if (r_type == R_390_GOTOFF16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_GOTOFF64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ val = (Elf_Addr) me->module_core + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 1, 32, 0); else if (r_type == R_390_GOTPCDBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); break; case R_390_COPY: case R_390_GLOB_DAT: /* Create GOT entry. */ @@ -366,19 +395,25 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_RELATIVE: /* Adjust by program base. */ /* Only needed if we want to support loading of modules linked with -shared. */ - break; + return -ENOEXEC; default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", + printk(KERN_ERR "module %s: unknown relocation: %u\n", me->name, r_type); return -ENOEXEC; } + if (rc) { + printk(KERN_ERR "module %s: relocation error for symbol %s " + "(r_type %i, value 0x%lx)\n", + me->name, strtab + symtab[r_sym].st_name, + r_type, (unsigned long) val); + return rc; + } return 0; } -int -apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, - unsigned int symindex, unsigned int relsec, - struct module *me) +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) { Elf_Addr base; Elf_Sym *symtab; @@ -394,7 +429,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); for (i = 0; i < n; i++, rela++) { - rc = apply_rela(rela, base, symtab, me); + rc = apply_rela(rela, base, symtab, strtab, me); if (rc) return rc; } @@ -409,7 +444,3 @@ int module_finalize(const Elf_Ehdr *hdr, me->arch.syminfo = NULL; return 0; } - -void module_arch_cleanup(struct module *mod) -{ -} diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fab88431a06..210e1285f75 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -1,7 +1,7 @@ /* * Machine check handler * - * Copyright IBM Corp. 2000,2009 + * Copyright IBM Corp. 2000, 2009 * Author(s): Ingo Adlung <adlung@de.ibm.com>, * Martin Schwidefsky <schwidefsky@de.ibm.com>, * Cornelia Huck <cornelia.huck@de.ibm.com>, @@ -30,7 +30,7 @@ struct mcck_struct { static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static NORET_TYPE void s390_handle_damage(char *msg) +static void s390_handle_damage(char *msg) { smp_send_stop(); disabled_wait((unsigned long) __builtin_return_address(0)); @@ -55,7 +55,7 @@ void s390_handle_mcck(void) local_mcck_disable(); mcck = __get_cpu_var(cpu_mcck); memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); - clear_thread_flag(TIF_MCCK_PENDING); + clear_cpu_flag(CIF_MCCK_PENDING); local_mcck_enable(); local_irq_restore(flags); @@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) : "0", "cc"); #endif /* Revalidate clock comparator register */ - if (S390_lowcore.clock_comparator == -1) - set_clock_comparator(S390_lowcore.mcck_clock); - else - set_clock_comparator(S390_lowcore.clock_comparator); + set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ if (!mci->wp) /* @@ -254,9 +251,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) int umode; nmi_enter(); - s390_idle_check(regs, S390_lowcore.mcck_clock, - S390_lowcore.mcck_enter_timer); - kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++; + inc_irq_stat(NMI_NMI); mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); umode = user_mode(regs); @@ -295,7 +290,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) * retry this instruction. */ spin_lock(&ipd_lock); - tmp = get_clock(); + tmp = get_tod_clock(); if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME) ipd_count++; else @@ -318,7 +313,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) */ mcck->kill_task = 1; mcck->mcck_code = *(unsigned long long *) mci; - set_thread_flag(TIF_MCCK_PENDING); + set_cpu_flag(CIF_MCCK_PENDING); } else { /* * Couldn't restore all register contents while in @@ -357,12 +352,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (mci->cp) { /* Channel report word pending */ mcck->channel_report = 1; - set_thread_flag(TIF_MCCK_PENDING); + set_cpu_flag(CIF_MCCK_PENDING); } if (mci->w) { /* Warning pending */ mcck->warning = 1; - set_thread_flag(TIF_MCCK_PENDING); + set_cpu_flag(CIF_MCCK_PENDING); } nmi_exit(); } diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 00000000000..d112fc66f99 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,168 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <asm/checksum.h> +#include <asm/lowcore.h> +#include <asm/os_info.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].size = size; + os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info struture and set lowcore pointer + */ +void __init os_info_init(void) +{ + void *ptr = &os_info; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = os_info_csum(&os_info); + mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = PTR_ALIGN(buf, align); + if (copy_from_oldmem(buf_align, (void *) addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!OLDMEM_BASE) + goto fail; + if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); + pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 00000000000..ea75d011a6f --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,696 @@ +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <asm/ctl_reg.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> + +/* CPU-measurement counter facility supports these CPU counter sets: + * For CPU counter sets: + * Basic counter set: 0-31 + * Problem-state counter set: 32-63 + * Crypto-activity counter set: 64-127 + * Extented counter set: 128-159 + */ +enum cpumf_ctr_set { + /* CPU counter sets */ + CPUMF_CTR_SET_BASIC = 0, + CPUMF_CTR_SET_USER = 1, + CPUMF_CTR_SET_CRYPTO = 2, + CPUMF_CTR_SET_EXT = 3, + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, +}; + +static void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +/* Local CPUMF event structure */ +struct cpu_hw_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + }, + .state = 0, + .flags = 0, +}; + +static int get_counter_set(u64 event) +{ + int set = -1; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 256) + set = CPUMF_CTR_SET_EXT; + + return set; +} + +static int validate_event(const struct hw_perf_event *hwc) +{ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + /* check for reserved counters */ + if ((hwc->config >= 6 && hwc->config <= 31) || + (hwc->config >= 38 && hwc->config <= 63) || + (hwc->config >= 80 && hwc->config <= 127)) + return -EOPNOTSUPP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int validate_ctr_version(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check required version for counter sets */ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpuhw->info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + if (cpuhw->info.csvn < 1) + err = -EOPNOTSUPP; + if ((cpuhw->info.csvn == 1 && hwc->config > 159) || + (cpuhw->info.csvn == 2 && hwc->config > 175) || + (cpuhw->info.csvn > 2 && hwc->config > 255)) + err = -EOPNOTSUPP; + break; + } + + put_cpu_var(cpu_hw_events); + return err; +} + +static int validate_ctr_auth(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + u64 ctrs_state; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check authorization for cpu counter sets */ + ctrs_state = cpumf_state_ctl[hwc->config_base]; + if (!(ctrs_state & cpuhw->info.auth_ctl)) + err = -EPERM; + + put_cpu_var(cpu_hw_events); + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + u64 inactive; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +/* Initialize the CPU-measurement facility */ +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +/* Release the CPU-measurement facility */ +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + u64 ev; + + switch (attr->type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + + /* No support for kernel space counters only */ + } else if (!attr->exclude_kernel && attr->exclude_user) { + return -EOPNOTSUPP; + + /* Count user and kernel space */ + } else { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev >= PERF_CPUM_CF_MAX_CTR) + return -EINVAL; + + /* Use the hardware perf event structure to store the counter number + * in 'config' member and the counter set to which the counter belongs + * in the 'config_base'. The counter set (config_base) is then used + * to enable/disable the counters. + */ + hwc->config = ev; + hwc->config_base = get_counter_set(ev); + + /* Validate the counter that is assigned to this event. + * Because the counter facility can use numerous counters at the + * same time without constraints, it is not necessary to explicity + * validate event groups (event->group_leader != event). + */ + err = validate_event(hwc); + if (err) + return err; + + /* Initialize for using the CPU-measurement counter facility */ + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + /* Finally, validate version and authorization of the counter set */ + err = validate_ctr_auth(hwc); + if (!err) + err = validate_ctr_version(hwc); + + return err; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + int err; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + /* The CPU measurement counter facility does not have overflow + * interrupts to do sampling. Sampling must be provided by + * external means, for example, by timers. + */ + if (is_sampling_event(event)) + return -ENOENT; + err = __hw_perf_event_init(event); + break; + default: + return -ENOENT; + } + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. + */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static int hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + goto out; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +out: + return err; +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->config == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + hw_perf_event_reset(event); + + /* increment refcount for this counter set */ + atomic_inc(&cpuhw->ctr_set[hwc->config_base]); +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. + */ + if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) + ctr_set_stop(&cpuhw->state, hwc->config_base); + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Check authorization for the counter set to which this + * counter belongs. + * For group events transaction, the authorization check is + * done in cpumf_pmu_commit_txn(). + */ + if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (validate_ctr_auth(&event->hw)) + return -EPERM; + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) + ctr_set_disable(&cpuhw->state, event->hw.config_base); + + perf_event_update_userpage(event); +} + +/* + * Start group events scheduling transaction. + * Set flags to perform a single test at commit time. + */ +static void cpumf_pmu_start_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + perf_pmu_disable(pmu); + cpuhw->flags |= PERF_EVENT_TXN; + cpuhw->tx_state = cpuhw->state; +} + +/* + * Stop and cancel a group events scheduling tranctions. + * Assumes cpumf_pmu_del() is called for each successful added + * cpumf_pmu_add() during the transaction. + */ +static void cpumf_pmu_cancel_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + WARN_ON(cpuhw->tx_state != cpuhw->state); + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); +} + +/* + * Commit the group events scheduling transaction. On success, the + * transaction is closed. On error, the transaction is kept open + * until cpumf_pmu_cancel_txn() is called. + */ +static int cpumf_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u64 state; + + /* check if the updated state can be scheduled */ + state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + state >>= CPUMF_LCCTL_ENABLE_SHIFT; + if ((state & cpuhw->info.auth_ctl) != state) + return -EPERM; + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); + return 0; +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, + .start_txn = cpumf_pmu_start_txn, + .commit_txn = cpumf_pmu_commit_txn, + .cancel_txn = cpumf_pmu_cancel_txn, +}; + +static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + goto out; + } + + cpumf_pmu.attr_groups = cpumf_cf_event_group(); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return rc; +} +early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 00000000000..4554a4bae39 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,322 @@ +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include <linux/slab.h> +#include <linux/perf_event.h> + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); + +static struct attribute *cpumcf_pmu_event_attr[] = { + CPUMF_EVENT_PTR(cf, CPU_CYCLES), + CPUMF_EVENT_PTR(cf, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_CYCLES), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumcf_pmu_event_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model; + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + default: + model = NULL; + break; + }; + + if (!model) + goto out; + + combined = merge_attr(cpumcf_pmu_event_attr, model); + if (combined) + cpumsf_pmu_events_group.attrs = combined; +out: + return cpumsf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 00000000000..ea0c7b2ef03 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,1643 @@ +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_sf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> +#include <asm/debug.h> +#include <asm/timex.h> + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. + */ +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) + +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. + * + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs + */ +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; + +struct sf_buffer { + unsigned long *sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return !!cpuhw->sfb.sdbt; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long *sdbt, *curr; + + if (!sfb->sdbt) + return; + + sdbt = sfb->sdbt; + curr = sdbt; + + /* Free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* Process table-link entries */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page((unsigned long) sdbt); + + /* If the origin is reached, sampling buffer is freed */ + if (curr == sfb->sdbt) + break; + else + sdbt = curr; + } else { + /* Process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + } + + debug_sprintf_event(sfdbg, 5, + "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + memset(sfb, 0, sizeof(*sfb)); +} + +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + unsigned long sdb, *trailer; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + trailer = trailer_entry_ptr(sdb); + *trailer = SDB_TE_ALERT_REQ_MASK; + + /* Link SDB into the sample-data-block-table */ + *sdbt = sdb; + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " + "sampling buffer is not linked: origin=%p" + "tail=%p\n", + (void *) sfb->sdbt, (void *) tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. + */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) + break; + sfb->num_sdb++; + tail++; + } + + /* Link sampling buffer to its origin */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" + " settings: sdbt=%lu sdb=%lu\n", + sfb->num_sdbt, sfb->num_sdb); + return rc; +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int rc; + + if (sfb->sdbt) + return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; + sfb->num_sdb = 0; + sfb->num_sdbt = 1; + + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; + + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) { + free_sampling_buffer(sfb); + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " + "realloc_sampling_buffer failed with rc=%i\n", rc); + } else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%p dear=%p\n", + sfb->sdbt, (void *) *sfb->sdbt); + return rc; +} + +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + struct hws_qsi_info_block si; + + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; +} + +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} + +static int sfb_has_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + return sfb_pending_allocs(sfb, hwc) > 0; +} + +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; +} + +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static size_t event_sample_size(struct hw_perf_event *hwc) +{ + struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + size_t sample_size; + + /* The sample size depends on the sampling function: The basic-sampling + * function must be always enabled, diagnostic-sampling function is + * optional. + */ + sample_size = sfr->bsdes; + if (SAMPL_DIAG_MODE(hwc)) + sample_size += sfr->dsdes; + + return sample_size; +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (cpuhw->sfb.sdbt) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq, factor; + size_t sfr_size, sample_size; + struct sf_raw_sample *sfr; + + /* Allocate raw sample buffer + * + * The raw sample buffer is used to temporarily store sampling data + * entries for perf raw sample processing. The buffer size mainly + * depends on the size of diagnostic-sampling data entries which is + * machine-specific. The exact size calculation includes: + * 1. The first 4 bytes of diagnostic-sampling data entries are + * already reflected in the sf_raw_sample structure. Subtract + * these bytes. + * 2. The perf raw sample data must be 8-byte aligned (u64) and + * perf's internal data size must be considered too. So add + * an additional u32 for correct alignment and subtract before + * allocating the buffer. + * 3. Store the raw sample buffer pointer in the perf event + * hardware structure. + */ + sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + + sizeof(u32), sizeof(u64)); + sfr_size -= sizeof(u32); + sfr = kzalloc(sfr_size, GFP_KERNEL); + if (!sfr) + return -ENOMEM; + sfr->size = sfr_size; + sfr->bsdes = cpuhw->qsi.bsdes; + sfr->dsdes = cpuhw->qsi.dsdes; + RAWSAMPLE_REG(hwc) = (unsigned long) sfr; + + /* Calculate sampling buffers using 4K pages + * + * 1. Determine the sample data size which depends on the used + * sampling functions, for example, basic-sampling or + * basic-sampling with diagnostic-sampling. + * + * 2. Use the sampling frequency as input. The sampling buffer is + * designed for almost one second. This can be adjusted through + * the "factor" variable. + * In any case, alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger a measurement-alert to harvest + * sample-data-blocks (sdb). + * + * 3. Compute the number of sample-data-blocks and ensure a minimum + * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not + * exceed a "calculated" maximum. The symbolic maximum is + * designed for basic-sampling only and needs to be increased if + * diagnostic-sampling is active. + * See also the remarks for these symbolic constants. + * + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). + */ + sample_size = event_sample_size(hwc); + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + factor = 1; + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); + if (n_sdb < CPUM_SF_MIN_SDB) + n_sdb = CPUM_SF_MIN_SDB; + + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. + */ + sfb_init_allocs(n_sdb, hwc); + if (sf_buffer_available(cpuhw)) + return 0; + + debug_sprintf_event(sfdbg, 3, + "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" + " sample_size=%lu cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), + sample_size, cpuhw); + + return alloc_sampling_buffer(&cpuhw->sfb, + sfb_pending_allocs(&cpuhw->sfb, hwc)); +} + +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} + +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} + +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" + " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. + */ +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + unsigned long num, num_old; + int rc; + + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + num_old = sfb->num_sdb; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. + */ + rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); + if (rc) + debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " + "failed with rc=%i\n", rc); + + if (sfb_has_pending_allocs(sfb, hwc)) + debug_sprintf_event(sfdbg, 5, "sfb: extend: " + "req=%lu alloc=%lu remaining=%lu\n", + num, sfb->num_sdb - num_old, + sfb_pending_allocs(sfb, hwc)); +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +#define PMC_FAILURE 2 +static void setup_pmc_cpu(void *flags) +{ + int err; + struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + + err = 0; + switch (*((int *) flags)) { + case PMC_INIT: + memset(cpusf, 0, sizeof(*cpusf)); + err = qsi(&cpusf->qsi); + if (err) + break; + cpusf->flags |= PMU_F_RESERVED; + err = sf_disable(); + if (err) + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + break; + case PMC_RELEASE: + cpusf->flags &= ~PMU_F_RESERVED; + err = sf_disable(); + if (err) { + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + } else + deallocate_buffers(cpusf); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + break; + } + if (err) + *((int *) flags) |= PMC_FAILURE; +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); + perf_release_sampling(); +} + +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + int err; + + err = perf_reserve_sampling(); + if (err) + return err; + on_each_cpu(setup_pmc_cpu, &flags, 1); + if (flags & PMC_FAILURE) { + release_pmc_hardware(); + return -ENODEV; + } + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Free raw sample buffer */ + if (RAWSAMPLE_REG(&event->hw)) + kfree((void *) RAWSAMPLE_REG(&event->hw)); + + /* Release PMC if this is the last perf event */ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static void hw_reset_registers(struct hw_perf_event *hwc, + unsigned long *sdbt_origin) +{ + struct sf_raw_sample *sfr; + + /* (Re)set to first sample-data-block-table */ + TEAR_REG(hwc) = (unsigned long) sdbt_origin; + + /* (Re)set raw sampling buffer register */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + memset(&sfr->basic, 0, sizeof(sfr->basic)); + memset(&sfr->diag, 0, sfr->dsdes); +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + int cpu, err; + + /* Reserve CPU-measurement sampling facility */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + goto out; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) + qsi(&si); + else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + + /* Check and set other sampling flags */ + if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; + + /* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + */ + rate = 0; + if (attr->freq) { + rate = freq_to_sample_rate(&si, attr->sample_freq); + rate = hw_limit_rate(&si, rate); + attr->freq = 0; + attr->sample_period = rate; + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(&si, hwc->sample_period); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(&si, rate) > + sysctl_perf_event_sample_rate) { + err = -EINVAL; + debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); + goto out; + } + } + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + + /* Initialize sample data overflow accounting */ + hwc->extra_reg.reg = REG_OVERFLOW; + OVERFLOW_REG(hwc) = 0; + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. + */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_buffers(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_buffers(cpuhw, hwc); + if (err) + break; + } + } +out: + return err; +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + /* No support for taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: + return -ENOENT; + } + + /* Check online status of the CPU to which the event is pinned */ + if (event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + if (event->destroy) + event->destroy(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hw_perf_event *hwc; + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). + */ + if (cpuhw->event) { + hwc = &cpuhw->event->hw; + /* Account number of overflow-designated buffer extents */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + + /* (Re)enable the PMU and sampling facility */ + cpuhw->flags |= PMU_F_ENABLED; + barrier(); + + err = lsctl(&cpuhw->lsctl); + if (err) { + cpuhw->flags &= ~PMU_F_ENABLED; + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 1, err); + return; + } + + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " + "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, + cpuhw->lsctl.ed, cpuhw->lsctl.cd, + (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + inactive.cd = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 2, err); + return; + } + + /* Save state of TEAR and DEAR register contents */ + if (!qsi(&si)) { + /* TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + } else + debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " + "qsi() failed with err=%i\n", err); + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_exclude_event() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) +{ + int overflow; + struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; + struct perf_sample_data data; + struct perf_raw_record raw; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + raw.size = sfr->size; + raw.data = sfr; + data.raw = &raw; + + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ + memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + + regs.psw.addr = sfr->basic.ia; + if (sfr->basic.T) + regs.psw.mask |= PSW_MASK_DAT; + if (sfr->basic.W) + regs.psw.mask |= PSW_MASK_WAIT; + if (sfr->basic.P) + regs.psw.mask |= PSW_MASK_PSTATE; + switch (sfr->basic.AS) { + case 0x0: + regs.psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x1: + regs.psw.mask |= PSW_ASC_ACCREG; + break; + case 0x2: + regs.psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x3: + regs.psw.mask |= PSW_ASC_HOME; + break; + } + + /* The host-program-parameter (hpp) contains the sie control + * block that is set by sie64a() in entry64.S. Check if hpp + * refers to a valid control block and set sde_regs flags + * accordingly. This would allow to use hpp values for other + * purposes too. + * For now, simply use a non-zero value as guest indicator. + */ + if (sfr->basic.hpp) + sde_regs->in_guest = 1; + + overflow = 0; + if (perf_exclude_event(event, ®s, sde_regs)) + goto out; + if (perf_event_overflow(event, &data, ®s)) { + overflow = 1; + event->pmu->stop(event, 0); + } + perf_event_update_userpage(event); +out: + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +static int sample_format_is_valid(struct hws_combined_entry *sample, + unsigned int flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + /* Only basic-sampling data entries with data-entry-format + * version of 0x0001 can be processed. + */ + if (sample->basic.def != 0x0001) + return 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + /* The data-entry-format number of diagnostic-sampling data + * entries can vary. Because diagnostic data is just passed + * through, do only a sanity check on the DEF. + */ + if (sample->diag.def < 0x8001) + return 0; + return 1; +} + +static int sample_is_consistent(struct hws_combined_entry *sample, + unsigned long flags) +{ + /* This check applies only to basic-sampling data entries of potentially + * combined-sampling data entries. Invalid entries cannot be processed + * by the PMU and, thus, do not deliver an associated + * diagnostic-sampling data entry. + */ + if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) + return 0; + /* + * Samples are skipped, if they are invalid or for which the + * instruction address is not predictable, i.e., the wait-state bit is + * set. + */ + if (sample->basic.I || sample->basic.W) + return 0; + return 1; +} + +static void reset_sample_slot(struct hws_combined_entry *sample, + unsigned long flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + sample->basic.def = 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + sample->diag.def = 0; +} + +static void sfr_store_sample(struct sf_raw_sample *sfr, + struct hws_combined_entry *sample) +{ + if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) + sfr->basic = sample->basic; + if (sfr->format & PERF_CPUM_SF_DIAG_MODE) + memcpy(&sfr->diag, &sample->diag, sfr->dsdes); +} + +static void debug_sample_entry(struct hws_combined_entry *sample, + struct hws_trailer_entry *te, + unsigned long flags) +{ + debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " + "sampling data entry: te->f=%i basic.def=%04x (%p)" + " diag.def=%04x (%p)\n", te->f, + sample->basic.def, &sample->basic, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? sample->diag.def : 0xFFFF, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? &sample->diag : NULL); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + unsigned long flags = SAMPL_FLAGS(&event->hw); + struct hws_combined_entry *sample; + struct hws_trailer_entry *te; + struct sf_raw_sample *sfr; + size_t sample_size; + + /* Prepare and initialize raw sample data */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); + sfr->format = flags & PERF_CPUM_SF_MODE_MASK; + + sample_size = event_sample_size(&event->hw); + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + sample = (struct hws_combined_entry *) *sdbt; + while ((unsigned long *) sample < (unsigned long *) te) { + /* Check for an empty sample */ + if (!sample->basic.def) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check sampling data entry */ + if (sample_format_is_valid(sample, flags)) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. + */ + if (!*overflow) { + if (sample_is_consistent(sample, flags)) { + /* Deliver sample data to perf */ + sfr_store_sample(sfr, sample); + *overflow = perf_push_sample(event, sfr); + } + } else + /* Count discarded samples */ + *overflow += 1; + } else { + debug_sample_entry(sample, te, flags); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->f) + break; + } + + /* Reset sample slot and advance to next sample */ + reset_sample_slot(sample, flags); + sample += sample_size; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flash_all flag + * to also walk through partially filled sample-data-blocks. It is ignored + * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag + * enforces the processing of full sample-data-blocks only (trailer entries + * with the block-full-indicator bit set). + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt; + unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; + int done; + + if (flush_all && SDB_FULL_BLOCKS(hwc)) + flush_all = 0; + + sdbt = (unsigned long *) TEAR_REG(hwc); + done = event_overflow = sampl_overflow = num_sdb = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->f) { + done = 1; + if (!flush_all) + break; + } + + /* Check the sample overflow count */ + if (te->overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). + */ + sampl_overflow += te->overflow; + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " + "overflow=%llu timestamp=0x%llx\n", + sdbt, te->overflow, + (te->f) ? trailer_timestamp(te) : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. + */ + hw_collect_samples(event, sdbt, &event_overflow); + num_sdb++; + + /* Reset trailer (using compare-double-and-swap) */ + do { + te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te_flags |= SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + te->flags, te->overflow, + te_flags, 0ULL)); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long) sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + + /* If an event overflow happened, discard samples by + * processing any remaining sample-data-blocks. + */ + if (event_overflow) + flush_all = 1; + } + + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); + if (sampl_overflow || event_overflow) + debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " + "overflow stats: sample=%llu event=%llu\n", + sampl_overflow, event_overflow); +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!cpuhw->sfb.sdbt) + return -EINVAL; + + err = 0; + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. */ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { + err = -EAGAIN; + goto out; + } + cpuhw->lsctl.es = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.ed = 1; + + /* Set in_use flag and store event */ + event->hw.idx = 0; /* only one sampling event per CPU supported */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); + +static struct attribute *cpumsf_pmu_events_attr[] = { + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .event_idx = cpumsf_pmu_event_idx, + .attr_groups = cpumsf_pmu_attr_groups, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = &__get_cpu_var(cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) + hw_perf_event_update(cpuhw->event, 0); + else + WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!atomic_read(&num_events)) + return NOTIFY_OK; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("The sampling buffer limits have changed to: " + "min=%lu max=%lu (diag=x%lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + +#define RS_INIT_FAILURE_QSI 0x0001 +#define RS_INIT_FAILURE_BSDES 0x0002 +#define RS_INIT_FAILURE_ALRT 0x0003 +#define RS_INIT_FAILURE_PERF 0x0004 +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason=%04x\n", reason); +} + +static int __init init_cpum_sampling_pmu(void) +{ + struct hws_qsi_info_block si; + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_cpumsf_err(RS_INIT_FAILURE_QSI); + return -ENODEV; + } + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + if (!sfdbg) + pr_err("Registering for s390dbf failed\n"); + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_PERF); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return err; +} +arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 00000000000..61595c1f0a0 --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,324 @@ +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 2012, 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "perf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/kvm_host.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sysinfo.h> + +const char *perf_pmu_name(void) +{ + if (cpum_cf_avail() || cpum_sf_avail()) + return "CPU-Measurement Facilities (CPU-MF)"; + return "pmu"; +} +EXPORT_SYMBOL(perf_pmu_name); + +int perf_num_counters(void) +{ + int num = 0; + + if (cpum_cf_avail()) + num += PERF_CPUM_CF_MAX_CTR; + if (cpum_sf_avail()) + num += PERF_CPUM_SF_MAX_CTR; + + return num; +} +EXPORT_SYMBOL(perf_num_counters); + +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; +#if IS_ENABLED(CONFIG_KVM) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + +static void print_debug_cf(void) +{ + struct cpumf_ctr_info cf_info; + int cpu = smp_processor_id(); + + memset(&cf_info, 0, sizeof(cf_info)); + if (!qctri(&cf_info)) + pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", + cpu, cf_info.cfvn, cf_info.csvn, + cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); +} + +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); + local_irq_restore(flags); +} + +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) +{ + struct cpumf_ctr_info ci; + + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) + return; + + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} + +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) + return; + + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + +/* See also arch/s390/kernel/traps.c */ +static unsigned long __store_trace(struct perf_callchain_entry *entry, + unsigned long sp, + unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, + sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + unsigned long head; + struct stack_frame *head_sf; + + if (user_mode(regs)) + return; + + head = regs->gprs[15]; + head_sf = (struct stack_frame *) head; + + if (!head_sf || !head_sf->back_chain) + return; + + head = head_sf->back_chain; + head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + + __store_trace(entry, head, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); +} + +/* Perf defintions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx,name=%s\n", + pmu_attr->id, attr->attr.name); +} + +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(perf_hw_owner_lock); +static void *perf_sampling_owner; + +int perf_reserve_sampling(void) +{ + int err; + + err = 0; + spin_lock(&perf_hw_owner_lock); + if (perf_sampling_owner) { + pr_warn("The sampling facility is already reserved by %p\n", + perf_sampling_owner); + err = -EBUSY; + } else + perf_sampling_owner = __builtin_return_address(0); + spin_unlock(&perf_hw_owner_lock); + return err; +} +EXPORT_SYMBOL(perf_reserve_sampling); + +void perf_release_sampling(void) +{ + spin_lock(&perf_hw_owner_lock); + WARN_ON(!perf_sampling_owner); + perf_sampling_owner = NULL; + spin_unlock(&perf_hw_owner_lock); +} +EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S new file mode 100644 index 00000000000..813ec726087 --- /dev/null +++ b/arch/s390/kernel/pgm_check.S @@ -0,0 +1,152 @@ +/* + * Program check table. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +#ifdef CONFIG_32BIT +#define PGM_CHECK_64BIT(handler) .long default_trap_handler +#else +#define PGM_CHECK_64BIT(handler) .long handler +#endif + +#define PGM_CHECK(handler) .long handler +#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler) + +/* + * The program check table contains exactly 128 (0x00-0x7f) entries. Each + * line defines the 31 and/or 64 bit function to be called corresponding + * to the program check interruption code. + */ +.section .rodata, "a" +ENTRY(pgm_check_table) +PGM_CHECK_DEFAULT /* 00 */ +PGM_CHECK(illegal_op) /* 01 */ +PGM_CHECK(privileged_op) /* 02 */ +PGM_CHECK(execute_exception) /* 03 */ +PGM_CHECK(do_protection_exception) /* 04 */ +PGM_CHECK(addressing_exception) /* 05 */ +PGM_CHECK(specification_exception) /* 06 */ +PGM_CHECK(data_exception) /* 07 */ +PGM_CHECK(overflow_exception) /* 08 */ +PGM_CHECK(divide_exception) /* 09 */ +PGM_CHECK(overflow_exception) /* 0a */ +PGM_CHECK(divide_exception) /* 0b */ +PGM_CHECK(hfp_overflow_exception) /* 0c */ +PGM_CHECK(hfp_underflow_exception) /* 0d */ +PGM_CHECK(hfp_significance_exception) /* 0e */ +PGM_CHECK(hfp_divide_exception) /* 0f */ +PGM_CHECK(do_dat_exception) /* 10 */ +PGM_CHECK(do_dat_exception) /* 11 */ +PGM_CHECK(translation_exception) /* 12 */ +PGM_CHECK(special_op_exception) /* 13 */ +PGM_CHECK_DEFAULT /* 14 */ +PGM_CHECK(operand_exception) /* 15 */ +PGM_CHECK_DEFAULT /* 16 */ +PGM_CHECK_DEFAULT /* 17 */ +PGM_CHECK_64BIT(transaction_exception) /* 18 */ +PGM_CHECK_DEFAULT /* 19 */ +PGM_CHECK_DEFAULT /* 1a */ +PGM_CHECK_DEFAULT /* 1b */ +PGM_CHECK(space_switch_exception) /* 1c */ +PGM_CHECK(hfp_sqrt_exception) /* 1d */ +PGM_CHECK_DEFAULT /* 1e */ +PGM_CHECK_DEFAULT /* 1f */ +PGM_CHECK_DEFAULT /* 20 */ +PGM_CHECK_DEFAULT /* 21 */ +PGM_CHECK_DEFAULT /* 22 */ +PGM_CHECK_DEFAULT /* 23 */ +PGM_CHECK_DEFAULT /* 24 */ +PGM_CHECK_DEFAULT /* 25 */ +PGM_CHECK_DEFAULT /* 26 */ +PGM_CHECK_DEFAULT /* 27 */ +PGM_CHECK_DEFAULT /* 28 */ +PGM_CHECK_DEFAULT /* 29 */ +PGM_CHECK_DEFAULT /* 2a */ +PGM_CHECK_DEFAULT /* 2b */ +PGM_CHECK_DEFAULT /* 2c */ +PGM_CHECK_DEFAULT /* 2d */ +PGM_CHECK_DEFAULT /* 2e */ +PGM_CHECK_DEFAULT /* 2f */ +PGM_CHECK_DEFAULT /* 30 */ +PGM_CHECK_DEFAULT /* 31 */ +PGM_CHECK_DEFAULT /* 32 */ +PGM_CHECK_DEFAULT /* 33 */ +PGM_CHECK_DEFAULT /* 34 */ +PGM_CHECK_DEFAULT /* 35 */ +PGM_CHECK_DEFAULT /* 36 */ +PGM_CHECK_DEFAULT /* 37 */ +PGM_CHECK_64BIT(do_dat_exception) /* 38 */ +PGM_CHECK_64BIT(do_dat_exception) /* 39 */ +PGM_CHECK_64BIT(do_dat_exception) /* 3a */ +PGM_CHECK_64BIT(do_dat_exception) /* 3b */ +PGM_CHECK_DEFAULT /* 3c */ +PGM_CHECK_DEFAULT /* 3d */ +PGM_CHECK_DEFAULT /* 3e */ +PGM_CHECK_DEFAULT /* 3f */ +PGM_CHECK_DEFAULT /* 40 */ +PGM_CHECK_DEFAULT /* 41 */ +PGM_CHECK_DEFAULT /* 42 */ +PGM_CHECK_DEFAULT /* 43 */ +PGM_CHECK_DEFAULT /* 44 */ +PGM_CHECK_DEFAULT /* 45 */ +PGM_CHECK_DEFAULT /* 46 */ +PGM_CHECK_DEFAULT /* 47 */ +PGM_CHECK_DEFAULT /* 48 */ +PGM_CHECK_DEFAULT /* 49 */ +PGM_CHECK_DEFAULT /* 4a */ +PGM_CHECK_DEFAULT /* 4b */ +PGM_CHECK_DEFAULT /* 4c */ +PGM_CHECK_DEFAULT /* 4d */ +PGM_CHECK_DEFAULT /* 4e */ +PGM_CHECK_DEFAULT /* 4f */ +PGM_CHECK_DEFAULT /* 50 */ +PGM_CHECK_DEFAULT /* 51 */ +PGM_CHECK_DEFAULT /* 52 */ +PGM_CHECK_DEFAULT /* 53 */ +PGM_CHECK_DEFAULT /* 54 */ +PGM_CHECK_DEFAULT /* 55 */ +PGM_CHECK_DEFAULT /* 56 */ +PGM_CHECK_DEFAULT /* 57 */ +PGM_CHECK_DEFAULT /* 58 */ +PGM_CHECK_DEFAULT /* 59 */ +PGM_CHECK_DEFAULT /* 5a */ +PGM_CHECK_DEFAULT /* 5b */ +PGM_CHECK_DEFAULT /* 5c */ +PGM_CHECK_DEFAULT /* 5d */ +PGM_CHECK_DEFAULT /* 5e */ +PGM_CHECK_DEFAULT /* 5f */ +PGM_CHECK_DEFAULT /* 60 */ +PGM_CHECK_DEFAULT /* 61 */ +PGM_CHECK_DEFAULT /* 62 */ +PGM_CHECK_DEFAULT /* 63 */ +PGM_CHECK_DEFAULT /* 64 */ +PGM_CHECK_DEFAULT /* 65 */ +PGM_CHECK_DEFAULT /* 66 */ +PGM_CHECK_DEFAULT /* 67 */ +PGM_CHECK_DEFAULT /* 68 */ +PGM_CHECK_DEFAULT /* 69 */ +PGM_CHECK_DEFAULT /* 6a */ +PGM_CHECK_DEFAULT /* 6b */ +PGM_CHECK_DEFAULT /* 6c */ +PGM_CHECK_DEFAULT /* 6d */ +PGM_CHECK_DEFAULT /* 6e */ +PGM_CHECK_DEFAULT /* 6f */ +PGM_CHECK_DEFAULT /* 70 */ +PGM_CHECK_DEFAULT /* 71 */ +PGM_CHECK_DEFAULT /* 72 */ +PGM_CHECK_DEFAULT /* 73 */ +PGM_CHECK_DEFAULT /* 74 */ +PGM_CHECK_DEFAULT /* 75 */ +PGM_CHECK_DEFAULT /* 76 */ +PGM_CHECK_DEFAULT /* 77 */ +PGM_CHECK_DEFAULT /* 78 */ +PGM_CHECK_DEFAULT /* 79 */ +PGM_CHECK_DEFAULT /* 7a */ +PGM_CHECK_DEFAULT /* 7b */ +PGM_CHECK_DEFAULT /* 7c */ +PGM_CHECK_DEFAULT /* 7d */ +PGM_CHECK_DEFAULT /* 7e */ +PGM_CHECK_DEFAULT /* 7f */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a895e69379f..93b9ca42e5c 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -1,7 +1,7 @@ /* * This file handles the architecture dependent parts of process handling. * - * Copyright IBM Corp. 1999,2009 + * Copyright IBM Corp. 1999, 2009 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, * Hartmut Penner <hp@de.ibm.com>, * Denis Joseph Barrow, @@ -9,42 +9,29 @@ #include <linux/compiler.h> #include <linux/cpu.h> -#include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/fs.h> +#include <linux/elfcore.h> #include <linux/smp.h> -#include <linux/stddef.h> #include <linux/slab.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/vmalloc.h> -#include <linux/user.h> #include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/notifier.h> #include <linux/tick.h> -#include <linux/elfcore.h> -#include <linux/kernel_stat.h> #include <linux/personality.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/kprobes.h> #include <linux/random.h> -#include <asm/compat.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/system.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/processor.h> +#include <asm/vtimer.h> +#include <asm/exec.h> #include <asm/irq.h> -#include <asm/timer.h> #include <asm/nmi.h> #include <asm/smp.h> +#include <asm/switch_to.h> +#include <asm/runtime_instr.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -74,82 +61,38 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -/* - * The idle loop on a S390... - */ -static void default_idle(void) +void arch_cpu_idle(void) { - if (cpu_is_offline(smp_processor_id())) - cpu_die(); - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } local_mcck_disable(); - if (test_thread_flag(TIF_MCCK_PENDING)) { + if (test_cpu_flag(CIF_MCCK_PENDING)) { local_mcck_enable(); local_irq_enable(); - s390_handle_mcck(); return; } - trace_hardirqs_on(); - /* Don't trace preempt off for idle. */ - stop_critical_timings(); - /* Stop virtual timer and halt the cpu. */ + /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); - /* Reenable preemption tracer. */ - start_critical_timings(); + local_irq_enable(); } -void cpu_idle(void) +void arch_cpu_idle_exit(void) { - for (;;) { - tick_nohz_stop_sched_tick(1); - while (!need_resched()) - default_idle(); - tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); } -extern void __kprobes kernel_thread_starter(void); - -asm( - ".section .kprobes.text, \"ax\"\n" - ".global kernel_thread_starter\n" - "kernel_thread_starter:\n" - " la 2,0(10)\n" - " basr 14,9\n" - " la 2,0\n" - " br 11\n" - ".previous\n"); - -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +void arch_cpu_idle_dead(void) { - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; - regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; - regs.gprs[9] = (unsigned long) fn; - regs.gprs[10] = (unsigned long) arg; - regs.gprs[11] = (unsigned long) do_exit; - regs.orig_gpr2 = -1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, - 0, ®s, 0, NULL, NULL); + cpu_die(); } -EXPORT_SYMBOL(kernel_thread); + +extern void __kprobes kernel_thread_starter(void); /* * Free current thread data structures etc.. */ void exit_thread(void) { + exit_thread_runtime_instr(); } void flush_thread(void) @@ -161,8 +104,7 @@ void release_thread(struct task_struct *dead_task) } int copy_thread(unsigned long clone_flags, unsigned long new_stackp, - unsigned long unused, - struct task_struct *p, struct pt_regs *regs) + unsigned long arg, struct task_struct *p) { struct thread_info *ti; struct fake_frame @@ -173,93 +115,82 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame = container_of(task_pt_regs(p), struct fake_frame, childregs); p->thread.ksp = (unsigned long) frame; - /* Store access registers to kernel stack of new process. */ - frame->childregs = *regs; - frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ - frame->childregs.gprs[15] = new_stackp; - frame->sf.back_chain = 0; + /* Save access registers to new thread structure. */ + save_access_regs(&p->thread.acrs[0]); + /* start new process with ar4 pointing to the correct address space */ + p->thread.mm_segment = get_fs(); + /* Don't copy debug registers */ + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); + clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + /* Initialize per thread user and system timer values */ + ti = task_thread_info(p); + ti->user_timer = 0; + ti->system_timer = 0; + frame->sf.back_chain = 0; /* new return point is ret_from_fork */ frame->sf.gprs[8] = (unsigned long) ret_from_fork; - /* fake return stack for resume(), don't go back to schedule */ frame->sf.gprs[9] = (unsigned long) frame; - /* Save access registers to new thread structure. */ - save_access_regs(&p->thread.acrs[0]); + /* Store access registers to kernel stack of new process. */ + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(&frame->childregs, 0, sizeof(struct pt_regs)); + frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + frame->childregs.psw.addr = PSW_ADDR_AMODE | + (unsigned long) kernel_thread_starter; + frame->childregs.gprs[9] = new_stackp; /* function */ + frame->childregs.gprs[10] = arg; + frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.orig_gpr2 = -1; + + return 0; + } + frame->childregs = *current_pt_regs(); + frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; + + /* Don't copy runtime instrumentation info */ + p->thread.ri_cb = NULL; + p->thread.ri_signum = 0; + frame->childregs.psw.mask &= ~PSW_MASK_RI; #ifndef CONFIG_64BIT /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the child. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(&p->thread.fp_regs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) - p->thread.acrs[0] = regs->gprs[6]; + p->thread.acrs[0] = frame->childregs.gprs[6]; #else /* CONFIG_64BIT */ /* Save the fpu registers to new thread structure. */ - save_fp_regs(&p->thread.fp_regs); + save_fp_ctl(&p->thread.fp_regs.fpc); + save_fp_regs(p->thread.fp_regs.fprs); + p->thread.fp_regs.pad = 0; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { + unsigned long tls = frame->childregs.gprs[6]; if (is_compat_task()) { - p->thread.acrs[0] = (unsigned int) regs->gprs[6]; + p->thread.acrs[0] = (unsigned int)tls; } else { - p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32); - p->thread.acrs[1] = (unsigned int) regs->gprs[6]; + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; } } #endif /* CONFIG_64BIT */ - /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); - /* Don't copy debug registers */ - memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); - memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); - clear_tsk_thread_flag(p, TIF_SINGLE_STEP); - clear_tsk_thread_flag(p, TIF_PER_TRAP); - /* Initialize per thread user and system timer values */ - ti = task_thread_info(p); - ti->user_timer = 0; - ti->system_timer = 0; return 0; } -SYSCALL_DEFINE0(fork) -{ - struct pt_regs *regs = task_pt_regs(current); - return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL); -} - -SYSCALL_DEFINE4(clone, unsigned long, newsp, unsigned long, clone_flags, - int __user *, parent_tidptr, int __user *, child_tidptr) -{ - struct pt_regs *regs = task_pt_regs(current); - - if (!newsp) - newsp = regs->gprs[15]; - return do_fork(clone_flags, newsp, regs, 0, - parent_tidptr, child_tidptr); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -SYSCALL_DEFINE0(vfork) -{ - struct pt_regs *regs = task_pt_regs(current); - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, - regs->gprs[15], regs, 0, NULL, NULL); -} - asmlinkage void execve_tail(void) { current->thread.fp_regs.fpc = 0; @@ -268,31 +199,6 @@ asmlinkage void execve_tail(void) } /* - * sys_execve() executes a new program. - */ -SYSCALL_DEFINE3(execve, const char __user *, name, - const char __user *const __user *, argv, - const char __user *const __user *, envp) -{ - struct pt_regs *regs = task_pt_regs(current); - char *filename; - long rc; - - filename = getname(name); - rc = PTR_ERR(filename); - if (IS_ERR(filename)) - return rc; - rc = do_execve(filename, argv, envp, regs); - if (rc) - goto out; - execve_tail(); - rc = regs->gprs[2]; -out: - putname(filename); - return rc; -} - -/* * fill in the FPU structure for a core dump. */ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) @@ -302,10 +208,12 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the dump. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(fpregs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); #else /* CONFIG_64BIT */ - save_fp_regs(fpregs); + save_fp_ctl(&fpregs->fpc); + save_fp_regs(fpregs->fprs); #endif /* CONFIG_64BIT */ return 1; } @@ -353,20 +261,18 @@ static inline unsigned long brk_rnd(void) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + unsigned long ret; - if (ret < mm->brk) - return mm->brk; - return ret; + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; } unsigned long randomize_et_dyn(unsigned long base) { - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + unsigned long ret; if (!(current->flags & PF_RANDOMIZE)) return base; - if (ret < base) - return base; - return ret; + ret = PAGE_ALIGN(base + brk_rnd()); + return (ret > base) ? ret : base; } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 311e9d71288..24612029f45 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -1,6 +1,4 @@ /* - * arch/s390/kernel/processor.c - * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -23,15 +21,17 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); /* * cpu_init - initializes state that is per-CPU. */ -void __cpuinit cpu_init(void) +void cpu_init(void) { - struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct cpuid *id = &__get_cpu_var(cpu_id); get_cpu_id(id); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); + memset(idle, 0, sizeof(*idle)); } /* @@ -39,9 +39,9 @@ void __cpuinit cpu_init(void) */ static int show_cpuinfo(struct seq_file *m, void *v) { - static const char *hwcap_str[10] = { + static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs" + "edat", "etf3eh", "highgprs", "te" }; unsigned long n = (unsigned long) v - 1; int i; @@ -54,10 +54,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) num_online_cpus(), loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ))%100); seq_puts(m, "features\t: "); - for (i = 0; i < 10; i++) + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); seq_puts(m, "\n"); + show_cacheinfo(m); } get_online_cpus(); if (cpu_online(n)) { @@ -74,7 +75,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad24398..5dc7ad9e2fb 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1,7 +1,7 @@ /* * Ptrace user space interface. * - * Copyright IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2010 * Author(s): Denis Joseph Barrow * Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -20,15 +20,15 @@ #include <linux/regset.h> #include <linux/tracehook.h> #include <linux/seccomp.h> +#include <linux/compat.h> #include <trace/syscall.h> -#include <asm/compat.h> #include <asm/segment.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/unistd.h> +#include <asm/switch_to.h> #include "entry.h" #ifdef CONFIG_COMPAT @@ -42,48 +42,88 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_TDB, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; -void update_per_regs(struct task_struct *task) +void update_cr_regs(struct task_struct *task) { - static const struct per_regs per_single_step = { - .control = PER_EVENT_IFETCH, - .start = 0, - .end = PSW_ADDR_INSN, - }; struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - const struct per_regs *new; - struct per_regs old; + struct per_regs old, new; - /* TIF_SINGLE_STEP overrides the user specified PER registers. */ - new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? - &per_single_step : &thread->per_user; +#ifdef CONFIG_64BIT + /* Take care of the enable/disable of transactional execution. */ + if (MACHINE_HAS_TE) { + unsigned long cr, cr_new; + + __ctl_store(cr, 0, 0); + /* Set or clear transaction execution TXC bit 8. */ + cr_new = cr | (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); + if (cr_new != cr) + __ctl_load(cr_new, 0, 0); + /* Set or clear transaction execution TDC bits 62 and 63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; + } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); + } +#endif + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) + new.control |= PER_EVENT_BRANCH; + else + new.control |= PER_EVENT_IFETCH; +#ifdef CONFIG_64BIT + new.control |= PER_CONTROL_SUSPENSION; + new.control |= PER_EVENT_TRANSACTION_END; +#endif + new.start = 0; + new.end = PSW_ADDR_INSN; + } /* Take care of the PER enablement bit in the PSW. */ - if (!(new->control & PER_EVENT_MASK)) { + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; __ctl_store(old, 9, 11); - if (memcmp(new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(*new, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) { + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); set_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_per_regs(task); } void user_disable_single_step(struct task_struct *task) { + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_per_regs(task); +} + +void user_enable_block_step(struct task_struct *task) +{ + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + set_tsk_thread_flag(task, TIF_BLOCK_STEP); } /* @@ -96,7 +136,8 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - clear_tsk_thread_flag(task, TIF_PER_TRAP); + clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); + task->thread.per_flags = 0; } #ifndef CONFIG_64BIT @@ -165,9 +206,11 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + if (addr == (addr_t) &dummy->regs.psw.mask) { + /* Return a clean psw mask. */ + tmp &= PSW_MASK_USER | PSW_MASK_RI; + tmp |= PSW_USER_BITS; + } } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -206,8 +249,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) offset = addr - (addr_t) &dummy->regs.fp_regs; tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset); if (addr == (addr_t) &dummy->regs.fp_regs.fpc) - tmp &= (unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32); + tmp <<= BITS_PER_LONG - 32; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -288,19 +330,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && -#endif - data != PSW_MASK_MERGE(psw_user_bits, data)) - /* Invalid psw mask. */ - return -EINVAL; -#ifndef CONFIG_64BIT - if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif + if (addr == (addr_t) &dummy->regs.psw.mask) { + unsigned long mask = PSW_MASK_USER; + + mask |= is_ri_task(child) ? PSW_MASK_RI : 0; + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ + return -EINVAL; + } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -338,10 +381,10 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* * floating point regs. are stored in the thread structure */ - if (addr == (addr_t) &dummy->regs.fp_regs.fpc && - (data & ~((unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32))) != 0) - return -EINVAL; + if (addr == (addr_t) &dummy->regs.fp_regs.fpc) + if ((unsigned int) data != 0 || + test_fp_ctl(data >> (BITS_PER_LONG - 32))) + return -EINVAL; offset = addr - (addr_t) &dummy->regs.fp_regs; *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data; @@ -421,6 +464,36 @@ long arch_ptrace(struct task_struct *child, long request, put_user(task_thread_info(child)->last_break, (unsigned long __user *) data); return 0; + case PTRACE_ENABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags &= ~PER_FLAG_NO_TE; + return 0; + case PTRACE_DISABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + return 0; + case PTRACE_TE_ABORT_RAND: + if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: + return -EINVAL; + } + return 0; default: /* Removing high order bit from addr (only for 31 bit). */ addr &= PSW_ADDR_INSN; @@ -495,21 +568,22 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) __u32 tmp; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp &= PSW32_MASK_USER | PSW32_MASK_RI; + tmp |= PSW32_USER_BITS; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -594,24 +668,33 @@ static int __poke_user_compat(struct task_struct *child, addr_t offset; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { + __u32 mask = PSW32_MASK_USER; + + mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & mask) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -638,8 +721,7 @@ static int __poke_user_compat(struct task_struct *child, * floating point regs. are stored in the thread structure */ if (addr == (addr_t) &dummy32->regs.fp_regs.fpc && - (tmp & ~FPC_VALID_MASK) != 0) - /* Invalid floating point control. */ + test_fp_ctl(tmp)) return -EINVAL; offset = addr - (addr_t) &dummy32->regs.fp_regs; *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp; @@ -721,7 +803,11 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - secure_computing(regs->gprs[2]); + if (secure_computing(regs->gprs[2])) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1; + goto out; + } /* * The sysc_tracesys code in entry.S stored the system @@ -735,27 +821,25 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svcnr = 0; + clear_pt_regs_flag(regs, PIF_SYSCALL); ret = -1; } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gprs[2]); - if (unlikely(current->audit_context)) - audit_syscall_entry(is_compat_task() ? - AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, - regs->gprs[3], regs->gprs[4], - regs->gprs[5]); + audit_syscall_entry(is_compat_task() ? + AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); +out: return ret ?: regs->gprs[2]; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) { - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), - regs->gprs[2]); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_exit(regs, regs->gprs[2]); @@ -835,8 +919,10 @@ static int s390_fpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - if (target == current) - save_fp_regs(&target->thread.fp_regs); + if (target == current) { + save_fp_ctl(&target->thread.fp_regs.fpc); + save_fp_regs(target->thread.fp_regs.fprs); + } return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_regs, 0, -1); @@ -849,19 +935,21 @@ static int s390_fpregs_set(struct task_struct *target, { int rc = 0; - if (target == current) - save_fp_regs(&target->thread.fp_regs); + if (target == current) { + save_fp_ctl(&target->thread.fp_regs.fpc); + save_fp_regs(target->thread.fp_regs.fprs); + } /* If setting FPC, must validate it first. */ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { - u32 fpc[2] = { target->thread.fp_regs.fpc, 0 }; - rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc, + u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 }; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, 0, offsetof(s390_fp_regs, fprs)); if (rc) return rc; - if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0) + if (ufpc[1] != 0 || test_fp_ctl(ufpc[0])) return -EINVAL; - target->thread.fp_regs.fpc = fpc[0]; + target->thread.fp_regs.fpc = ufpc[0]; } if (rc == 0 && count > 0) @@ -869,8 +957,10 @@ static int s390_fpregs_set(struct task_struct *target, target->thread.fp_regs.fprs, offsetof(s390_fp_regs, fprs), -1); - if (rc == 0 && target == current) - restore_fp_regs(&target->thread.fp_regs); + if (rc == 0 && target == current) { + restore_fp_ctl(&target->thread.fp_regs.fpc); + restore_fp_regs(target->thread.fp_regs.fprs); + } return rc; } @@ -895,8 +985,58 @@ static int s390_last_break_get(struct task_struct *target, return 0; } +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static int s390_tdb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned char *data; + + if (!(regs->int_code & 0x200)) + return -ENODATA; + data = target->thread.trap_tdb; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); +} + +static int s390_tdb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -921,8 +1061,25 @@ static const struct user_regset s390_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_last_break_get, + .set = s390_last_break_set, + }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1078,6 +1235,14 @@ static int s390_compat_last_break_get(struct task_struct *target, return 0; } +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -1101,6 +1266,23 @@ static const struct user_regset s390_compat_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, + }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, @@ -1147,7 +1329,7 @@ int regs_query_register_offset(const char *name) if (!name || *name != 'r') return -EINVAL; - if (strict_strtoul(name + 1, 10, &offset)) + if (kstrtoul(name + 1, 10, &offset)) return -EINVAL; if (offset >= NUM_GPRS) return -EINVAL; diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index cb899d9f850..dd8016b0477 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -1,19 +1,25 @@ /* - * arch/s390/kernel/reipl.S - * * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000 * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) */ +#include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/sigp.h> + +# +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 # # do_reipl_asm # Parameter: r2 = schid of reipl device # - .globl do_reipl_asm -do_reipl_asm: basr %r13,0 +ENTRY(do_reipl_asm) + basr %r13,0 .Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) .Lpg1: # do store status of all registers @@ -53,7 +59,7 @@ do_reipl_asm: basr %r13,0 bas %r14,.Ldisab-.Lpg0(%r13) .L003: st %r1,__LC_SUBCHANNEL_ID lpsw 0 - sigp 0,0,0(6) + sigp 0,0,SIGP_RESTART .Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) lpsw .Ldispsw-.Lpg0(%r13) .align 8 diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 5e73dee63ba..dc3b1273c4d 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -1,35 +1,83 @@ /* - * Copyright IBM Corp 2000,2009 + * Copyright IBM Corp 2000, 2011 * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, * Denis Joseph Barrow, */ +#include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/sigp.h> + +# +# store_status +# +# Prerequisites to run this function: +# - Prefix register is set to zero +# - Original prefix register is stored in "dump_prefix_page" +# - Lowcore protection is off +# +ENTRY(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_RESTART + lghi %r1,SAVE_AREA_BASE + /* General purpose registers */ + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lg %r2,__LC_SAVE_AREA_RESTART + stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Access registers */ + stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point registers */ + std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point control register */ + stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* CPU timer */ + stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Saved prefix register */ + larl %r2,dump_prefix_page + mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + /* Clock comparator - seven bytes */ + larl %r2,.Lclkcmp + stckc 0(%r2) + mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + /* Program status word */ + epsw %r2,%r3 + st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) + st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) + larl %r2,store_status + stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) + br %r14 + + .section .bss + .align 8 +.Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm # Parameter: r2 = schid of reipl device # - .globl do_reipl_asm -do_reipl_asm: basr %r13,0 +ENTRY(do_reipl_asm) + basr %r13,0 .Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: # do store status of all registers - - stg %r1,.Lregsave-.Lpg0(%r13) - lghi %r1,0x1000 - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) - lg %r0,.Lregsave-.Lpg0(%r13) - stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1) - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1) - stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1) - lg %r10,.Ldump_pfx-.Lpg0(%r13) - mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10) - stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) - stckc .Lclkcmp-.Lpg0(%r13) - mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13) - stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) - stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) +.Lpg1: brasl %r14,store_status lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 @@ -59,17 +107,14 @@ do_reipl_asm: basr %r13,0 .L003: st %r1,__LC_SUBCHANNEL_ID lhi %r1,0 # mode 0 = esa slr %r0,%r0 # set cpuid to zero - sigp %r1,%r0,0x12 # switch to esa mode + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode lpsw 0 .Ldisab: sll %r14,1 srl %r14,1 # need to kill hi bit to avoid specification exceptions. st %r14,.Ldispsw+12-.Lpg0(%r13) lpswe .Ldispsw-.Lpg0(%r13) .align 8 -.Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 -.Ldump_pfx: .quad dump_prefix_page -.Lregsave: .quad 0x0000000000000000 .align 16 /* * These addresses have to be 31 bit otherwise @@ -78,7 +123,7 @@ do_reipl_asm: basr %r13,0 * in the ESA psw. * Bit 31 of the addresses has to be 0 for the * 31bit lpswe instruction a fact they appear to have - * ommited from the pop. + * omitted from the pop. */ .Lnewpsw: .quad 0x0000000080000000 .quad .Lpg1 diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index 3b456b80bce..f4e6f20e117 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -1,13 +1,14 @@ /* - * arch/s390/kernel/relocate_kernel.S - * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> * */ +#include <linux/linkage.h> +#include <asm/sigp.h> + /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t @@ -22,8 +23,7 @@ */ .text - .globl relocate_kernel - relocate_kernel: +ENTRY(relocate_kernel) basr %r13,0 # base address .base: stnsm sys_msk-.base(%r13),0xfb # disable DAT @@ -92,7 +92,7 @@ .no_diag308: sr %r1,%r1 # clear %r1 sr %r2,%r2 # clear %r2 - sigp %r1,%r2,0x12 # set cpuid to zero + sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero lpsw 0 # hopefully start new kernel... .align 8 @@ -112,6 +112,7 @@ .byte 0 .align 8 relocate_kernel_end: + .align 8 .globl relocate_kernel_len relocate_kernel_len: .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S index 1f9ea2067b5..cfac28330b0 100644 --- a/arch/s390/kernel/relocate_kernel64.S +++ b/arch/s390/kernel/relocate_kernel64.S @@ -1,13 +1,14 @@ /* - * arch/s390/kernel/relocate_kernel64.S - * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> * */ +#include <linux/linkage.h> +#include <asm/sigp.h> + /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t @@ -23,8 +24,7 @@ */ .text - .globl relocate_kernel - relocate_kernel: +ENTRY(relocate_kernel) basr %r13,0 # base address .base: stnsm sys_msk-.base(%r13),0xfb # disable DAT @@ -44,7 +44,7 @@ diag %r0,%r0,0x308 .back: lhi %r1,1 # mode 1 = esame - sigp %r1,%r0,0x12 # switch to esame mode + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode sam64 # switch to 64 bit addressing mode basr %r13,0 .back_base: @@ -95,7 +95,7 @@ sam31 # 31 bit mode sr %r1,%r1 # erase register r1 sr %r2,%r2 # erase register r2 - sigp %r1,%r2,0x12 # set cpuid to zero + sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero lpsw 0 # hopefully start new kernel... .align 8 @@ -115,6 +115,7 @@ .byte 0 .align 8 relocate_kernel_end: + .align 8 .globl relocate_kernel_len relocate_kernel_len: .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c new file mode 100644 index 00000000000..26b4ae96fdd --- /dev/null +++ b/arch/s390/kernel/runtime_instr.c @@ -0,0 +1,149 @@ +/* + * Copyright IBM Corp. 2012 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <asm/runtime_instr.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +/* empty control block to disable RI by loading it */ +struct runtime_instr_cb runtime_instr_empty_cb; + +static int runtime_instr_avail(void) +{ + return test_facility(64); +} + +static void disable_runtime_instr(void) +{ + struct pt_regs *regs = task_pt_regs(current); + + load_runtime_instr_cb(&runtime_instr_empty_cb); + + /* + * Make sure the RI bit is deleted from the PSW. If the user did not + * switch off RI before the system call the process will get a + * specification exception otherwise. + */ + regs->psw.mask &= ~PSW_MASK_RI; +} + +static void init_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + cb->buf_limit = 0xfff; + cb->int_requested = 1; + cb->pstate = 1; + cb->pstate_set_buf = 1; + cb->pstate_sample = 1; + cb->pstate_collect = 1; + cb->key = PAGE_DEFAULT_KEY; + cb->valid = 1; +} + +void exit_thread_runtime_instr(void) +{ + struct task_struct *task = current; + + if (!task->thread.ri_cb) + return; + disable_runtime_instr(); + kfree(task->thread.ri_cb); + task->thread.ri_signum = 0; + task->thread.ri_cb = NULL; +} + +static void runtime_instr_int_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct siginfo info; + + if (!(param32 & CPU_MF_INT_RI_MASK)) + return; + + inc_irq_stat(IRQEXT_CMR); + + if (!current->thread.ri_cb) + return; + if (current->thread.ri_signum < SIGRTMIN || + current->thread.ri_signum > SIGRTMAX) { + WARN_ON_ONCE(1); + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = current->thread.ri_signum; + info.si_code = SI_QUEUE; + if (param32 & CPU_MF_INT_RI_BUF_FULL) + info.si_int = ENOBUFS; + else if (param32 & CPU_MF_INT_RI_HALTED) + info.si_int = ECANCELED; + else + return; /* unknown reason */ + + send_sig_info(current->thread.ri_signum, &info, current); +} + +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +{ + struct runtime_instr_cb *cb; + + if (!runtime_instr_avail()) + return -EOPNOTSUPP; + + if (command == S390_RUNTIME_INSTR_STOP) { + preempt_disable(); + exit_thread_runtime_instr(); + preempt_enable(); + return 0; + } + + if (command != S390_RUNTIME_INSTR_START || + (signum < SIGRTMIN || signum > SIGRTMAX)) + return -EINVAL; + + if (!current->thread.ri_cb) { + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + } else { + cb = current->thread.ri_cb; + memset(cb, 0, sizeof(*cb)); + } + + init_runtime_instr_cb(cb); + current->thread.ri_signum = signum; + + /* now load the control block to make it available */ + preempt_disable(); + current->thread.ri_cb = cb; + load_runtime_instr_cb(cb); + preempt_enable(); + return 0; +} + +static int __init runtime_instr_init(void) +{ + int rc; + + if (!runtime_instr_avail()) + return 0; + + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + runtime_instr_int_handler); + if (rc) + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + else + pr_info("Runtime instrumentation facility initialized\n"); + return rc; +} +device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c deleted file mode 100644 index 185029919c4..00000000000 --- a/arch/s390/kernel/s390_ext.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright IBM Corp. 1999,2010 - * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com>, - */ - -#include <linux/kernel_stat.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/ftrace.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <asm/s390_ext.h> -#include <asm/irq_regs.h> -#include <asm/cputime.h> -#include <asm/lowcore.h> -#include <asm/irq.h> -#include "entry.h" - -struct ext_int_info { - struct ext_int_info *next; - ext_int_handler_t handler; - __u16 code; -}; - -/* - * ext_int_hash[index] is the start of the list for all external interrupts - * that hash to this index. With the current set of external interrupts - * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000 - * iucv and 0x2603 pfault) this is always the first element. - */ -static struct ext_int_info *ext_int_hash[256]; - -static inline int ext_hash(__u16 code) -{ - return (code + (code >> 9)) & 0xff; -} - -int register_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - struct ext_int_info *p; - int index; - - p = kmalloc(sizeof(*p), GFP_ATOMIC); - if (!p) - return -ENOMEM; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} -EXPORT_SYMBOL(register_external_interrupt); - -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - struct ext_int_info *p, *q; - int index; - - index = ext_hash(code); - q = NULL; - p = ext_int_hash[index]; - while (p) { - if (p->code == code && p->handler == handler) - break; - q = p; - p = p->next; - } - if (!p) - return -ENOENT; - if (q) - q->next = p->next; - else - ext_int_hash[index] = p->next; - kfree(p); - return 0; -} -EXPORT_SYMBOL(unregister_external_interrupt); - -void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, - unsigned int param32, unsigned long param64) -{ - struct pt_regs *old_regs; - unsigned short code; - struct ext_int_info *p; - int index; - - code = (unsigned short) ext_int_code; - old_regs = set_irq_regs(regs); - s390_idle_check(regs, S390_lowcore.int_clock, - S390_lowcore.async_enter_timer); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) - /* Serve timer interrupts first. */ - clock_comparator_work(); - kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; - if (code != 0x1004) - __get_cpu_var(s390_idle).nohz_delay = 1; - index = ext_hash(code); - for (p = ext_int_hash[index]; p; p = p->next) { - if (likely(p->code == code)) - p->handler(ext_int_code, param32, param64); - } - irq_exit(); - set_irq_regs(old_regs); -} diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 656fcbb9bd8..9f60467938d 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -1,6 +1,13 @@ #include <linux/module.h> +#include <linux/kvm_host.h> #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif +#if IS_ENABLED(CONFIG_KVM) +EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); +#endif +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index 2e82fdd8932..a41f2c99dcc 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -1,13 +1,16 @@ /* * Mini SCLP driver. * - * Copyright IBM Corp. 2004,2009 + * Copyright IBM Corp. 2004, 2009 * * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com>, * */ +#include <linux/linkage.h> +#include <asm/irq.h> + LC_EXT_NEW_PSW = 0x58 # addr of ext int handler LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter @@ -42,6 +45,12 @@ _sclp_wait_int: #endif mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8) mvc 0(16,%r8),0(%r9) +#ifdef CONFIG_64BIT + epsw %r6,%r7 # set current addressing mode + nill %r6,0x1 # in new psw (31 or 64 bit mode) + nilh %r7,0x8000 + stm %r6,%r7,0(%r8) +#endif lhi %r6,0x0200 # cr mask for ext int (cr0.54) ltr %r2,%r2 jz .LsetctS1 @@ -65,9 +74,9 @@ _sclp_wait_int: lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt .LwaitS1: lh %r7,LC_EXT_INT_CODE - chi %r7,0x1004 # timeout? + chi %r7,EXT_IRQ_CLK_COMP # timeout? je .LtimeoutS1 - chi %r7,0x2401 # service int? + chi %r7,EXT_IRQ_SERVICE_SIG # service int? jne .LloopS1 sr %r2,%r2 l %r3,LC_EXT_INT_PARAM @@ -85,7 +94,7 @@ _sclp_wait_int: .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int #ifdef CONFIG_64BIT .LextpswS1_64: - .quad 0x0000000180000000, .LwaitS1 # PSW to handle ext int, 64 bit + .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit #endif .LwaitpswS1: .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int @@ -217,7 +226,7 @@ _sclp_print: ahi %r2,1 ltr %r0,%r0 # end of string? jz .LfinalizemtoS4 - chi %r0,0x15 # end of line (NL)? + chi %r0,0x0a # end of line (NL)? jz .LfinalizemtoS4 stc %r0,0(%r6,%r7) # copy to mto la %r11,0(%r6,%r7) @@ -260,8 +269,7 @@ _sclp_print: # R2 = 0 on success, 1 on failure # - .globl _sclp_print_early -_sclp_print_early: +ENTRY(_sclp_print_early) stm %r6,%r15,24(%r15) # save registers ahi %r15,-96 # create stack frame #ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6f6350826c8..1e2264b46e4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/setup.c - * * S390 version - * Copyright (C) IBM Corp. 1999,2010 + * Copyright IBM Corp. 1999, 2012 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -18,9 +16,10 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/errno.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> @@ -42,10 +41,13 @@ #include <linux/reboot.h> #include <linux/topology.h> #include <linux/ftrace.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> +#include <linux/compat.h> #include <asm/ipl.h> -#include <asm/uaccess.h> -#include <asm/system.h> +#include <asm/facility.h> #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> @@ -55,20 +57,11 @@ #include <asm/ptrace.h> #include <asm/sections.h> #include <asm/ebcdic.h> -#include <asm/compat.h> #include <asm/kvm_virtio.h> - -long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); - -/* - * User copy operations. - */ -struct uaccess_ops uaccess; -EXPORT_SYMBOL(uaccess); +#include <asm/diag.h> +#include <asm/os_info.h> +#include <asm/sclp.h> +#include "entry.h" /* * Machine setup.. @@ -85,10 +78,23 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap = 0; char elf_platform[ELF_PLATFORM_SIZE]; -struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; - int __initdata memory_end_set; unsigned long __initdata memory_end; +unsigned long __initdata max_physmem_end; + +unsigned long VMALLOC_START; +EXPORT_SYMBOL(VMALLOC_START); + +unsigned long VMALLOC_END; +EXPORT_SYMBOL(VMALLOC_END); + +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); + +#ifdef CONFIG_64BIT +unsigned long MODULES_VADDR; +unsigned long MODULES_END; +#endif /* An array with a pointer to the lowcore of every CPU. */ struct _lowcore *lowcore_ptr[NR_CPUS]; @@ -102,16 +108,6 @@ EXPORT_SYMBOL(lowcore_ptr); #include <asm/setup.h> -static struct resource code_resource = { - .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; - -static struct resource data_resource = { - .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; - /* * condev= and conmode= setup parameter. */ @@ -132,9 +128,14 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) - add_preferred_console("hvc", 0, NULL); - else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + if (MACHINE_IS_KVM) { + if (sclp_has_vt220()) + add_preferred_console("ttyS", 1, NULL); + else if (sclp_has_linemode()) + add_preferred_console("ttyS", 0, NULL); + else + add_preferred_console("hvc", 0, NULL); + } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); @@ -210,25 +211,19 @@ static void __init conmode_default(void) } } -#ifdef CONFIG_ZFCPDUMP -static void __init setup_zfcpdump(unsigned int console_devno) +#ifdef CONFIG_CRASH_DUMP +static void __init setup_zfcpdump(void) { - static char str[41]; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; - if (console_devno != -1) - sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno, console_devno); - else - sprintf(str, " cio_ignore=all,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno); - strcat(boot_command_line, str); + if (OLDMEM_BASE) + return; + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; } #else -static inline void setup_zfcpdump(unsigned int console_devno) {} -#endif /* CONFIG_ZFCPDUMP */ +static inline void setup_zfcpdump(void) {} +#endif /* CONFIG_CRASH_DUMP */ /* * Reboot, halt and power_off stubs. They just call _machine_restart, @@ -272,107 +267,29 @@ void machine_power_off(void) * Dummy power off function. */ void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL_GPL(pm_power_off); static int __init early_parse_mem(char *p) { memory_end = memparse(p, &p); + memory_end &= PAGE_MASK; memory_end_set = 1; return 0; } early_param("mem", early_parse_mem); -unsigned int user_mode = HOME_SPACE_MODE; -EXPORT_SYMBOL_GPL(user_mode); - -static int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int __init parse_vmalloc(char *arg) { - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; -#ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; -#endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; - - if (MACHINE_HAS_MVCOS) { - memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); - return 1; - } else { - memcpy(&uaccess, &uaccess_pt, sizeof(uaccess)); - return 0; - } -} - -/* - * Switch kernel/user addressing modes? - */ -static int __init early_parse_switch_amode(char *p) -{ - if (user_mode != SECONDARY_SPACE_MODE) - user_mode = PRIMARY_SPACE_MODE; + if (!arg) + return -EINVAL; + VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; return 0; } -early_param("switch_amode", early_parse_switch_amode); +early_param("vmalloc", parse_vmalloc); -static int __init early_parse_user_mode(char *p) -{ - if (p && strcmp(p, "primary") == 0) - user_mode = PRIMARY_SPACE_MODE; -#ifdef CONFIG_S390_EXEC_PROTECT - else if (p && strcmp(p, "secondary") == 0) - user_mode = SECONDARY_SPACE_MODE; -#endif - else if (!p || strcmp(p, "home") == 0) - user_mode = HOME_SPACE_MODE; - else - return 1; - return 0; -} -early_param("user_mode", early_parse_user_mode); - -#ifdef CONFIG_S390_EXEC_PROTECT -/* - * Enable execute protection? - */ -static int __init early_parse_noexec(char *p) -{ - if (!strncmp(p, "off", 3)) - return 0; - user_mode = SECONDARY_SPACE_MODE; - return 0; -} -early_param("noexec", early_parse_noexec); -#endif /* CONFIG_S390_EXEC_PROTECT */ - -static void setup_addressing_mode(void) -{ - if (user_mode == SECONDARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_SECONDARY, - PSW32_ASC_SECONDARY)) - pr_info("Execute protection active, " - "mvcos available\n"); - else - pr_info("Execute protection active, " - "mvcos not available\n"); - } else if (user_mode == PRIMARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) - pr_info("Address spaces switched, " - "mvcos available\n"); - else - pr_info("Address spaces switched, " - "mvcos not available\n"); - } -} +void *restart_stack __attribute__((__section__(".data"))); -static void __init -setup_lowcore(void) +static void __init setup_lowcore(void) { struct _lowcore *lc; @@ -381,31 +298,35 @@ setup_lowcore(void) */ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; - lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; + lc->kernel_stack = ((unsigned long) &init_thread_union) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; lc->machine_flags = S390_lowcore.machine_flags; @@ -420,7 +341,6 @@ setup_lowcore(void) __ctl_set_bit(14, 29); } #else - lc->cmf_hpp = -1ULL; lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -432,244 +352,345 @@ setup_lowcore(void) lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; lc->ftrace_func = S390_lowcore.ftrace_func; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necessary if + * PSW restart is done on an offline CPU that has lowcore zero. + */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1UL; + + /* Setup absolute zero lowcore */ + mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); + mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); + mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data); + mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); + mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); + +#ifdef CONFIG_SMP + lc->spinlock_lockval = arch_spin_lockval(0); +#endif + set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; } -static void __init -setup_resources(void) +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource __initdata *standard_resources[] = { + &code_resource, + &data_resource, + &bss_resource, +}; + +static void __init setup_resources(void) { - struct resource *res, *sub_res; - int i; + struct resource *res, *std_res, *sub_res; + struct memblock_region *reg; + int j; code_resource.start = (unsigned long) &_text; code_resource.end = (unsigned long) &_etext - 1; data_resource.start = (unsigned long) &_etext; data_resource.end = (unsigned long) &_edata - 1; + bss_resource.start = (unsigned long) &__bss_start; + bss_resource.end = (unsigned long) &__bss_stop - 1; - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; - res = alloc_bootmem_low(sizeof(struct resource)); + for_each_memblock(memory, reg) { + res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; - switch (memory_chunk[i].type) { - case CHUNK_READ_WRITE: - res->name = "System RAM"; - break; - case CHUNK_READ_ONLY: - res->name = "System ROM"; - res->flags |= IORESOURCE_READONLY; - break; - default: - res->name = "reserved"; - } - res->start = memory_chunk[i].addr; - res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; - request_resource(&iomem_resource, res); - if (code_resource.start >= res->start && - code_resource.start <= res->end && - code_resource.end > res->end) { - sub_res = alloc_bootmem_low(sizeof(struct resource)); - memcpy(sub_res, &code_resource, - sizeof(struct resource)); - sub_res->end = res->end; - code_resource.start = res->end + 1; - request_resource(res, sub_res); - } + res->name = "System RAM"; + res->start = reg->base; + res->end = reg->base + reg->size - 1; + request_resource(&iomem_resource, res); - if (code_resource.start >= res->start && - code_resource.start <= res->end && - code_resource.end <= res->end) - request_resource(res, &code_resource); - - if (data_resource.start >= res->start && - data_resource.start <= res->end && - data_resource.end > res->end) { - sub_res = alloc_bootmem_low(sizeof(struct resource)); - memcpy(sub_res, &data_resource, - sizeof(struct resource)); - sub_res->end = res->end; - data_resource.start = res->end + 1; - request_resource(res, sub_res); + for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { + std_res = standard_resources[j]; + if (std_res->start < res->start || + std_res->start > res->end) + continue; + if (std_res->end > res->end) { + sub_res = alloc_bootmem_low(sizeof(*sub_res)); + *sub_res = *std_res; + sub_res->end = res->end; + std_res->start = res->end + 1; + request_resource(res, sub_res); + } else { + request_resource(res, std_res); + } } - - if (data_resource.start >= res->start && - data_resource.start <= res->end && - data_resource.end <= res->end) - request_resource(res, &data_resource); } } -unsigned long real_memory_size; -EXPORT_SYMBOL_GPL(real_memory_size); - static void __init setup_memory_end(void) { - unsigned long memory_size; - unsigned long max_mem; - int i; - -#ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP) { - memory_end = ZFCPDUMP_HSA_SIZE; - memory_end_set = 1; - } + unsigned long vmax, vmalloc_size, tmp; + + /* Choose kernel address space layout: 2, 3, or 4 levels. */ +#ifdef CONFIG_64BIT + vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; + if (tmp <= (1UL << 42)) + vmax = 1UL << 42; /* 3-level kernel page table */ + else + vmax = 1UL << 53; /* 4-level kernel page table */ + /* module area is at the end of the kernel address space. */ + MODULES_END = vmax; + MODULES_VADDR = MODULES_END - MODULES_LEN; + VMALLOC_END = MODULES_VADDR; +#else + vmalloc_size = VMALLOC_END ?: 96UL << 20; + vmax = 1UL << 31; /* 2-level kernel page table */ + /* vmalloc area is at the end of the kernel address space. */ + VMALLOC_END = vmax; #endif - memory_size = 0; - memory_end &= PAGE_MASK; + VMALLOC_START = vmax - vmalloc_size; + + /* Split remaining virtual space between 1:1 mapping & vmemmap array */ + tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + tmp = SECTION_ALIGN_UP(tmp); + tmp = VMALLOC_START - tmp * sizeof(struct page); + tmp &= ~((vmax >> 11) - 1); /* align to page table level */ + tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); + vmemmap = (struct page *) tmp; + + /* Take care that memory_end is set and <= vmemmap */ + memory_end = min(memory_end ?: max_physmem_end, tmp); + max_pfn = max_low_pfn = PFN_DOWN(memory_end); + memblock_remove(memory_end, ULONG_MAX); + + pr_notice("Max memory size: %luMB\n", memory_end >> 20); +} - max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS; - memory_end = min(max_mem, memory_end); +static void __init setup_vmcoreinfo(void) +{ + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); +} - /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. - */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - unsigned long start, end; - struct mem_chunk *chunk; - unsigned long align; - - chunk = &memory_chunk[i]; - align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); - start = (chunk->addr + align - 1) & ~(align - 1); - end = (chunk->addr + chunk->size) & ~(align - 1); - if (start >= end) - memset(chunk, 0, sizeof(*chunk)); - else { - chunk->addr = start; - chunk->size = end - start; - } - } +#ifdef CONFIG_CRASH_DUMP - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &memory_chunk[i]; +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} - real_memory_size = max(real_memory_size, - chunk->addr + chunk->size); - if (chunk->addr >= max_mem) { - memset(chunk, 0, sizeof(*chunk)); - continue; - } - if (chunk->addr + chunk->size > max_mem) - chunk->size = max_mem - chunk->addr; - memory_size = max(memory_size, chunk->addr + chunk->size); +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that the area behind memory_end is protected + */ +static void reserve_memory_end(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (ipl_info.type == IPL_TYPE_FCP_DUMP && + !OLDMEM_BASE && sclp_get_hsa_size()) { + memory_end = sclp_get_hsa_size(); + memory_end &= PAGE_MASK; + memory_end_set = 1; } - if (!memory_end) - memory_end = memory_size; +#endif + if (!memory_end_set) + return; + memblock_reserve(memory_end, ULONG_MAX); } -static void __init -setup_memory(void) +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) { - unsigned long bootmap_size; - unsigned long start_pfn, end_pfn; - int i; +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - start_pfn = PFN_UP(__pa(&_end)); - end_pfn = max_pfn = PFN_DOWN(memory_end); +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void remove_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} -#ifdef CONFIG_BLK_DEV_INITRD - /* - * Move the initrd in case the bitmap of the bootmem allocater - * would overwrite it. - */ +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + phys_addr_t low, high; + int rc; - if (INITRD_START && INITRD_SIZE) { - unsigned long bmap_size; - unsigned long start; + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); - bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1); - bmap_size = PFN_PHYS(bmap_size); + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (rc || crash_size == 0) + return; - if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { - start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; + if (memblock.memory.regions[0].size < crash_size) { + pr_info("crashkernel reservation failed: %s\n", + "first memory chunk must be at least crashkernel size"); + return; + } - if (start + INITRD_SIZE > memory_end) { - pr_err("initrd extends beyond end of " - "memory (0x%08lx > 0x%08lx) " - "disabling initrd\n", - start + INITRD_SIZE, memory_end); - INITRD_START = INITRD_SIZE = 0; - } else { - pr_info("Moving initrd (0x%08lx -> " - "0x%08lx, size: %ld)\n", - INITRD_START, start, INITRD_SIZE); - memmove((void *) start, (void *) INITRD_START, - INITRD_SIZE); - INITRD_START = start; - } + low = crash_base ?: OLDMEM_BASE; + high = low + crash_size; + if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + /* The crashkernel fits into OLDMEM, reuse OLDMEM */ + crash_base = low; + } else { + /* Find suitable area in free memory */ + low = max_t(unsigned long, crash_size, sclp_get_hsa_size()); + high = crash_base ? crash_base + crash_size : ULONG_MAX; + + if (crash_base && crash_base < low) { + pr_info("crashkernel reservation failed: %s\n", + "crash_base too low"); + return; } + low = crash_base ?: low; + crash_base = memblock_find_in_range(low, high, crash_size, + KEXEC_CRASH_MEM_ALIGN); } -#endif - /* - * Initialize the boot-time allocator - */ - bootmap_size = init_bootmem(start_pfn, end_pfn); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", + "no suitable area found"); + return; + } - /* - * Register RAM areas with the bootmem allocator. - */ + if (register_memory_notifier(&kdump_mem_nb)) + return; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - unsigned long start_chunk, end_chunk, pfn; - - if (memory_chunk[i].type != CHUNK_READ_WRITE) - continue; - start_chunk = PFN_DOWN(memory_chunk[i].addr); - end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); - end_chunk = min(end_chunk, end_pfn); - if (start_chunk >= end_chunk) - continue; - add_active_range(0, start_chunk, end_chunk); - pfn = max(start_chunk, start_pfn); - for (; pfn < end_chunk; pfn++) - page_set_storage_key(PFN_PHYS(pfn), - PAGE_DEFAULT_KEY, 0); - } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + memblock_remove(crash_base, crash_size); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, + (unsigned long)memblock.memory.total_size >> 20); + os_info_crashkernel_add(crash_base, crash_size); +#endif +} - psw_set_key(PAGE_DEFAULT_KEY); +/* + * Reserve the initrd from being used by memblock + */ +static void __init reserve_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + initrd_end = initrd_start + INITRD_SIZE; + memblock_reserve(INITRD_START, INITRD_SIZE); +#endif +} - free_bootmem_with_active_regions(0, max_pfn); +/* + * Check for initrd being in usable memory + */ +static void __init check_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE && + !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + pr_err("initrd does not fit memory.\n"); + memblock_free(INITRD_START, INITRD_SIZE); + initrd_start = initrd_end = 0; + } +#endif +} + +/* + * Reserve all kernel text + */ +static void __init reserve_kernel(void) +{ + unsigned long start_pfn; + start_pfn = PFN_UP(__pa(&_end)); /* * Reserve memory used for lowcore/command line/kernel image. */ - reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT); - reserve_bootmem((unsigned long)_stext, - PFN_PHYS(start_pfn) - (unsigned long)_stext, - BOOTMEM_DEFAULT); + memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) + - (unsigned long)_stext); +} + +static void __init reserve_elfcorehdr(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (is_kdump_kernel()) + memblock_reserve(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size)); +#endif +} + +static void __init setup_memory(void) +{ + struct memblock_region *reg; + /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. + * Init storage key for present memory */ - reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, - BOOTMEM_DEFAULT); - -#ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE) { - if (INITRD_START + INITRD_SIZE <= memory_end) { - reserve_bootmem(INITRD_START, INITRD_SIZE, - BOOTMEM_DEFAULT); - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - } else { - pr_err("initrd extends beyond end of " - "memory (0x%08lx > 0x%08lx) " - "disabling initrd\n", - initrd_start + INITRD_SIZE, memory_end); - initrd_start = initrd_end = 0; - } + for_each_memblock(memory, reg) { + storage_key_init_range(reg->base, reg->base + reg->size); } -#endif + psw_set_key(PAGE_DEFAULT_KEY); + + /* Only cosmetics */ + memblock_enforce_memory_limit(memblock_end_of_DRAM()); } /* @@ -712,7 +733,7 @@ static void __init setup_hwcaps(void) * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information * as stored by stfl, bits 32-xxx contain additional facilities. * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilites + * doublewords passed to the instruction. The additional facilities * are: * Bit 42: decimal floating point facility is installed * Bit 44: perform floating point operation facility is installed @@ -728,12 +749,20 @@ static void __init setup_hwcaps(void) if (MACHINE_HAS_HPAGE) elf_hwcap |= HWCAP_S390_HPAGE; +#if defined(CONFIG_64BIT) /* * 64-bit register support for 31-bit processes * HWCAP_S390_HIGH_GPRS is bit 9. */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; + /* + * Transactional execution support HWCAP_S390_TE is bit 10. + */ + if (test_facility(50) && test_facility(73)) + elf_hwcap |= HWCAP_S390_TE; +#endif + get_cpu_id(&cpu_id); switch (cpu_id.machine) { case 0x9672: @@ -762,8 +791,13 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z10"); break; case 0x2817: + case 0x2818: strcpy(elf_platform, "z196"); break; + case 0x2827: + case 0x2828: + strcpy(elf_platform, "zEC12"); + break; } } @@ -772,8 +806,7 @@ static void __init setup_hwcaps(void) * was printed. */ -void __init -setup_arch(char **cmdline_p) +void __init setup_arch(char **cmdline_p) { /* * print what head.S has found out about the machine @@ -806,25 +839,47 @@ setup_arch(char **cmdline_p) ROOT_DEV = Root_RAM0; + /* Is init_mm really needed? */ init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) &_etext; init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - if (MACHINE_HAS_MVCOS) - memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); - else - memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); - parse_early_param(); - + os_info_init(); setup_ipl(); + + /* Do some memory reservations *before* memory is added to memblock */ + reserve_memory_end(); + reserve_oldmem(); + reserve_kernel(); + reserve_initrd(); + reserve_elfcorehdr(); + memblock_allow_resize(); + + /* Get information about *all* installed memory */ + detect_memory_memblock(); + + remove_oldmem(); + + /* + * Make sure all chunks are MAX_ORDER aligned so we don't need the + * extra checks that HOLES_IN_ZONE would require. + * + * Is this still required? + */ + memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); + setup_memory_end(); - setup_addressing_mode(); setup_memory(); + + check_initrd(); + reserve_crashkernel(); + setup_resources(); + setup_vmcoreinfo(); setup_lowcore(); - + smp_fill_possible_mask(); cpu_init(); s390_init_cpu_topology(); @@ -843,5 +898,37 @@ setup_arch(char **cmdline_p) set_preferred_console(); /* Setup zfcpdump support */ - setup_zfcpdump(console_devno); + setup_zfcpdump(); +} + +#ifdef CONFIG_32BIT +static int no_removal_warning __initdata; + +static int __init parse_no_removal_warning(char *str) +{ + no_removal_warning = 1; + return 0; } +__setup("no_removal_warning", parse_no_removal_warning); + +static int __init removal_warning(void) +{ + if (no_removal_warning) + return 0; + printk(KERN_ALERT "\n\n"); + printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n"); + printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n"); + printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n"); + printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n"); + printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n"); + printk(KERN_CONT "please let us know. Please write to:\n"); + printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n"); + printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n"); + printk(KERN_CONT "Thank you!\n\n"); + printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n"); + printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n"); + schedule_timeout_uninterruptible(300 * HZ); + return 0; +} +early_initcall(removal_warning); +#endif diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index abbb3c3c7aa..42b49f9e19b 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/signal.c - * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * * Based on Intel version @@ -30,11 +28,9 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/switch_to.h> #include "entry.h" -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - - typedef struct { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; @@ -52,63 +48,6 @@ typedef struct struct ucontext uc; } rt_sigframe; -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) -{ - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - - return -ERESTARTNOHAND; -} - -SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act, - struct old_sigaction __user *, oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} - -SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss, - stack_t __user *, uoss) -{ - struct pt_regs *regs = task_pt_regs(current); - return do_sigaltstack(uss, uoss, regs->gprs[15]); -} - /* Returns non-zero on fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { @@ -118,47 +57,63 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = PSW_USER_BITS | + (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, - sizeof(sregs->regs.acrs)); + sizeof(user_sregs.regs.acrs)); /* * We have to store the fp registers to current->thread.fp_regs * to merge them with the emulated registers. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, - sizeof(s390_fp_regs)); - return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs)); + sizeof(user_sregs.fpregs)); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) + return -EFAULT; + return 0; } -/* Returns positive number on error */ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { - int err; _sigregs user_sregs; /* Alwys make any pending restarted system call return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); - if (err) - return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI)) + return -EINVAL; + + /* Loading the floating-point-control word can fail. Do that first. */ + if (restore_fp_ctl(&user_sregs.fpregs.fpc)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, - sizeof(sregs->regs.acrs)); + sizeof(current->thread.acrs)); restore_access_regs(current->thread.acrs); memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, - sizeof(s390_fp_regs)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; + sizeof(current->thread.fp_regs)); - restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + restore_fp_regs(current->thread.fp_regs.fprs); + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -168,22 +123,12 @@ SYSCALL_DEFINE0(sigreturn) sigframe __user *frame = (sigframe __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -195,25 +140,14 @@ SYSCALL_DEFINE0(rt_sigreturn) rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - - if (do_sigaltstack(&frame->uc.uc_stack, NULL, - regs->gprs[15]) == -EFAULT) + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -245,13 +179,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -271,8 +198,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigframe __user *frame; frame = get_sigframe(ka, regs, sizeof(sigframe)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe))) - goto give_sigsegv; if (frame == (void __user *) -1UL) goto give_sigsegv; @@ -304,6 +229,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -311,9 +240,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; - regs->gprs[6] = task_thread_info(current)->last_break; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) @@ -332,8 +265,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, rt_sigframe __user *frame; frame = get_sigframe(ka, regs, sizeof(rt_sigframe)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe))) - goto give_sigsegv; if (frame == (void __user *) -1UL) goto give_sigsegv; @@ -344,10 +275,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); - err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gprs[15]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); err |= save_sigregs(regs, &frame->uc.uc_mcontext); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) @@ -372,6 +300,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -385,13 +317,9 @@ give_sigsegv: return -EFAULT; } -/* - * OK, we're invoking a handler - */ - -static int -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +static void handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) { int ret; @@ -400,17 +328,10 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); else ret = setup_frame(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return ret; + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } /* @@ -424,112 +345,83 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; - sigset_t *oldset; + sigset_t *oldset = sigmask_to_save(); /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. */ - if (!user_mode(regs)) - return; + current_thread_info()->system_call = + test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; + signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = ¤t->saved_sigmask; - else - oldset = ¤t->blocked; + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->int_code >> 16); + break; + } + } + /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL); - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; + if (is_compat_task()) + handle_signal32(signr, &ka, &info, oldset, regs); + else + handle_signal(signr, &ka, &info, oldset, regs); + return; + } - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->int_code = __NR_restart_syscall; + /* fallthrough */ case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; + set_pt_regs_flag(regs, PIF_SYSCALL); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_pt_regs_flag(regs, PIF_PER_TRAP); break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } - - if (signr > 0) { - /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag. - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - - /* - * Let tracing know that we've done the handler setup. - */ - tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); - } - return; } /* * If there's no signal to deliver, we just put the saved sigmask back. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } - - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } + restore_saved_sigmask(); } void do_notify_resume(struct pt_regs *regs) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - if (current->replacement_session_keyring) - key_replace_session_keyring(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63a97db83f9..243c7e51260 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,23 +1,18 @@ /* - * arch/s390/kernel/smp.c + * SMP related functions * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - * Heiko Carstens (heiko.carstens@de.ibm.com) + * Copyright IBM Corp. 1999, 2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> * (c) 1998 Ingo Molnar * - * We work with logical cpu numbering everywhere we can. The only - * functions using the real cpu address (got from STAP) are the sigp - * functions. For all other functions we use the identity mapping. - * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is - * used e.g. to find the idle task belonging to a logical cpu. Every array - * in the kernel is sorted by the logical cpu number and not by the physical - * one which is causing all the confusion with __cpu_logical_map and - * cpu_number_map in other architectures. + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. */ #define KMSG_COMPONENT "cpu" @@ -31,123 +26,380 @@ #include <linux/spinlock.h> #include <linux/kernel_stat.h> #include <linux/delay.h> -#include <linux/cache.h> #include <linux/interrupt.h> #include <linux/irqflags.h> #include <linux/cpu.h> -#include <linux/timex.h> -#include <linux/bootmem.h> #include <linux/slab.h> +#include <linux/crash_dump.h> #include <asm/asm-offsets.h> +#include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/ipl.h> #include <asm/setup.h> -#include <asm/sigp.h> -#include <asm/pgalloc.h> #include <asm/irq.h> -#include <asm/s390_ext.h> -#include <asm/cpcmd.h> #include <asm/tlbflush.h> -#include <asm/timer.h> +#include <asm/vtimer.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include <asm/cputime.h> #include <asm/vdso.h> -#include <asm/cpu.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include <asm/sigp.h> #include "entry.h" -/* logical cpu to cpu address */ -unsigned short __cpu_logical_map[NR_CPUS]; - -static struct task_struct *current_set[NR_CPUS]; - -static u8 smp_cpu_type; -static int smp_use_sigp_detection; +enum { + ec_schedule = 0, + ec_call_function_single, + ec_stop_cpu, +}; -enum s390_cpu_state { +enum { CPU_STATE_STANDBY, CPU_STATE_CONFIGURED, }; +struct pcpu { + struct cpu *cpu; + struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + unsigned long async_stack; /* async stack for the cpu */ + unsigned long panic_stack; /* panic stack for the cpu */ + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + int state; /* physical cpu state */ + int polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; + +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; + +/* + * The smp_cpu_state_mutex must be held when changing the state or polarization + * member of a pcpu data structure within the pcpu_devices arreay. + */ DEFINE_MUTEX(smp_cpu_state_mutex); -int smp_cpu_polarization[NR_CPUS]; -static int smp_cpu_state[NR_CPUS]; -static int cpu_management; -static DEFINE_PER_CPU(struct cpu, cpu_devices); +/* + * Signal processor helper functions. + */ +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +{ + int cc; -static void smp_ext_bitcall(int, int); + while (1) { + cc = __pcpu_sigp(addr, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + return cc; + cpu_relax(); + } +} -static int raw_cpu_stopped(int cpu) +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) { - u32 status; + int cc, retry; - switch (raw_sigp_ps(&status, 0, cpu, sigp_sense)) { - case sigp_status_stored: - /* Check for stopped and check stop state */ - if (status & 0x50) - return 1; - break; - default: - break; + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + break; + if (retry >= 3) + udelay(10); } + return cc; +} + +static inline int pcpu_stopped(struct pcpu *pcpu) +{ + u32 uninitialized_var(status); + + if (__pcpu_sigp(pcpu->address, SIGP_SENSE, + 0, &status) != SIGP_CC_STATUS_STORED) + return 0; + return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); +} + +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING, + 0, NULL) != SIGP_CC_STATUS_STORED) + return 1; + /* Status stored condition code is equivalent to cpu not running. */ return 0; } -static inline int cpu_stopped(int cpu) +/* + * Find struct pcpu by cpu address. + */ +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) { - return raw_cpu_stopped(cpu_logical_map(cpu)); + int cpu; + + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; } -void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) { - struct _lowcore *lc, *current_lc; - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long sp; - - if (smp_processor_id() == 0) - func(data); - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); - /* Disable lowcore protection */ - __ctl_clear_bit(0, 28); - current_lc = lowcore_ptr[smp_processor_id()]; - lc = lowcore_ptr[0]; - if (!lc) - lc = current_lc; - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; - lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; - if (!cpu_online(0)) - smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); - while (sigp(0, sigp_stop_and_store_status) == sigp_busy) + int order; + + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + pcpu_sigp_retry(pcpu, order, 0); +} + +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc; + + if (pcpu != &pcpu_devices[0]) { + pcpu->lowcore = (struct _lowcore *) + __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + pcpu->panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + goto out; + } + lc = pcpu->lowcore; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = pcpu->async_stack + ASYNC_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); + if (!lc->extended_save_area_addr) + goto out; + } +#else + if (vdso_alloc_per_cpu(lc)) + goto out; +#endif + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); + return 0; +out: + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void pcpu_free_lowcore(struct pcpu *pcpu) +{ + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); + lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + struct _lowcore *lc = pcpu->lowcore; + + free_page((unsigned long) lc->extended_save_area_addr); + lc->extended_save_area_addr = 0; + } +#else + vdso_free_per_cpu(pcpu->lowcore); +#endif + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc = pcpu->lowcore; + + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + atomic_inc(&init_mm.context.attach_count); + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->ftrace_func = S390_lowcore.ftrace_func; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ + struct _lowcore *lc = pcpu->lowcore; + struct thread_info *ti = task_thread_info(tsk); + + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->thread_info = (unsigned long) task_thread_info(tsk); + lc->current_task = (unsigned long) tsk; + lc->user_timer = ti->user_timer; + lc->system_timer = ti->system_timer; + lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ + struct _lowcore *lc = pcpu->lowcore; + + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1UL; + pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); +} + +/* + * Call function via PSW restart on pcpu and stop the current cpu. + */ +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), + void *data, unsigned long stack) +{ + struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; + unsigned long source_cpu = stap(); + + __load_psw_mask(PSW_KERNEL_BITS); + if (pcpu->address == source_cpu) + func(data); /* should not return */ + /* Stop target cpu (if func returns this stops the current cpu). */ + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + /* Restart func on the target cpu and stop the current cpu. */ + mem_assign_absolute(lc->restart_stack, stack); + mem_assign_absolute(lc->restart_fn, (unsigned long) func); + mem_assign_absolute(lc->restart_data, (unsigned long) data); + mem_assign_absolute(lc->restart_source, source_cpu); + asm volatile( + "0: sigp 0,%0,%2 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,%3 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (source_cpu), + "K" (SIGP_RESTART), "K" (SIGP_STOP) + : "0", "1", "cc"); + for (;;) ; +} + +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); +} + +/* + * Call function on the ipl CPU. + */ +void smp_call_ipl_cpu(void (*func)(void *), void *data) +{ + pcpu_delegate(&pcpu_devices[0], func, data, + pcpu_devices->panic_stack + PAGE_SIZE); +} + +int smp_find_processor_id(u16 address) +{ + int cpu; + + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; +} + +int smp_vcpu_scheduled(int cpu) +{ + return pcpu_running(pcpu_devices + cpu); +} + +void smp_yield(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +void smp_yield_cpu(int cpu) +{ + if (MACHINE_HAS_DIAG9C) + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); + else if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. + */ +static void smp_emergency_stop(cpumask_t *cpumask) +{ + u64 end; + int cpu; + + end = get_tod_clock() + (1000000UL << 12); + for_each_cpu(cpu, cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, + 0, NULL) == SIGP_CC_BUSY && + get_tod_clock() < end) + cpu_relax(); + } + while (get_tod_clock() < end) { + for_each_cpu(cpu, cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) + break; cpu_relax(); - sp = lc->panic_stack; - sp -= sizeof(struct pt_regs); - regs = (struct pt_regs *) sp; - memcpy(®s->gprs, ¤t_lc->gpregs_save_area, sizeof(regs->gprs)); - regs->psw = lc->psw_save_area; - sp -= STACK_FRAME_OVERHEAD; - sf = (struct stack_frame *) sp; - sf->back_chain = regs->gprs[15]; - smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]); + } } +/* + * Stop all cpus but the current one. + */ void smp_send_stop(void) { - int cpu, rc; + cpumask_t cpumask; + int cpu; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); trace_hardirqs_off(); - /* stop all processors */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = sigp(cpu, sigp_stop); - } while (rc == sigp_busy); + debug_set_critical(); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); - while (!cpu_stopped(cpu)) + if (oops_in_progress) + smp_emergency_stop(&cpumask); + + /* stop all processors */ + for_each_cpu(cpu, &cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } @@ -156,40 +408,25 @@ void smp_send_stop(void) * This is the main routine where commands issued by other * cpus are handled. */ - -static void do_ext_call_interrupt(unsigned int ext_int_code, - unsigned int param32, unsigned long param64) +static void smp_handle_ext_call(void) { unsigned long bits; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; - /* - * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. - */ - bits = xchg(&S390_lowcore.ext_call_fast, 0); - - if (test_bit(ec_call_function, &bits)) - generic_smp_call_function_interrupt(); - + /* handle bit signal external calls */ + bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + if (test_bit(ec_stop_cpu, &bits)) + smp_stop_cpu(); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); } -/* - * Send an external call sigp to another cpu and return without waiting - * for its completion. - */ -static void smp_ext_bitcall(int cpu, int sig) +static void do_ext_call_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) { - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy) - udelay(10); + inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS); + smp_handle_ext_call(); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -197,12 +434,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - smp_ext_bitcall(cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) { - smp_ext_bitcall(cpu, ec_call_function_single); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } #ifndef CONFIG_64BIT @@ -228,15 +465,16 @@ EXPORT_SYMBOL(smp_ptlb_all); */ void smp_send_reschedule(int cpu) { - smp_ext_bitcall(cpu, ec_schedule); + pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } /* * parameter area for the set/clear control bit callbacks */ struct ec_creg_mask_parms { - unsigned long orvals[16]; - unsigned long andvals[16]; + unsigned long orval; + unsigned long andval; + int cr; }; /* @@ -246,11 +484,9 @@ static void smp_ctl_bit_callback(void *info) { struct ec_creg_mask_parms *pp = info; unsigned long cregs[16]; - int i; __ctl_store(cregs, 0, 15); - for (i = 0; i <= 15; i++) - cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i]; + cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; __ctl_load(cregs, 0, 15); } @@ -259,11 +495,8 @@ static void smp_ctl_bit_callback(void *info) */ void smp_ctl_set_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.orvals[cr] = 1 << bit; on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -273,477 +506,291 @@ EXPORT_SYMBOL(smp_ctl_set_bit); */ void smp_ctl_clear_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.andvals[cr] = ~(1L << bit); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); -#ifdef CONFIG_ZFCPDUMP +#ifdef CONFIG_CRASH_DUMP -static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) +static void __init smp_get_save_area(int cpu, u16 address) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + void *lc = pcpu_devices[0].lowcore; + struct save_area *save_area; + + if (is_kdump_kernel()) + return; + if (!OLDMEM_BASE && (address == boot_cpu_address || + ipl_info.type != IPL_TYPE_FCP_DUMP)) return; - if (cpu >= NR_CPUS) { - pr_warning("CPU %i exceeds the maximum %i and is excluded from " - "the dump\n", cpu, NR_CPUS - 1); + save_area = dump_save_area_create(cpu); + if (!save_area) + panic("could not allocate memory for save area\n"); + if (address == boot_cpu_address) { + /* Copy the registers of the boot cpu. */ + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + SAVE_AREA_BASE - PAGE_SIZE, 0); return; } - zfcpdump_save_areas[cpu] = kmalloc(sizeof(struct save_area), GFP_KERNEL); - while (raw_sigp(phy_cpu, sigp_stop_and_store_status) == sigp_busy) - cpu_relax(); - memcpy_real(zfcpdump_save_areas[cpu], - (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE, - sizeof(struct save_area)); + /* Get the registers of a non-boot cpu. */ + __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); + memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); } -struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); +int smp_store_status(int cpu) +{ + struct pcpu *pcpu; -#else + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, + 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + return 0; +} -static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } +#else /* CONFIG_CRASH_DUMP */ -#endif /* CONFIG_ZFCPDUMP */ +static inline void smp_get_save_area(int cpu, u16 address) { } -static int cpu_known(int cpu_id) -{ - int cpu; +#endif /* CONFIG_CRASH_DUMP */ - for_each_present_cpu(cpu) { - if (__cpu_logical_map[cpu] == cpu_id) - return 1; - } - return 0; +void smp_cpu_set_polarization(int cpu, int val) +{ + pcpu_devices[cpu].polarization = val; } -static int smp_rescan_cpus_sigp(cpumask_t avail) +int smp_cpu_get_polarization(int cpu) { - int cpu_id, logical_cpu; - - logical_cpu = cpumask_first(&avail); - if (logical_cpu >= nr_cpu_ids) - return 0; - for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) { - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; - if (!cpu_stopped(logical_cpu)) - continue; - cpu_set(logical_cpu, cpu_present_map); - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = cpumask_next(logical_cpu, &avail); - if (logical_cpu >= nr_cpu_ids) - break; - } - return 0; + return pcpu_devices[cpu].polarization; } -static int smp_rescan_cpus_sclp(cpumask_t avail) +static struct sclp_cpu_info *smp_get_cpu_info(void) { + static int use_sigp_detection; struct sclp_cpu_info *info; - int cpu_id, logical_cpu, cpu; - int rc; - - logical_cpu = cpumask_first(&avail); - if (logical_cpu >= nr_cpu_ids) - return 0; - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - rc = sclp_get_cpu_info(info); - if (rc) - goto out; - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) - continue; - cpu_id = info->cpu[cpu].address; - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; - cpu_set(logical_cpu, cpu_present_map); - if (cpu >= info->configured) - smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; - else - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = cpumask_next(logical_cpu, &avail); - if (logical_cpu >= nr_cpu_ids) - break; + int address; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + use_sigp_detection = 1; + for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) + continue; + info->cpu[info->configured].address = address; + info->configured++; + } + info->combined = info->configured; } -out: - kfree(info); - return rc; + return info; } -static int __smp_rescan_cpus(void) +static int smp_add_present_cpu(int cpu); + +static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { + struct pcpu *pcpu; cpumask_t avail; + int cpu, nr, i; - cpus_xor(avail, cpu_possible_map, cpu_present_map); - if (smp_use_sigp_detection) - return smp_rescan_cpus_sigp(avail); - else - return smp_rescan_cpus_sclp(avail); + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + cpu = cpumask_first(&avail); + for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = info->cpu[i].address; + pcpu->state = (i >= info->configured) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + } + return nr; } static void __init smp_detect_cpus(void) { unsigned int cpu, c_cpus, s_cpus; struct sclp_cpu_info *info; - u16 boot_cpu_addr, cpu_addr; - c_cpus = 1; - s_cpus = 0; - boot_cpu_addr = __cpu_logical_map[0]; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = smp_get_cpu_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); - /* Use sigp detection algorithm if sclp doesn't work. */ - if (sclp_get_cpu_info(info)) { - smp_use_sigp_detection = 1; - for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) { - if (cpu == boot_cpu_addr) - continue; - if (!raw_cpu_stopped(cpu)) - continue; - smp_get_save_area(c_cpus, cpu); - c_cpus++; - } - goto out; - } - if (info->has_cpu_type) { for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address == boot_cpu_addr) { - smp_cpu_type = info->cpu[cpu].type; - break; - } + if (info->cpu[cpu].address != boot_cpu_address) + continue; + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; + break; } } - + c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) - continue; - cpu_addr = info->cpu[cpu].address; - if (cpu_addr == boot_cpu_addr) + if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) continue; - if (!raw_cpu_stopped(cpu_addr)) { + if (cpu < info->configured) { + smp_get_save_area(c_cpus, info->cpu[cpu].address); + c_cpus++; + } else s_cpus++; - continue; - } - smp_get_save_area(c_cpus, cpu_addr); - c_cpus++; } -out: - kfree(info); pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); get_online_cpus(); - __smp_rescan_cpus(); + __smp_rescan_cpus(info, 0); put_online_cpus(); + kfree(info); } /* * Activate a secondary processor. */ -int __cpuinit start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { - /* Setup the cpu */ + S390_lowcore.last_update_clock = get_tod_clock(); + S390_lowcore.restart_stack = (unsigned long) restart_stack; + S390_lowcore.restart_fn = (unsigned long) do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1UL; + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); cpu_init(); preempt_disable(); - /* Enable TOD clock interrupts on the secondary cpu. */ init_cpu_timer(); - /* Enable cpu timer interrupts on the secondary cpu. */ init_cpu_vtimer(); - /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); - - /* call cpu notifiers */ notify_cpu_starting(smp_processor_id()); - /* Mark this cpu as online */ - ipi_call_lock(); - cpu_set(smp_processor_id(), cpu_online_map); - ipi_call_unlock(); - /* Switch on interrupts */ + set_cpu_online(smp_processor_id(), true); + inc_irq_stat(CPU_RST); local_irq_enable(); - /* cpu_idle will call schedule for us */ - cpu_idle(); - return 0; -} - -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit smp_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle; - - c_idle = container_of(work, struct create_idle, work); - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - -static int __cpuinit smp_alloc_lowcore(int cpu) -{ - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; - - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - if (!lowcore) - return -ENOMEM; - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - if (!panic_stack || !async_stack) - goto out; - memcpy(lowcore, &S390_lowcore, 512); - memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); - lowcore->async_stack = async_stack + ASYNC_SIZE; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - unsigned long save_area; - - save_area = get_zeroed_page(GFP_KERNEL); - if (!save_area) - goto out; - lowcore->extended_save_area_addr = (u32) save_area; - } -#else - if (vdso_alloc_per_cpu(cpu, lowcore)) - goto out; -#endif - lowcore_ptr[cpu] = lowcore; - return 0; - -out: - free_page(panic_stack); - free_pages(async_stack, ASYNC_ORDER); - free_pages((unsigned long) lowcore, LC_ORDER); - return -ENOMEM; -} - -static void smp_free_lowcore(int cpu) -{ - struct _lowcore *lowcore; - - lowcore = lowcore_ptr[cpu]; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - free_page((unsigned long) lowcore->extended_save_area_addr); -#else - vdso_free_per_cpu(cpu, lowcore); -#endif - free_page(lowcore->panic_stack - PAGE_SIZE); - free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); - free_pages((unsigned long) lowcore, LC_ORDER); - lowcore_ptr[cpu] = NULL; + cpu_startup_entry(CPUHP_ONLINE); } /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct _lowcore *cpu_lowcore; - struct create_idle c_idle; - struct task_struct *idle; - struct stack_frame *sf; - u32 lowcore; - int ccode; - - if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) - return -EIO; - idle = current_set[cpu]; - if (!idle) { - c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done); - INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle); - c_idle.cpu = cpu; - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - if (IS_ERR(c_idle.idle)) - return PTR_ERR(c_idle.idle); - idle = c_idle.idle; - current_set[cpu] = c_idle.idle; - } - init_idle(idle, cpu); - if (smp_alloc_lowcore(cpu)) - return -ENOMEM; - do { - ccode = sigp(cpu, sigp_initial_cpu_reset); - if (ccode == sigp_busy) - udelay(10); - if (ccode == sigp_not_operational) - goto err_out; - } while (ccode == sigp_busy); - - lowcore = (u32)(unsigned long)lowcore_ptr[cpu]; - while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy) - udelay(10); - - cpu_lowcore = lowcore_ptr[cpu]; - cpu_lowcore->kernel_stack = (unsigned long) - task_stack_page(idle) + THREAD_SIZE; - cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle); - sf = (struct stack_frame *) (cpu_lowcore->kernel_stack - - sizeof(struct pt_regs) - - sizeof(struct stack_frame)); - memset(sf, 0, sizeof(struct stack_frame)); - sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->save_area[15] = (unsigned long) sf; - __ctl_store(cpu_lowcore->cregs_save_area, 0, 15); - atomic_inc(&init_mm.context.attach_count); - asm volatile( - " stam 0,15,0(%0)" - : : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); - cpu_lowcore->percpu_offset = __per_cpu_offset[cpu]; - cpu_lowcore->current_task = (unsigned long) idle; - cpu_lowcore->cpu_nr = cpu; - cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; - cpu_lowcore->machine_flags = S390_lowcore.machine_flags; - cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; - memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list, - MAX_FACILITY_BIT/8); - eieio(); + struct pcpu *pcpu; + int rc; - while (sigp(cpu, sigp_restart) == sigp_busy) - udelay(10); + pcpu = pcpu_devices + cpu; + if (pcpu->state != CPU_STATE_CONFIGURED) + return -EIO; + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, tidle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); while (!cpu_online(cpu)) cpu_relax(); return 0; - -err_out: - smp_free_lowcore(cpu); - return -EIO; } -static int __init setup_possible_cpus(char *s) -{ - int pcpus, cpu; +static unsigned int setup_possible_cpus __initdata; - pcpus = simple_strtoul(s, NULL, 0); - init_cpu_possible(cpumask_of(0)); - for (cpu = 1; cpu < pcpus && cpu < nr_cpu_ids; cpu++) - set_cpu_possible(cpu, true); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); return 0; } -early_param("possible_cpus", setup_possible_cpus); +early_param("possible_cpus", _setup_possible_cpus); #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { - struct ec_creg_mask_parms cr_parms; - int cpu = smp_processor_id(); - - cpu_clear(cpu, cpu_online_map); + unsigned long cregs[16]; - /* Disable pfault pseudo page faults on this cpu. */ + /* Handle possible pending IPIs */ + smp_handle_ext_call(); + set_cpu_online(smp_processor_id(), false); + /* Disable pseudo page faults on this cpu. */ pfault_fini(); - - memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); - memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); - - /* disable all external interrupts */ - cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 | - 1 << 11 | 1 << 10 | 1 << 6 | 1 << 4); - /* disable all I/O interrupts */ - cr_parms.orvals[6] = 0; - cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | - 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24); - /* disable most machine checks */ - cr_parms.orvals[14] = 0; - cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 | - 1 << 25 | 1 << 24); - - smp_ctl_bit_callback(&cr_parms); - + /* Disable interrupt sources via control register. */ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); return 0; } void __cpu_die(unsigned int cpu) { + struct pcpu *pcpu; + /* Wait until target cpu is down */ - while (!cpu_stopped(cpu)) + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) cpu_relax(); - while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy) - udelay(10); - smp_free_lowcore(cpu); + pcpu_free_lowcore(pcpu); atomic_dec(&init_mm.context.attach_count); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } -void cpu_die(void) +void __noreturn cpu_die(void) { idle_task_exit(); - while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) - cpu_relax(); - for (;;); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + for (;;) ; } #endif /* CONFIG_HOTPLUG_CPU */ -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init smp_fill_possible_mask(void) { -#ifndef CONFIG_64BIT - unsigned long save_area = 0; -#endif - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; + unsigned int possible, sclp, cpu; - smp_detect_cpus(); + sclp = sclp_get_max_cpu() ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp); + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} +void __init smp_prepare_cpus(unsigned int max_cpus) +{ /* request the 0x1201 emergency signal external interrupt */ - if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) + if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); - - /* Reallocate current lowcore, but keep its contents. */ - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - BUG_ON(!lowcore || !panic_stack || !async_stack); -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - save_area = get_zeroed_page(GFP_KERNEL); -#endif - local_irq_disable(); - local_mcck_disable(); - lowcore_ptr[smp_processor_id()] = lowcore; - *lowcore = S390_lowcore; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->async_stack = async_stack + ASYNC_SIZE; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - lowcore->extended_save_area_addr = (u32) save_area; -#endif - set_prefix((u32)(unsigned long) lowcore); - local_mcck_enable(); - local_irq_enable(); -#ifdef CONFIG_64BIT - if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore)) - BUG(); -#endif + /* request the 0x1202 external call external interrupt */ + if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) + panic("Couldn't request external interrupt 0x1202"); + smp_detect_cpus(); } void __init smp_prepare_boot_cpu(void) { - BUG_ON(smp_processor_id() != 0); - - current_thread_info()->cpu = 0; - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_online_map); + struct pcpu *pcpu = pcpu_devices; + + boot_cpu_address = stap(); + pcpu->state = CPU_STATE_CONFIGURED; + pcpu->address = boot_cpu_address; + pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; - current_set[0] = current; - smp_cpu_state[0] = CPU_STATE_CONFIGURED; - smp_cpu_polarization[0] = POLARIZATION_UNKNWN; + smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); + set_cpu_present(0, true); + set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) @@ -753,7 +800,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { S390_lowcore.cpu_nr = 0; - __cpu_logical_map[0] = stap(); + S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } /* @@ -768,55 +815,58 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t cpu_configure_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); mutex_unlock(&smp_cpu_state_mutex); return count; } -static ssize_t cpu_configure_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t count) +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - int cpu = dev->id; - int val, rc; + struct pcpu *pcpu; + int cpu, val, rc; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; if (cpu_online(cpu) || cpu == 0) goto out; + pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: - if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) { - rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); - if (!rc) { - smp_cpu_state[cpu] = CPU_STATE_STANDBY; - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; - } - } + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_cpu_deconfigure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; case 1: - if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) { - rc = sclp_cpu_configure(__cpu_logical_map[cpu]); - if (!rc) { - smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; - } - } + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = sclp_cpu_configure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; default: break; @@ -826,52 +876,21 @@ out: put_online_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t cpu_polarization_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) { - int cpu = dev->id; - ssize_t count; - - mutex_lock(&smp_cpu_state_mutex); - switch (smp_cpu_polarization[cpu]) { - case POLARIZATION_HRZ: - count = sprintf(buf, "horizontal\n"); - break; - case POLARIZATION_VL: - count = sprintf(buf, "vertical:low\n"); - break; - case POLARIZATION_VM: - count = sprintf(buf, "vertical:medium\n"); - break; - case POLARIZATION_VH: - count = sprintf(buf, "vertical:high\n"); - break; - default: - count = sprintf(buf, "unknown\n"); - break; - } - mutex_unlock(&smp_cpu_state_mutex); - return count; + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); } -static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL); - -static ssize_t show_cpu_address(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); -} -static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL); - +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { #ifdef CONFIG_HOTPLUG_CPU - &attr_configure.attr, + &dev_attr_configure.attr, #endif - &attr_address.attr, - &attr_polarization.attr, + &dev_attr_address.attr, NULL, }; @@ -879,71 +898,45 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_capability(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - unsigned int capability; - int rc; - - rc = get_cpu_capability(&capability); - if (rc) - return rc; - return sprintf(buf, "%u\n", capability); -} -static SYSDEV_ATTR(capability, 0444, show_capability, NULL); - -static ssize_t show_idle_count(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned long long idle_count; unsigned int sequence; - idle = &per_cpu(s390_idle, dev->id); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_count = idle->idle_count; - if (idle->idle_enter) - idle_count++; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->clock_idle_enter)) + idle_count++; + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); return sprintf(buf, "%llu\n", idle_count); } -static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); -static ssize_t show_idle_time(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; - unsigned long long now, idle_time, idle_enter; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int sequence; - idle = &per_cpu(s390_idle, dev->id); - now = get_clock(); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_time = idle->idle_time; - idle_enter = idle->idle_enter; - if (idle_enter != 0ULL && idle_enter < now) - idle_time += now - idle_enter; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; return sprintf(buf, "%llu\n", idle_time >> 12); } -static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); static struct attribute *cpu_online_attrs[] = { - &attr_capability.attr, - &attr_idle_count.attr, - &attr_idle_time_us.attr, + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, NULL, }; @@ -951,40 +944,36 @@ static struct attribute_group cpu_online_attr_group = { .attrs = cpu_online_attrs, }; -static int __cpuinit smp_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int smp_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct s390_idle_data *idle; + struct cpu *c = pcpu_devices[cpu].cpu; + struct device *s = &c->dev; int err = 0; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - idle = &per_cpu(s390_idle, cpu); - memset(idle, 0, sizeof(struct s390_idle_data)); err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: sysfs_remove_group(&s->kobj, &cpu_online_attr_group); break; } return notifier_from_errno(err); } -static struct notifier_block __cpuinitdata smp_cpu_nb = { - .notifier_call = smp_cpu_notify, -}; - -static int __devinit smp_add_present_cpu(int cpu) +static int smp_add_present_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s; + struct cpu *c; int rc; + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + pcpu_devices[cpu].cpu = c; + s = &c->dev; c->hotpluggable = 1; rc = register_cpu(c, cpu); if (rc) @@ -992,11 +981,20 @@ static int __devinit smp_add_present_cpu(int cpu) rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); if (rc) goto out_cpu; - if (!cpu_online(cpu)) - goto out; - rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); - if (!rc) - return 0; + if (cpu_online(cpu)) { + rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + if (rc) + goto out_online; + } + rc = topology_cpu_init(c); + if (rc) + goto out_topology; + return 0; + +out_topology: + if (cpu_online(cpu)) + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); +out_online: sysfs_remove_group(&s->kobj, &cpu_common_attr_group); out_cpu: #ifdef CONFIG_HOTPLUG_CPU @@ -1010,33 +1008,25 @@ out: int __ref smp_rescan_cpus(void) { - cpumask_t newcpus; - int cpu; - int rc; + struct sclp_cpu_info *info; + int nr; + info = smp_get_cpu_info(); + if (!info) + return -ENOMEM; get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - newcpus = cpu_present_map; - rc = __smp_rescan_cpus(); - if (rc) - goto out; - cpus_andnot(newcpus, cpu_present_map, newcpus); - for_each_cpu_mask(cpu, newcpus) { - rc = smp_add_present_cpu(cpu); - if (rc) - cpu_clear(cpu, cpu_present_map); - } - rc = 0; -out: + nr = __smp_rescan_cpus(info, 1); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); - if (!cpus_empty(newcpus)) + kfree(info); + if (nr) topology_schedule_update(); - return rc; + return 0; } -static ssize_t __ref rescan_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -1045,69 +1035,29 @@ static ssize_t __ref rescan_store(struct sysdev_class *class, rc = smp_rescan_cpus(); return rc ? rc : count; } -static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store); +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - ssize_t count; - - mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", cpu_management); - mutex_unlock(&smp_cpu_state_mutex); - return count; -} - -static ssize_t dispatching_store(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) -{ - int val, rc; - char delim; - - if (sscanf(buf, "%d %c", &val, &delim) != 1) - return -EINVAL; - if (val != 0 && val != 1) - return -EINVAL; - rc = 0; - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); - if (cpu_management == val) - goto out; - rc = topology_set_cpu_management(val); - if (!rc) - cpu_management = val; -out: - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); - return rc ? rc : count; -} -static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, - dispatching_store); - -static int __init topology_init(void) +static int __init s390_smp_init(void) { - int cpu; - int rc; - - register_cpu_notifier(&smp_cpu_nb); + int cpu, rc = 0; #ifdef CONFIG_HOTPLUG_CPU - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan); + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; #endif - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching); - if (rc) - return rc; + cpu_notifier_register_begin(); for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) - return rc; + goto out; } - return 0; + + __hotcpu_notifier(smp_cpu_notify, 0); + +out: + cpu_notifier_register_done(); + return rc; } -subsys_initcall(topology_init); +subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 8841919ef7e..1785cd82253 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,9 +1,7 @@ /* - * arch/s390/kernel/stacktrace.c - * * Stack trace management functions * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index cf9e5c6d552..a7a7537ce1e 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,13 +7,137 @@ */ #include <linux/pfn.h> -#include <asm/system.h> +#include <linux/suspend.h> +#include <linux/mm.h> +#include <asm/ctl_reg.h> +#include <asm/ipl.h> +#include <asm/cio.h> +#include <asm/pci.h> +#include "entry.h" /* * References to section boundaries */ extern const void __nosave_begin, __nosave_end; +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; +unsigned long suspend_zero_pages; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); @@ -30,6 +154,36 @@ int pfn_is_nosave(unsigned long pfn) return 0; } +/* + * PM notifier callback for suspend + */ +static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); + if (!suspend_zero_pages) + return NOTIFY_BAD; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + free_pages(suspend_zero_pages, LC_ORDER); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init suspend_pm_init(void) +{ + pm_notifier(suspend_pm_cb, 0); + return 0; +} +arch_initcall(suspend_pm_init); + void save_processor_state(void) { /* swsusp_arch_suspend() actually saves all cpu register contents. @@ -61,3 +215,11 @@ void restore_processor_state(void) __ctl_set_bit(0,28); local_mcck_enable(); } + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); + zpci_rescan(); +} diff --git a/arch/s390/kernel/switch_cpu.S b/arch/s390/kernel/switch_cpu.S deleted file mode 100644 index 469f11b574f..00000000000 --- a/arch/s390/kernel/switch_cpu.S +++ /dev/null @@ -1,58 +0,0 @@ -/* - * 31-bit switch cpu code - * - * Copyright IBM Corp. 2009 - * - */ - -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -# %r3 - function parameter -# %r4 - stack poiner -# %r5 - current cpu -# %r6 - destination cpu - - .section .text - .align 4 - .globl smp_switch_to_cpu -smp_switch_to_cpu: - stm %r6,%r15,__SF_GPRS(%r15) - lr %r1,%r15 - ahi %r15,-STACK_FRAME_OVERHEAD - st %r1,__SF_BACKCHAIN(%r15) - basr %r13,0 -0: la %r1,.gprregs_addr-0b(%r13) - l %r1,0(%r1) - stm %r0,%r15,0(%r1) -1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */ - brc 2,1b /* busy, try again */ -2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */ - brc 2,2b /* busy, try again */ -3: j 3b - - .globl smp_restart_cpu -smp_restart_cpu: - basr %r13,0 -0: la %r1,.gprregs_addr-0b(%r13) - l %r1,0(%r1) - lm %r0,%r15,0(%r1) -1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */ - brc 10,1b /* busy, accepted (status 0), running */ - tmll %r0,0x40 /* Test if calling CPU is stopped */ - jz 1b - ltr %r4,%r4 /* New stack ? */ - jz 1f - lr %r15,%r4 -1: basr %r14,%r2 - -.gprregs_addr: - .long .gprregs - - .section .data,"aw",@progbits -.gprregs: - .rept 16 - .long 0 - .endr diff --git a/arch/s390/kernel/switch_cpu64.S b/arch/s390/kernel/switch_cpu64.S deleted file mode 100644 index d94aacc898c..00000000000 --- a/arch/s390/kernel/switch_cpu64.S +++ /dev/null @@ -1,51 +0,0 @@ -/* - * 64-bit switch cpu code - * - * Copyright IBM Corp. 2009 - * - */ - -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -# smp_switch_to_cpu switches to destination cpu and executes the passed function -# Parameter: %r2 - function to call -# %r3 - function parameter -# %r4 - stack poiner -# %r5 - current cpu -# %r6 - destination cpu - - .section .text - .align 4 - .globl smp_switch_to_cpu -smp_switch_to_cpu: - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - larl %r1,.gprregs - stmg %r0,%r15,0(%r1) -1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */ - brc 2,1b /* busy, try again */ -2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */ - brc 2,2b /* busy, try again */ -3: j 3b - - .globl smp_restart_cpu -smp_restart_cpu: - larl %r1,.gprregs - lmg %r0,%r15,0(%r1) -1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */ - brc 10,1b /* busy, accepted (status 0), running */ - tmll %r0,0x40 /* Test if calling CPU is stopped */ - jz 1b - ltgr %r4,%r4 /* New stack ? */ - jz 1f - lgr %r15,%r4 -1: basr %r14,%r2 - - .section .data,"aw",@progbits -.gprregs: - .rept 16 - .quad 0 - .endr diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 1f066e46e83..6b09fdffbd2 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -7,10 +7,12 @@ * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ +#include <linux/linkage.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> +#include <asm/sigp.h> /* * Save register context in absolute 0 lowcore and call swsusp_save() to @@ -22,9 +24,7 @@ * This function runs with disabled interrupts. */ .section .text - .align 4 - .globl swsusp_arch_suspend -swsusp_arch_suspend: +ENTRY(swsusp_arch_suspend) stmg %r6,%r15,__SF_GPRS(%r15) lgr %r1,%r15 aghi %r15,-STACK_FRAME_OVERHEAD @@ -36,14 +36,14 @@ swsusp_arch_suspend: /* Store prefix register on stack */ stpx __SF_EMPTY(%r15) - /* Save prefix register contents for lowcore */ - llgf %r4,__SF_EMPTY(%r15) + /* Save prefix register contents for lowcore copy */ + llgf %r10,__SF_EMPTY(%r15) /* Get pointer to save area */ lghi %r1,0x1000 /* Save CPU address */ - stap __LC_CPU_ADDRESS(%r0) + stap __LC_EXT_CPU_ADDR(%r0) /* Store registers */ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ @@ -91,7 +91,18 @@ swsusp_arch_suspend: xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) spx __SF_EMPTY(%r15) + /* Save absolute zero pages */ + larl %r2,suspend_zero_pages + lg %r2,0(%r2) + lghi %r4,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Copy lowcore to absolute zero lowcore */ lghi %r2,0 + lgr %r4,%r10 lghi %r3,2*PAGE_SIZE lghi %r5,2*PAGE_SIZE 1: mvcle %r2,%r4,0 @@ -112,8 +123,7 @@ swsusp_arch_suspend: * Then we return to the function that called swsusp_arch_suspend(). * swsusp_arch_resume() runs with disabled interrupts. */ - .globl swsusp_arch_resume -swsusp_arch_resume: +ENTRY(swsusp_arch_resume) stmg %r6,%r15,__SF_GPRS(%r15) lgr %r1,%r15 aghi %r15,-STACK_FRAME_OVERHEAD @@ -138,11 +148,14 @@ swsusp_arch_resume: 0: lg %r2,8(%r1) lg %r4,0(%r1) + iske %r0,%r4 lghi %r3,PAGE_SIZE lghi %r5,PAGE_SIZE 1: mvcle %r2,%r4,0 jo 1b + lg %r2,8(%r1) + sske %r0,%r2 lg %r1,16(%r1) ltgr %r1,%r1 jnz 0b @@ -162,7 +175,7 @@ swsusp_arch_resume: diag %r0,%r0,0x308 restart_entry: lhi %r1,1 - sigp %r1,%r0,0x12 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE sam64 larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) @@ -172,15 +185,15 @@ pgm_check_entry: larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ stap 0(%r1) llgh %r2,0(%r1) - llgh %r1,__LC_CPU_ADDRESS(%r0) /* Suspend CPU address: r1 */ + llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ cgr %r1,%r2 je restore_registers /* r1 = r2 -> nothing to do */ larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) 3: - sigp %r9,%r1,__SIGP_INITIAL_CPU_RESET - brc 8,4f /* accepted */ - brc 2,3b /* busy, try again */ + sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */ + brc 8,4f /* accepted */ + brc 2,3b /* busy, try again */ /* Suspend CPU not available -> panic */ larl %r15,init_thread_union @@ -189,16 +202,16 @@ pgm_check_entry: larl %r3,_sclp_print_early lghi %r1,0 sam31 - sigp %r1,%r0,0x12 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE basr %r14,%r3 larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: /* Switch to suspend CPU */ - sigp %r9,%r1,__SIGP_RESTART /* start suspend CPU */ + sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */ brc 2,4b /* busy, try again */ 5: - sigp %r9,%r2,__SIGP_STOP /* stop resume (current) CPU */ + sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */ brc 2,5b /* busy, try again */ 6: j 6b @@ -206,7 +219,7 @@ restart_suspend: larl %r1,.Lresume_cpu llgh %r2,0(%r1) 7: - sigp %r9,%r2,__SIGP_SENSE /* Wait for resume CPU */ + sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */ brc 8,7b /* accepted, status 0, still running */ brc 2,7b /* busy, try again */ tmll %r9,0x40 /* Test if resume CPU is stopped */ @@ -246,8 +259,20 @@ restore_registers: /* Load old stack */ lg %r15,0x2f8(%r13) + /* Save prefix register */ + mvc __SF_EMPTY(4,%r15),0x318(%r13) + + /* Restore absolute zero pages */ + lghi %r2,0 + larl %r4,suspend_zero_pages + lg %r4,0(%r4) + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + /* Restore prefix register */ - spx 0x318(%r13) + spx __SF_EMPTY(%r15) /* Activate DAT */ stosm __SF_EMPTY(%r15),0x04 @@ -256,8 +281,8 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states - /* Reinitialize the channel subsystem */ - brasl %r14,channel_subsystem_reinit + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 476081440df..23eb222c165 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/sys_s390.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Thomas Spatzier (tspat@de.ibm.com) * @@ -60,74 +58,22 @@ out: } /* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. + * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, unsigned long, third, void __user *, ptr) { - struct ipc_kludge tmp; - int ret; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) third); - case SEMGET: - return sys_semget(first, (int)second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, (int)second, third, fourth); - } - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - (size_t)second, third); - break; - case MSGRCV: - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, (struct ipc_kludge __user *) ptr, - sizeof (struct ipc_kludge))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, - (size_t)second, tmp.msgtyp, third); - case MSGGET: - return sys_msgget((key_t)first, (int)second); - case MSGCTL: - return sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, - (int)second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - break; - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget(first, (size_t)second, third); - case SHMCTL: - return sys_shmctl(first, (int)second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - - } - - return -EINVAL; + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return sys_ipc(call, first, second, third, ptr, third); } #ifdef CONFIG_64BIT @@ -135,11 +81,12 @@ SYSCALL_DEFINE1(s390_personality, unsigned int, personality) { unsigned int ret; - if (current->personality == PER_LINUX32 && personality == PER_LINUX) - personality = PER_LINUX32; + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; return ret; } @@ -185,19 +132,9 @@ SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) * to * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len */ -SYSCALL_DEFINE(s390_fallocate)(int fd, int mode, loff_t offset, - u32 len_high, u32 len_low) +SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset, + u32, len_high, u32, len_low) { return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); } -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_s390_fallocate(long fd, long mode, loff_t offset, - long len_high, long len_low) -{ - return SYSC_s390_fallocate((int) fd, (int) mode, offset, - (u32) len_high, (u32) len_low); -} -SYSCALL_ALIAS(sys_s390_fallocate, SyS_s390_fallocate); -#endif - #endif diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index a8fee1b1439..fe5cdf29a00 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -9,337 +9,350 @@ #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall) NI_SYSCALL /* 0 */ -SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper) +SYSCALL(sys_exit,sys_exit,compat_sys_exit) SYSCALL(sys_fork,sys_fork,sys_fork) -SYSCALL(sys_read,sys_read,sys32_read_wrapper) -SYSCALL(sys_write,sys_write,sys32_write_wrapper) -SYSCALL(sys_open,sys_open,sys32_open_wrapper) /* 5 */ -SYSCALL(sys_close,sys_close,sys32_close_wrapper) +SYSCALL(sys_read,sys_read,compat_sys_s390_read) +SYSCALL(sys_write,sys_write,compat_sys_s390_write) +SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */ +SYSCALL(sys_close,sys_close,compat_sys_close) SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) -SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper) -SYSCALL(sys_link,sys_link,sys32_link_wrapper) -SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */ -SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper) -SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper) -SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ -SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper) -SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ -SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ +SYSCALL(sys_creat,sys_creat,compat_sys_creat) +SYSCALL(sys_link,sys_link,compat_sys_link) +SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */ +SYSCALL(sys_execve,sys_execve,compat_sys_execve) +SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir) +SYSCALL(sys_time,sys_ni_syscall,compat_sys_time) /* old time syscall */ +SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod) +SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */ +SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ -SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper) +SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ -SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) -SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) -SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/ -SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ -SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ -SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) -SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) +SYSCALL(sys_mount,sys_mount,compat_sys_mount) +SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount) +SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ +SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ +SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */ +SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace) +SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm) NI_SYSCALL /* old fstat syscall */ SYSCALL(sys_pause,sys_pause,sys_pause) -SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ +SYSCALL(sys_utime,sys_utime,compat_sys_utime) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ -SYSCALL(sys_access,sys_access,sys32_access_wrapper) -SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper) +SYSCALL(sys_access,sys_access,compat_sys_access) +SYSCALL(sys_nice,sys_nice,compat_sys_nice) NI_SYSCALL /* 35 old ftime syscall */ SYSCALL(sys_sync,sys_sync,sys_sync) -SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) -SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper) -SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper) -SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */ -SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper) -SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) -SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) +SYSCALL(sys_kill,sys_kill,compat_sys_kill) +SYSCALL(sys_rename,sys_rename,compat_sys_rename) +SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir) +SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */ +SYSCALL(sys_dup,sys_dup,compat_sys_dup) +SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe) +SYSCALL(sys_times,sys_times,compat_sys_times) NI_SYSCALL /* old prof syscall */ -SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ -SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/ -SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ -SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) -SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ -SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */ -SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) -SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) +SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */ +SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ +SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/ +SYSCALL(sys_signal,sys_signal,compat_sys_signal) +SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */ +SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */ +SYSCALL(sys_acct,sys_acct,compat_sys_acct) +SYSCALL(sys_umount,sys_umount,compat_sys_umount) NI_SYSCALL /* old lock syscall */ -SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) -SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ +SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl) +SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl) /* 55 */ NI_SYSCALL /* intel mpx syscall */ -SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper) +SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid) NI_SYSCALL /* old ulimit syscall */ NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */ -SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper) -SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) -SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper) +SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */ +SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot) +SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat) +SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2) SYSCALL(sys_getppid,sys_getppid,sys_getppid) SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ SYSCALL(sys_setsid,sys_setsid,sys_setsid) -SYSCALL(sys_sigaction,sys_sigaction,sys32_sigaction_wrapper) +SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction) NI_SYSCALL /* old sgetmask syscall*/ NI_SYSCALL /* old ssetmask syscall*/ -SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ -SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */ -SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) -SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) -SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) -SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ -SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) -SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) -SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) -SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ -SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ +SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */ +SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ +SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend) +SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending) +SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname) +SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit) /* 75 */ +SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit) +SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday) +SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */ +SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ -SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) +SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink) NI_SYSCALL /* old lstat syscall */ -SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */ -SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) -SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) -SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) -SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */ -SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) -SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) -SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) -SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) -SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ -SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) -SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) +SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */ +SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib) +SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon) +SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot) +SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */ +SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ +SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap) +SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) +SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) +SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod) +SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/ +SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority) +SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority) NI_SYSCALL /* old profil syscall */ -SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) -SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ +SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs) +SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs) /* 100 */ NI_SYSCALL /* ioperm for i386 */ -SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) -SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper) -SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer_wrapper) -SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer_wrapper) /* 105 */ -SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) -SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) -SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) +SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall) +SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog) +SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer) +SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */ +SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat) +SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat) +SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat) NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */ +SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ -SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper) -SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ -SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) -SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper) -SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) -SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) -SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ -SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) -SYSCALL(sys_newuname,sys_newuname,sys32_newuname_wrapper) +SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) +SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */ +SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo) +SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) +SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync) +SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn) +SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */ +SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) +SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ -SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) -SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ -SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper) +SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex) +SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */ +SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask) NI_SYSCALL /* old "create module" */ -SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper) -SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper) +SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module) +SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module) NI_SYSCALL /* 130: old get_kernel_syms */ -SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper) -SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) -SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) -SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) -SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ -SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper) +SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl) +SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid) +SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir) +SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush) +SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs) /* 135 */ +SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality) NI_SYSCALL /* for afs_syscall */ -SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ -SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ -SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ -SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) -SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) -SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) -SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) -SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ -SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) -SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) -SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) -SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper) -SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */ -SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper) -SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper) +SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ +SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ +SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */ +SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents) +SYSCALL(sys_select,sys_select,compat_sys_select) +SYSCALL(sys_flock,sys_flock,compat_sys_flock) +SYSCALL(sys_msync,sys_msync,compat_sys_msync) +SYSCALL(sys_readv,sys_readv,compat_sys_readv) /* 145 */ +SYSCALL(sys_writev,sys_writev,compat_sys_writev) +SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid) +SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync) +SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl) +SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock) /* 150 */ +SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock) +SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall) SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) -SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper) -SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */ -SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper) -SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) +SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam) +SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */ +SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler) +SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler) SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) -SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) -SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */ -SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,sys32_sched_rr_get_interval_wrapper) -SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) -SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) -SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */ -SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */ +SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max) +SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */ +SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) +SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep) +SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap) +SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */ +SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */ NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ -SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) -SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper) -SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ -SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ -SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) -SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) -SYSCALL(sys_rt_sigaction,sys_rt_sigaction,sys32_rt_sigaction_wrapper) -SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,sys32_rt_sigprocmask_wrapper) /* 175 */ -SYSCALL(sys_rt_sigpending,sys_rt_sigpending,sys32_rt_sigpending_wrapper) -SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait_wrapper) -SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,sys32_rt_sigqueueinfo_wrapper) -SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend_wrapper) -SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ -SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) -SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */ -SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) -SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) -SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ -SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack_wrapper) -SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) +SYSCALL(sys_poll,sys_poll,compat_sys_poll) +NI_SYSCALL /* old nfsservctl */ +SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ +SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */ +SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl) +SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn) +SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) +SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */ +SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending) +SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait) +SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo) +SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) +SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */ +SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64) +SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */ +SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd) +SYSCALL(sys_capget,sys_capget,compat_sys_capget) +SYSCALL(sys_capset,sys_capset,compat_sys_capset) /* 185 */ +SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) +SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ -SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) -SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) -SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper) -SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) -SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ -SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) -SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) -SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) +SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit) +SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2) +SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) +SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) +SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */ +SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64) +SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64) +SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown) SYSCALL(sys_getuid,sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) SYSCALL(sys_getegid,sys_getegid,sys_getegid) -SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper) -SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper) -SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */ -SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper) -SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper) -SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper) -SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper) -SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */ -SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper) -SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper) -SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) -SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) -SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ -SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper) -SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper) -SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) -SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) -SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ -SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) -SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper) -SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64_wrapper) -SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) -SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) -SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) -SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) -SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) -SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper) -SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper) -SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper) -SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper) -SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */ +SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid) +SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid) +SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups) /* 205 */ +SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups) +SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown) +SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid) +SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid) +SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */ +SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid) +SYSCALL(sys_chown,sys_chown,compat_sys_chown) +SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid) +SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid) +SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid) /* 215 */ +SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid) +SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root) +SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore) +SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise) +SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */ +SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64) +SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) +SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) +SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr) +SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */ +SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr) +SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr) +SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr) +SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr) +SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr) /* 230 */ +SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr) +SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr) +SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr) +SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr) +SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid,sys_gettid) -SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper) -SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper) -SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) -SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ -SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper) +SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill) +SYSCALL(sys_futex,sys_futex,compat_sys_futex) +SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity) +SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */ +SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill) NI_SYSCALL /* reserved for TUX */ -SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) -SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper) -SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ -SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) -SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper) -SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper) -SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) -SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ -SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) -SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) -SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) -SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) -SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ -SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) -SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper) -SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper) -SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) -SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ -SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) -SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup) +SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy) +SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents) /* 245 */ +SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit) +SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel) +SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group) +SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create) +SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */ +SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait) +SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address) +SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64) +SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create) +SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime) /* 255 */ +SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime) +SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun) +SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete) +SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime) +SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime) /* 260 */ +SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres) +SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep) NI_SYSCALL /* reserved for vserver */ -SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) -SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) -SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) -SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) +SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64) +SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64) +SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64) +SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages) NI_SYSCALL /* 268 sys_mbind */ NI_SYSCALL /* 269 sys_get_mempolicy */ NI_SYSCALL /* 270 sys_set_mempolicy */ -SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) -SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) -SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) -SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) -SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ -SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) -SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) -SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) -SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) -SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */ -SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper) -SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper) -SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper) +SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open) +SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink) +SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend) +SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive) +SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */ +SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr) +SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load) +SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key) +SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key) +SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */ +SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid) +SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set) +SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get) SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init) -SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */ -SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper) +SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */ +SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch) NI_SYSCALL /* 287 sys_migrate_pages */ -SYSCALL(sys_openat,sys_openat,compat_sys_openat_wrapper) -SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) -SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ -SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) -SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) -SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper) -SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) -SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ -SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) -SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper) -SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper) -SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper) -SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */ -SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) -SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) -SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) -SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list_wrapper) -SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list_wrapper) -SYSCALL(sys_splice,sys_splice,sys_splice_wrapper) -SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) -SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) -SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) +SYSCALL(sys_openat,sys_openat,compat_sys_openat) +SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat) +SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */ +SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat) +SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat) +SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) +SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat) +SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */ +SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat) +SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat) +SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat) +SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat) +SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */ +SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6) +SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll) +SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare) +SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) +SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) +SYSCALL(sys_splice,sys_splice,compat_sys_splice) +SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range) +SYSCALL(sys_tee,sys_tee,compat_sys_tee) +SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ -SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) -SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper) -SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) -SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) -SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ -SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper) +SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) +SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes) +SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate) +SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat) /* 315 */ +SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ -SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) -SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) -SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */ -SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper) -SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4_wrapper) -SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper) -SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) -SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ -SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) -SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) -SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) -SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) -SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ -SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) -SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper) -SYSCALL(sys_fanotify_mark,sys_fanotify_mark,sys_fanotify_mark_wrapper) -SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper) +SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd) +SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create) +SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ +SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime) +SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4) +SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2) +SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1) +SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */ +SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3) +SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1) +SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) +SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) +SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ +SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open) +SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init) +SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark) +SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64) +SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */ +SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at) +SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime) +SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs) +SYSCALL(sys_setns,sys_setns,compat_sys_setns) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev) +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr) +SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp) +SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) +SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ +SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) +SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 5c9e439bf3f..811f542b8ed 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -22,17 +22,41 @@ #include <math-emu/soft-fp.h> #include <math-emu/single.h> -static inline int stsi_0(void) +int topology_max_mnest; + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) { - int rc = stsi(NULL, 0, 0, 0); - return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28); + register int r0 asm("0") = (fc << 28) | sel1; + register int r1 asm("1") = sel2; + int rc = 0; + + asm volatile( + " stsi 0(%3)\n" + "0: jz 2f\n" + "1: lhi %1,%4\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (r0), "+d" (rc) + : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) + : "cc", "memory"); + if (rc) + return rc; + return fc ? 0 : ((unsigned int) r0) >> 28; } +EXPORT_SYMBOL(stsi); -static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) +static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) { - if (stsi(info, 1, 1, 1) == -ENOSYS) - return len; + int i; + if (stsi(info, 1, 1, 1)) + return; EBCASC(info->manufacturer, sizeof(info->manufacturer)); EBCASC(info->type, sizeof(info->type)); EBCASC(info->model, sizeof(info->model)); @@ -41,242 +65,197 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) EBCASC(info->model_capacity, sizeof(info->model_capacity)); EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); - len += sprintf(page + len, "Manufacturer: %-16.16s\n", - info->manufacturer); - len += sprintf(page + len, "Type: %-4.4s\n", - info->type); + seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); + seq_printf(m, "Type: %-4.4s\n", info->type); + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + seq_printf(m, "Model: %-16.16s", info->model_capacity); if (info->model[0] != '\0') - /* - * Sigh: the model field has been renamed with System z9 - * to model_capacity and a new model field has been added - * after the plant field. To avoid confusing older programs - * the "Model:" prints "model_capacity model" or just - * "model_capacity" if the model string is empty . - */ - len += sprintf(page + len, - "Model: %-16.16s %-16.16s\n", - info->model_capacity, info->model); - else - len += sprintf(page + len, "Model: %-16.16s\n", - info->model_capacity); - len += sprintf(page + len, "Sequence Code: %-16.16s\n", - info->sequence); - len += sprintf(page + len, "Plant: %-4.4s\n", - info->plant); - len += sprintf(page + len, "Model Capacity: %-16.16s %08u\n", - info->model_capacity, *(u32 *) info->model_cap_rating); - if (info->model_perm_cap[0] != '\0') - len += sprintf(page + len, - "Model Perm. Capacity: %-16.16s %08u\n", - info->model_perm_cap, - *(u32 *) info->model_perm_cap_rating); - if (info->model_temp_cap[0] != '\0') - len += sprintf(page + len, - "Model Temp. Capacity: %-16.16s %08u\n", - info->model_temp_cap, - *(u32 *) info->model_temp_cap_rating); + seq_printf(m, " %-16.16s", info->model); + seq_putc(m, '\n'); + seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence); + seq_printf(m, "Plant: %-4.4s\n", info->plant); + seq_printf(m, "Model Capacity: %-16.16s %08u\n", + info->model_capacity, info->model_cap_rating); + if (info->model_perm_cap_rating) + seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n", + info->model_perm_cap, + info->model_perm_cap_rating); + if (info->model_temp_cap_rating) + seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n", + info->model_temp_cap, + info->model_temp_cap_rating); + if (info->ncr) + seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr); + if (info->npr) + seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr); + if (info->ntr) + seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr); if (info->cai) { - len += sprintf(page + len, - "Capacity Adj. Ind.: %d\n", - info->cai); - len += sprintf(page + len, "Capacity Ch. Reason: %d\n", - info->ccr); + seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai); + seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr); + seq_printf(m, "Capacity Transient: %d\n", info->t); + } + if (info->p) { + for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) { + seq_printf(m, "Type %d Percentage: %d\n", + i, info->typepct[i - 1]); + } } - return len; } -static int stsi_15_1_x(struct sysinfo_15_1_x *info, char *page, int len) +static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) { static int max_mnest; int i, rc; - len += sprintf(page + len, "\n"); + seq_putc(m, '\n'); if (!MACHINE_HAS_TOPOLOGY) - return len; - if (max_mnest) { - stsi(info, 15, 1, max_mnest); - } else { - for (max_mnest = 6; max_mnest > 1; max_mnest--) { - rc = stsi(info, 15, 1, max_mnest); - if (rc != -ENOSYS) - break; - } - } - len += sprintf(page + len, "CPU Topology HW: "); + return; + if (stsi(info, 15, 1, topology_max_mnest)) + return; + seq_printf(m, "CPU Topology HW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - len += sprintf(page + len, " %d", info->mag[i]); - len += sprintf(page + len, "\n"); + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); #ifdef CONFIG_SCHED_MC store_topology(info); - len += sprintf(page + len, "CPU Topology SW: "); + seq_printf(m, "CPU Topology SW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - len += sprintf(page + len, " %d", info->mag[i]); - len += sprintf(page + len, "\n"); + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); #endif - return len; } -static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) +static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) { struct sysinfo_1_2_2_extension *ext; int i; - if (stsi(info, 1, 2, 2) == -ENOSYS) - return len; + if (stsi(info, 1, 2, 2)) + return; ext = (struct sysinfo_1_2_2_extension *) ((unsigned long) info + info->acc_offset); - - len += sprintf(page + len, "CPUs Total: %d\n", - info->cpus_total); - len += sprintf(page + len, "CPUs Configured: %d\n", - info->cpus_configured); - len += sprintf(page + len, "CPUs Standby: %d\n", - info->cpus_standby); - len += sprintf(page + len, "CPUs Reserved: %d\n", - info->cpus_reserved); - - if (info->format == 1) { - /* - * Sigh 2. According to the specification the alternate - * capability field is a 32 bit floating point number - * if the higher order 8 bits are not zero. Printing - * a floating point number in the kernel is a no-no, - * always print the number as 32 bit unsigned integer. - * The user-space needs to know about the strange - * encoding of the alternate cpu capability. - */ - len += sprintf(page + len, "Capability: %u %u\n", - info->capability, ext->alt_capability); - for (i = 2; i <= info->cpus_total; i++) - len += sprintf(page + len, - "Adjustment %02d-way: %u %u\n", - i, info->adjustment[i-2], - ext->alt_adjustment[i-2]); - - } else { - len += sprintf(page + len, "Capability: %u\n", - info->capability); - for (i = 2; i <= info->cpus_total; i++) - len += sprintf(page + len, - "Adjustment %02d-way: %u\n", - i, info->adjustment[i-2]); + seq_printf(m, "CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the strange + * encoding of the alternate cpu capability. + */ + seq_printf(m, "Capability: %u", info->capability); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_capability); + seq_putc(m, '\n'); + if (info->nominal_cap) + seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap); + if (info->secondary_cap) + seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap); + for (i = 2; i <= info->cpus_total; i++) { + seq_printf(m, "Adjustment %02d-way: %u", + i, info->adjustment[i-2]); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_adjustment[i-2]); + seq_putc(m, '\n'); } - - if (info->secondary_capability != 0) - len += sprintf(page + len, "Secondary Capability: %d\n", - info->secondary_capability); - return len; } -static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len) +static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) { - if (stsi(info, 2, 2, 2) == -ENOSYS) - return len; - + if (stsi(info, 2, 2, 2)) + return; EBCASC(info->name, sizeof(info->name)); - - len += sprintf(page + len, "\n"); - len += sprintf(page + len, "LPAR Number: %d\n", - info->lpar_number); - - len += sprintf(page + len, "LPAR Characteristics: "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Number: %d\n", info->lpar_number); + seq_printf(m, "LPAR Characteristics: "); if (info->characteristics & LPAR_CHAR_DEDICATED) - len += sprintf(page + len, "Dedicated "); + seq_printf(m, "Dedicated "); if (info->characteristics & LPAR_CHAR_SHARED) - len += sprintf(page + len, "Shared "); + seq_printf(m, "Shared "); if (info->characteristics & LPAR_CHAR_LIMITED) - len += sprintf(page + len, "Limited "); - len += sprintf(page + len, "\n"); - - len += sprintf(page + len, "LPAR Name: %-8.8s\n", - info->name); - - len += sprintf(page + len, "LPAR Adjustment: %d\n", - info->caf); - - len += sprintf(page + len, "LPAR CPUs Total: %d\n", - info->cpus_total); - len += sprintf(page + len, "LPAR CPUs Configured: %d\n", - info->cpus_configured); - len += sprintf(page + len, "LPAR CPUs Standby: %d\n", - info->cpus_standby); - len += sprintf(page + len, "LPAR CPUs Reserved: %d\n", - info->cpus_reserved); - len += sprintf(page + len, "LPAR CPUs Dedicated: %d\n", - info->cpus_dedicated); - len += sprintf(page + len, "LPAR CPUs Shared: %d\n", - info->cpus_shared); - return len; + seq_printf(m, "Limited "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Name: %-8.8s\n", info->name); + seq_printf(m, "LPAR Adjustment: %d\n", info->caf); + seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); + seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); + seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); } -static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len) +static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) { int i; - if (stsi(info, 3, 2, 2) == -ENOSYS) - return len; + if (stsi(info, 3, 2, 2)) + return; for (i = 0; i < info->count; i++) { EBCASC(info->vm[i].name, sizeof(info->vm[i].name)); EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi)); - len += sprintf(page + len, "\n"); - len += sprintf(page + len, "VM%02d Name: %-8.8s\n", - i, info->vm[i].name); - len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n", - i, info->vm[i].cpi); - - len += sprintf(page + len, "VM%02d Adjustment: %d\n", - i, info->vm[i].caf); - - len += sprintf(page + len, "VM%02d CPUs Total: %d\n", - i, info->vm[i].cpus_total); - len += sprintf(page + len, "VM%02d CPUs Configured: %d\n", - i, info->vm[i].cpus_configured); - len += sprintf(page + len, "VM%02d CPUs Standby: %d\n", - i, info->vm[i].cpus_standby); - len += sprintf(page + len, "VM%02d CPUs Reserved: %d\n", - i, info->vm[i].cpus_reserved); + seq_putc(m, '\n'); + seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name); + seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi); + seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf); + seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total); + seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); + seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); + seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); } - return len; } -static int proc_read_sysinfo(char *page, char **start, - off_t off, int count, - int *eof, void *data) +static int sysinfo_show(struct seq_file *m, void *v) { - unsigned long info = get_zeroed_page(GFP_KERNEL); - int level, len; + void *info = (void *)get_zeroed_page(GFP_KERNEL); + int level; if (!info) return 0; - - len = 0; - level = stsi_0(); + level = stsi(NULL, 0, 0, 0); if (level >= 1) - len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len); - + stsi_1_1_1(m, info); if (level >= 1) - len = stsi_15_1_x((struct sysinfo_15_1_x *) info, page, len); - + stsi_15_1_x(m, info); if (level >= 1) - len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len); - + stsi_1_2_2(m, info); if (level >= 2) - len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len); - + stsi_2_2_2(m, info); if (level >= 3) - len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len); + stsi_3_2_2(m, info); + free_page((unsigned long)info); + return 0; +} - free_page(info); - return len; +static int sysinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, sysinfo_show, NULL); } -static __init int create_proc_sysinfo(void) +static const struct file_operations sysinfo_fops = { + .open = sysinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init sysinfo_create_proc(void) { - create_proc_read_entry("sysinfo", 0444, NULL, - proc_read_sysinfo, NULL); + proc_create("sysinfo", 0444, NULL, &sysinfo_fops); return 0; } -device_initcall(create_proc_sysinfo); +device_initcall(sysinfo_create_proc); /* * Service levels interface. @@ -393,27 +372,6 @@ static __init int create_proc_service_level(void) subsys_initcall(create_proc_service_level); /* - * Bogomips calculation based on cpu capability. - */ -int get_cpu_capability(unsigned int *capability) -{ - struct sysinfo_1_2_2 *info; - int rc; - - info = (void *) get_zeroed_page(GFP_KERNEL); - if (!info) - return -ENOMEM; - rc = stsi(info, 1, 2, 2); - if (rc == -ENOSYS) - goto out; - rc = 0; - *capability = info->capability; -out: - free_page((unsigned long) info); - return rc; -} - -/* * CPU capability might have changed. Therefore recalculate loops_per_jiffy. */ void s390_adjust_jiffies(void) @@ -428,7 +386,7 @@ void s390_adjust_jiffies(void) if (!info) return; - if (stsi(info, 1, 2, 2) != -ENOSYS) { + if (stsi(info, 1, 2, 2) == 0) { /* * Major sigh. The cpu capability encoding is "special". * If the first 9 bits of info->capability are 0 then it @@ -442,7 +400,7 @@ void s390_adjust_jiffies(void) */ FP_UNPACK_SP(SA, &fmil); if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, info->capability, 32, int); + FP_FROM_INT_S(SB, (long) info->capability, 64, long); else FP_UNPACK_SP(SB, &info->capability); FP_DIV_S(SR, SA, SB); @@ -460,7 +418,7 @@ void s390_adjust_jiffies(void) /* * calibrate the delay loop */ -void __cpuinit calibrate_delay(void) +void calibrate_delay(void) { s390_adjust_jiffies(); /* Print the good old Bogomips line .. */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9e7b039458d..0931b110c82 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,5 +1,4 @@ /* - * arch/s390/kernel/time.c * Time of day based timer functions. * * S390 version @@ -27,7 +26,7 @@ #include <linux/cpu.h> #include <linux/stop_machine.h> #include <linux/time.h> -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/smp.h> @@ -35,20 +34,20 @@ #include <linux/profile.h> #include <linux/timex.h> #include <linux/notifier.h> -#include <linux/clocksource.h> +#include <linux/timekeeper_internal.h> #include <linux/clockchips.h> #include <linux/gfp.h> #include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/delay.h> -#include <asm/s390_ext.h> #include <asm/div64.h> #include <asm/vdso.h> #include <asm/irq.h> #include <asm/irq_regs.h> -#include <asm/timer.h> +#include <asm/vtimer.h> #include <asm/etr.h> #include <asm/cio.h> +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) @@ -64,7 +63,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); */ unsigned long long notrace __kprobes sched_clock(void) { - return (get_clock_monotonic() * 125) >> 9; + return tod_to_ns(get_tod_clock_monotonic()); } /* @@ -93,7 +92,6 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); cd = &__get_cpu_var(comparators); cd->event_handler(cd); } @@ -113,7 +111,7 @@ static void fixup_clock_comparator(unsigned long long delta) static int s390_next_event(unsigned long delta, struct clock_event_device *evt) { - S390_lowcore.clock_comparator = get_clock() + delta; + S390_lowcore.clock_comparator = get_tod_clock() + delta; set_clock_comparator(S390_lowcore.clock_comparator); return 0; } @@ -157,11 +155,11 @@ void init_cpu_timer(void) __ctl_set_bit(0, 4); } -static void clock_comparator_interrupt(unsigned int ext_int_code, +static void clock_comparator_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { - kstat_cpu(smp_processor_id()).irqs[EXTINT_CLK]++; + inc_irq_stat(IRQEXT_CLK); if (S390_lowcore.clock_comparator == -1ULL) set_clock_comparator(S390_lowcore.clock_comparator); } @@ -169,10 +167,10 @@ static void clock_comparator_interrupt(unsigned int ext_int_code, static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); -static void timing_alert_interrupt(unsigned int ext_int_code, +static void timing_alert_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { - kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++; + inc_irq_stat(IRQEXT_TLA); if (param32 & 0x00c40000) etr_timing_alert((struct etr_irq_parm *) ¶m32); if (param32 & 0x00038000) @@ -184,7 +182,7 @@ static void stp_reset(void); void read_persistent_clock(struct timespec *ts) { - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); + tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); } void read_boot_clock(struct timespec *ts) @@ -194,7 +192,7 @@ void read_boot_clock(struct timespec *ts) static cycle_t read_tod_clock(struct clocksource *cs) { - return get_clock(); + return get_tod_clock(); } static struct clocksource clocksource_tod = { @@ -212,21 +210,30 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) +void update_vsyscall(struct timekeeper *tk) { - if (clock != &clocksource_tod) + u64 nsecps; + + if (tk->clock != &clocksource_tod) return; /* Make userspace gettimeofday spin until we're done. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = clock->cycle_last; - vdso_data->xtime_clock_sec = wall_time->tv_sec; - vdso_data->xtime_clock_nsec = wall_time->tv_nsec; - vdso_data->wtom_clock_sec = wtm->tv_sec; - vdso_data->wtom_clock_nsec = wtm->tv_nsec; - vdso_data->ntp_mult = mult; + vdso_data->xtime_tod_stamp = tk->clock->cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->wtom_clock_sec = + tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = tk->xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); + nsecps = (u64) NSEC_PER_SEC << tk->shift; + while (vdso_data->wtom_clock_nsec >= nsecps) { + vdso_data->wtom_clock_nsec -= nsecps; + vdso_data->wtom_clock_sec++; + } + vdso_data->tk_mult = tk->mult; + vdso_data->tk_shift = tk->shift; smp_wmb(); ++vdso_data->tb_update_count; } @@ -255,11 +262,11 @@ void __init time_init(void) stp_reset(); /* request the clock comparator external interrupt */ - if (register_external_interrupt(0x1004, clock_comparator_interrupt)) - panic("Couldn't request external interrupt 0x1004"); + if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt)) + panic("Couldn't request external interrupt 0x1004"); /* request the timing alert external interrupt */ - if (register_external_interrupt(0x1406, timing_alert_interrupt)) + if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) panic("Couldn't request external interrupt 0x1406"); if (clocksource_register(&clocksource_tod) != 0) @@ -322,7 +329,7 @@ static unsigned long clock_sync_flags; * The synchronous get_clock function. It will write the current clock * value to the clock pointer and return 0 if the clock is in sync with * the external time source. If the clock mode is local it will return - * -ENOSYS and -EAGAIN if the clock is not in sync with the external + * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external * reference. */ int get_sync_clock(unsigned long long *clock) @@ -332,7 +339,7 @@ int get_sync_clock(unsigned long long *clock) sw_ptr = &get_cpu_var(clock_sync_word); sw0 = atomic_read(sw_ptr); - *clock = get_clock(); + *clock = get_tod_clock(); sw1 = atomic_read(sw_ptr); put_cpu_var(clock_sync_word); if (sw0 == sw1 && (sw0 & 0x80000000U)) @@ -340,7 +347,7 @@ int get_sync_clock(unsigned long long *clock) return 0; if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) && !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) - return -ENOSYS; + return -EOPNOTSUPP; if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) && !test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return -EACCES; @@ -476,7 +483,7 @@ static void etr_reset(void) .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, .es = 0, .sl = 0 }; if (etr_setr(&etr_eacr) == 0) { - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); if (etr_port0_online && etr_port1_online) set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); @@ -724,7 +731,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync) } /* - * Sync the TOD clock using the port refered to by aibp. This port + * Sync the TOD clock using the port referred to by aibp. This port * has to be enabled and the other port has to be disabled. The * last eacr update has to be more than 1.6 seconds in the past. */ @@ -758,8 +765,8 @@ static int etr_sync_clock(void *data) __ctl_set_bit(14, 21); __ctl_set_bit(0, 29); clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; - old_clock = get_clock(); - if (set_clock(clock) == 0) { + old_clock = get_tod_clock(); + if (set_tod_clock(clock) == 0) { __udelay(1); /* Wait for the clock to start. */ __ctl_clear_bit(0, 29); __ctl_clear_bit(14, 21); @@ -810,7 +817,7 @@ static int etr_sync_clock_stop(struct etr_aib *aib, int port) etr_sync.etr_port = port; get_online_cpus(); atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map); + rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); put_online_cpus(); return rc; } @@ -835,7 +842,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr) * assume that this can have caused an stepping * port switch. */ - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); eacr.p0 = etr_port0_online; if (!eacr.p0) eacr.e0 = 0; @@ -848,7 +855,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr) * assume that this can have caused an stepping * port switch. */ - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); eacr.p1 = etr_port1_online; if (!eacr.p1) eacr.e1 = 0; @@ -964,7 +971,7 @@ static void etr_update_eacr(struct etr_eacr eacr) etr_eacr = eacr; etr_setr(&etr_eacr); if (dp_changed) - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); } /* @@ -1002,7 +1009,7 @@ static void etr_work_fn(struct work_struct *work) /* Store aib to get the current ETR status word. */ BUG_ON(etr_stetr(&aib) != 0); etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ - now = get_clock(); + now = get_tod_clock(); /* * Update the port information if the last stepping port change @@ -1012,7 +1019,7 @@ static void etr_work_fn(struct work_struct *work) eacr = etr_handle_update(&aib, eacr); /* - * Select ports to enable. The prefered synchronization mode is PPS. + * Select ports to enable. The preferred synchronization mode is PPS. * If a port can be enabled depends on a number of things: * 1) The port needs to be online and uptodate. A port is not * disabled just because it is not uptodate, but it is only @@ -1091,7 +1098,7 @@ static void etr_work_fn(struct work_struct *work) /* * Update eacr and try to synchronize the clock. If the update * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occured) or the + * assume that a stepping port switch has occurred) or the * clock syncing failed, reset the sync check control bit * and set up a timer to try again after 0.5 seconds */ @@ -1111,34 +1118,35 @@ out_unlock: /* * Sysfs interface functions */ -static struct sysdev_class etr_sysclass = { - .name = "etr", +static struct bus_type etr_subsys = { + .name = "etr", + .dev_name = "etr", }; -static struct sys_device etr_port0_dev = { +static struct device etr_port0_dev = { .id = 0, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; -static struct sys_device etr_port1_dev = { +static struct device etr_port1_dev = { .id = 1, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; /* - * ETR class attributes + * ETR subsys attributes */ -static ssize_t etr_stepping_port_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t etr_stepping_port_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", etr_port0.esw.p); } -static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); +static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); -static ssize_t etr_stepping_mode_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t etr_stepping_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) { char *mode_str; @@ -1152,12 +1160,12 @@ static ssize_t etr_stepping_mode_show(struct sysdev_class *class, return sprintf(buf, "%s\n", mode_str); } -static SYSDEV_CLASS_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); +static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); /* * ETR port attributes */ -static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) +static inline struct etr_aib *etr_aib_from_dev(struct device *dev) { if (dev == &etr_port0_dev) return etr_port0_online ? &etr_port0 : NULL; @@ -1165,8 +1173,8 @@ static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) return etr_port1_online ? &etr_port1 : NULL; } -static ssize_t etr_online_show(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_online_show(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int online; @@ -1175,8 +1183,8 @@ static ssize_t etr_online_show(struct sys_device *dev, return sprintf(buf, "%i\n", online); } -static ssize_t etr_online_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_online_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned int value; @@ -1213,20 +1221,20 @@ out: return count; } -static SYSDEV_ATTR(online, 0600, etr_online_show, etr_online_store); +static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); -static ssize_t etr_stepping_control_show(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_stepping_control_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? etr_eacr.e0 : etr_eacr.e1); } -static SYSDEV_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); +static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); -static ssize_t etr_mode_code_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_mode_code_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!etr_port0_online && !etr_port1_online) /* Status word is not uptodate if both ports are offline. */ @@ -1235,10 +1243,10 @@ static ssize_t etr_mode_code_show(struct sys_device *dev, etr_port0.esw.psc0 : etr_port0.esw.psc1); } -static SYSDEV_ATTR(state_code, 0400, etr_mode_code_show, NULL); +static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); -static ssize_t etr_untuned_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_untuned_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1247,10 +1255,10 @@ static ssize_t etr_untuned_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.u); } -static SYSDEV_ATTR(untuned, 0400, etr_untuned_show, NULL); +static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); -static ssize_t etr_network_id_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_network_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1259,10 +1267,10 @@ static ssize_t etr_network_id_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.net_id); } -static SYSDEV_ATTR(network, 0400, etr_network_id_show, NULL); +static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); -static ssize_t etr_id_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1271,10 +1279,10 @@ static ssize_t etr_id_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.etr_id); } -static SYSDEV_ATTR(id, 0400, etr_id_show, NULL); +static DEVICE_ATTR(id, 0400, etr_id_show, NULL); -static ssize_t etr_port_number_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1283,10 +1291,10 @@ static ssize_t etr_port_number_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.etr_pn); } -static SYSDEV_ATTR(port, 0400, etr_port_number_show, NULL); +static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); -static ssize_t etr_coupled_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_coupled_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1295,10 +1303,10 @@ static ssize_t etr_coupled_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.c); } -static SYSDEV_ATTR(coupled, 0400, etr_coupled_show, NULL); +static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); -static ssize_t etr_local_time_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_local_time_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1307,10 +1315,10 @@ static ssize_t etr_local_time_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.blto); } -static SYSDEV_ATTR(local_time, 0400, etr_local_time_show, NULL); +static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); -static ssize_t etr_utc_offset_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_utc_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1319,64 +1327,64 @@ static ssize_t etr_utc_offset_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.buo); } -static SYSDEV_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); +static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); -static struct sysdev_attribute *etr_port_attributes[] = { - &attr_online, - &attr_stepping_control, - &attr_state_code, - &attr_untuned, - &attr_network, - &attr_id, - &attr_port, - &attr_coupled, - &attr_local_time, - &attr_utc_offset, +static struct device_attribute *etr_port_attributes[] = { + &dev_attr_online, + &dev_attr_stepping_control, + &dev_attr_state_code, + &dev_attr_untuned, + &dev_attr_network, + &dev_attr_id, + &dev_attr_port, + &dev_attr_coupled, + &dev_attr_local_time, + &dev_attr_utc_offset, NULL }; -static int __init etr_register_port(struct sys_device *dev) +static int __init etr_register_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_register(dev); + rc = device_register(dev); if (rc) goto out; for (attr = etr_port_attributes; *attr; attr++) { - rc = sysdev_create_file(dev, *attr); + rc = device_create_file(dev, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= etr_port_attributes; attr--) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); out: return rc; } -static void __init etr_unregister_port(struct sys_device *dev) +static void __init etr_unregister_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; for (attr = etr_port_attributes; *attr; attr++) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); } static int __init etr_init_sysfs(void) { int rc; - rc = sysdev_class_register(&etr_sysclass); + rc = subsys_system_register(&etr_subsys, NULL); if (rc) goto out; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_port); + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); if (rc) - goto out_unreg_class; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_mode); + goto out_unreg_subsys; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); if (rc) goto out_remove_stepping_port; rc = etr_register_port(&etr_port0_dev); @@ -1390,11 +1398,11 @@ static int __init etr_init_sysfs(void) out_remove_port0: etr_unregister_port(&etr_port0_dev); out_remove_stepping_mode: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_mode); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); out_remove_stepping_port: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_port); -out_unreg_class: - sysdev_class_unregister(&etr_sysclass); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); +out_unreg_subsys: + bus_unregister(&etr_subsys); out: return rc; } @@ -1526,10 +1534,10 @@ static int stp_sync_clock(void *data) if (stp_info.todoff[0] || stp_info.todoff[1] || stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { - old_clock = get_clock(); + old_clock = get_tod_clock(); rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); if (rc == 0) { - delta = adjust_time(old_clock, get_clock(), 0); + delta = adjust_time(old_clock, get_tod_clock(), 0); fixup_clock_comparator(delta); rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); @@ -1579,7 +1587,7 @@ static void stp_work_fn(struct work_struct *work) memset(&stp_sync, 0, sizeof(stp_sync)); get_online_cpus(); atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); put_online_cpus(); if (!check_sync_clock()) @@ -1594,14 +1602,15 @@ out_unlock: } /* - * STP class sysfs interface functions + * STP subsys sysfs interface functions */ -static struct sysdev_class stp_sysclass = { - .name = "stp", +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_ctn_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1610,10 +1619,10 @@ static ssize_t stp_ctn_id_show(struct sysdev_class *class, *(unsigned long long *) stp_info.ctnid); } -static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); -static ssize_t stp_ctn_type_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_ctn_type_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1621,10 +1630,10 @@ static ssize_t stp_ctn_type_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.ctn); } -static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); -static ssize_t stp_dst_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_dst_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x2000)) @@ -1632,10 +1641,10 @@ static ssize_t stp_dst_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); } -static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); -static ssize_t stp_leap_seconds_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_leap_seconds_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x8000)) @@ -1643,10 +1652,10 @@ static ssize_t stp_leap_seconds_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); } -static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); -static ssize_t stp_stratum_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_stratum_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1654,10 +1663,10 @@ static ssize_t stp_stratum_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); } -static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); -static ssize_t stp_time_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_time_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x0800)) @@ -1665,10 +1674,10 @@ static ssize_t stp_time_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int) stp_info.tto); } -static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); -static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_time_zone_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x4000)) @@ -1676,11 +1685,11 @@ static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); } -static SYSDEV_CLASS_ATTR(time_zone_offset, 0400, +static DEVICE_ATTR(time_zone_offset, 0400, stp_time_zone_offset_show, NULL); -static ssize_t stp_timing_mode_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_timing_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1688,10 +1697,10 @@ static ssize_t stp_timing_mode_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.tmd); } -static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); -static ssize_t stp_timing_state_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_timing_state_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1699,17 +1708,17 @@ static ssize_t stp_timing_state_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.tst); } -static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); -static ssize_t stp_online_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_online_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_online_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned int value; @@ -1731,47 +1740,47 @@ static ssize_t stp_online_store(struct sysdev_class *class, } /* - * Can't use SYSDEV_CLASS_ATTR because the attribute should be named - * stp/online but attr_online already exists in this file .. + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. */ -static struct sysdev_class_attribute attr_stp_online = { +static struct device_attribute dev_attr_stp_online = { .attr = { .name = "online", .mode = 0600 }, .show = stp_online_show, .store = stp_online_store, }; -static struct sysdev_class_attribute *stp_attributes[] = { - &attr_ctn_id, - &attr_ctn_type, - &attr_dst_offset, - &attr_leap_seconds, - &attr_stp_online, - &attr_stratum, - &attr_time_offset, - &attr_time_zone_offset, - &attr_timing_mode, - &attr_timing_state, +static struct device_attribute *stp_attributes[] = { + &dev_attr_ctn_id, + &dev_attr_ctn_type, + &dev_attr_dst_offset, + &dev_attr_leap_seconds, + &dev_attr_stp_online, + &dev_attr_stratum, + &dev_attr_time_offset, + &dev_attr_time_zone_offset, + &dev_attr_timing_mode, + &dev_attr_timing_state, NULL }; static int __init stp_init_sysfs(void) { - struct sysdev_class_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_class_register(&stp_sysclass); + rc = subsys_system_register(&stp_subsys, NULL); if (rc) goto out; for (attr = stp_attributes; *attr; attr++) { - rc = sysdev_class_create_file(&stp_sysclass, *attr); + rc = device_create_file(stp_subsys.dev_root, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= stp_attributes; attr--) - sysdev_class_remove_file(&stp_sysclass, *attr); - sysdev_class_unregister(&stp_sysclass); + device_remove_file(stp_subsys.dev_root, *attr); + bus_unregister(&stp_subsys); out: return rc; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 94b06c31fc8..355a16c5570 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,23 +1,24 @@ /* - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007, 2011 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/device.h> +#include <linux/workqueue.h> #include <linux/bootmem.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/kernel.h> #include <linux/sched.h> -#include <linux/workqueue.h> +#include <linux/init.h> +#include <linux/delay.h> #include <linux/cpu.h> #include <linux/smp.h> -#include <linux/cpuset.h> -#include <asm/delay.h> -#include <asm/s390_ext.h> +#include <linux/mm.h> +#include <asm/sysinfo.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -29,88 +30,79 @@ struct mask_info { cpumask_t mask; }; -static int topology_enabled = 1; +static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; -static struct timer_list topology_timer; -static void set_topology_timer(void); -static DECLARE_WORK(topology_work, topology_work_fn); -/* topology_lock protects the core linked list */ -static DEFINE_SPINLOCK(topology_lock); -static struct mask_info core_info; -cpumask_t cpu_core_map[NR_CPUS]; -unsigned char cpu_core_id[NR_CPUS]; +static int topology_enabled = 1; +static DECLARE_WORK(topology_work, topology_work_fn); -#ifdef CONFIG_SCHED_BOOK +/* topology_lock protects the socket and book linked lists */ +static DEFINE_SPINLOCK(topology_lock); +static struct mask_info socket_info; static struct mask_info book_info; -cpumask_t cpu_book_map[NR_CPUS]; -unsigned char cpu_book_id[NR_CPUS]; -#endif + +struct cpu_topology_s390 cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; - cpus_clear(mask); - if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) { - cpumask_copy(&mask, cpumask_of(cpu)); + cpumask_copy(&mask, cpumask_of(cpu)); + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) return mask; + for (; info; info = info->next) { + if (cpumask_test_cpu(cpu, &info->mask)) + return info->mask; } - while (info) { - if (cpu_isset(cpu, info->mask)) { - mask = info->mask; - break; - } - info = info->next; - } - if (cpus_empty(mask)) - mask = cpumask_of_cpu(cpu); return mask; } -static void add_cpus_to_mask(struct topology_cpu *tl_cpu, - struct mask_info *book, struct mask_info *core) +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *socket, + int one_socket_per_cpu) { unsigned int cpu; - for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS); - cpu < TOPOLOGY_CPU_BITS; - cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1)) - { - unsigned int rcpu, lcpu; + for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) { + unsigned int rcpu; + int lcpu; rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; - for_each_present_cpu(lcpu) { - if (cpu_logical_map(lcpu) != rcpu) - continue; -#ifdef CONFIG_SCHED_BOOK - cpu_set(lcpu, book->mask); - cpu_book_id[lcpu] = book->id; -#endif - cpu_set(lcpu, core->mask); - cpu_core_id[lcpu] = core->id; - smp_cpu_polarization[lcpu] = tl_cpu->pp; + lcpu = smp_find_processor_id(rcpu); + if (lcpu < 0) + continue; + cpumask_set_cpu(lcpu, &book->mask); + cpu_topology[lcpu].book_id = book->id; + cpumask_set_cpu(lcpu, &socket->mask); + cpu_topology[lcpu].core_id = rcpu; + if (one_socket_per_cpu) { + cpu_topology[lcpu].socket_id = rcpu; + socket = socket->next; + } else { + cpu_topology[lcpu].socket_id = socket->id; } + smp_cpu_set_polarization(lcpu, tl_cpu->pp); } + return socket; } static void clear_masks(void) { struct mask_info *info; - info = &core_info; + info = &socket_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } -#ifdef CONFIG_SCHED_BOOK info = &book_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } -#endif } static union topology_entry *next_tle(union topology_entry *tle) @@ -120,43 +112,75 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void tl_to_cores(struct sysinfo_15_1_x *info) +static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) { -#ifdef CONFIG_SCHED_BOOK + struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; -#else - struct mask_info *book = NULL; -#endif - struct mask_info *core = &core_info; union topology_entry *tle, *end; - - spin_lock_irq(&topology_lock); - clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { -#ifdef CONFIG_SCHED_BOOK case 2: book = book->next; book->id = tle->container.id; break; -#endif case 1: - core = core->next; - core->id = tle->container.id; + socket = socket->next; + socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, core); + add_cpus_to_mask(&tle->cpu, book, socket, 0); break; default: clear_masks(); - goto out; + return; } tle = next_tle(tle); } -out: +} + +static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) +{ + struct mask_info *socket = &socket_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void tl_to_masks(struct sysinfo_15_1_x *info) +{ + struct cpuid cpu_id; + + spin_lock_irq(&topology_lock); + get_cpu_id(&cpu_id); + clear_masks(); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + __tl_to_masks_z10(info); + break; + default: + __tl_to_masks_generic(info); + } spin_unlock_irq(&topology_lock); } @@ -166,7 +190,7 @@ static void topology_update_polarization_simple(void) mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) - smp_cpu_polarization[cpu] = POLARIZATION_HRZ; + smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); mutex_unlock(&smp_cpu_state_mutex); } @@ -185,8 +209,7 @@ static int ptf(unsigned long fc) int topology_set_cpu_management(int fc) { - int cpu; - int rc; + int cpu, rc; if (!MACHINE_HAS_TOPOLOGY) return -EOPNOTSUPP; @@ -197,54 +220,53 @@ int topology_set_cpu_management(int fc) if (rc) return -EBUSY; for_each_possible_cpu(cpu) - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); return rc; } -static void update_cpu_core_map(void) +static void update_cpu_masks(void) { unsigned long flags; int cpu; spin_lock_irqsave(&topology_lock, flags); for_each_possible_cpu(cpu) { - cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); -#ifdef CONFIG_SCHED_BOOK - cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); -#endif + cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu); + cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu); + if (!MACHINE_HAS_TOPOLOGY) { + cpu_topology[cpu].core_id = cpu; + cpu_topology[cpu].socket_id = cpu; + cpu_topology[cpu].book_id = cpu; + } } spin_unlock_irqrestore(&topology_lock, flags); } void store_topology(struct sysinfo_15_1_x *info) { -#ifdef CONFIG_SCHED_BOOK - int rc; - - rc = stsi(info, 15, 1, 3); - if (rc != -ENOSYS) - return; -#endif - stsi(info, 15, 1, 2); + if (topology_max_mnest >= 3) + stsi(info, 15, 1, 3); + else + stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) { struct sysinfo_15_1_x *info = tl_info; - struct sys_device *sysdev; + struct device *dev; int cpu; if (!MACHINE_HAS_TOPOLOGY) { - update_cpu_core_map(); + update_cpu_masks(); topology_update_polarization_simple(); return 0; } store_topology(info); - tl_to_cores(info); - update_cpu_core_map(); + tl_to_masks(info); + update_cpu_masks(); for_each_online_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return 1; } @@ -266,12 +288,30 @@ static void topology_timer_fn(unsigned long ignored) set_topology_timer(); } +static struct timer_list topology_timer = + TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); + +static atomic_t topology_poll = ATOMIC_INIT(0); + static void set_topology_timer(void) { - topology_timer.function = topology_timer_fn; - topology_timer.data = 0; - topology_timer.expires = jiffies + 60 * HZ; - add_timer(&topology_timer); + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + HZ / 10); + else + mod_timer(&topology_timer, jiffies + HZ * 60); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); } static int __init early_parse_topology(char *p) @@ -283,25 +323,8 @@ static int __init early_parse_topology(char *p) } early_param("topology", early_parse_topology); -static int __init init_topology_update(void) -{ - int rc; - - rc = 0; - if (!MACHINE_HAS_TOPOLOGY) { - topology_update_polarization_simple(); - goto out; - } - init_timer_deferrable(&topology_timer); - set_topology_timer(); -out: - update_cpu_core_map(); - return rc; -} -__initcall(init_topology_update); - -static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, - int offset) +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) { int i, nr_masks; @@ -310,7 +333,9 @@ static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = alloc_bootmem(sizeof(struct mask_info)); + mask->next = alloc_bootmem_align( + roundup_pow_of_two(sizeof(struct mask_info)), + roundup_pow_of_two(sizeof(struct mask_info))); mask = mask->next; } } @@ -327,10 +352,127 @@ void __init s390_init_cpu_topology(void) store_topology(info); pr_info("The CPU configuration topology of the machine is:"); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - printk(" %d", info->mag[i]); - printk(" / %d\n", info->mnest); - alloc_masks(info, &core_info, 2); -#ifdef CONFIG_SCHED_BOOK - alloc_masks(info, &book_info, 3); -#endif + printk(KERN_CONT " %d", info->mag[i]); + printk(KERN_CONT " / %d\n", info->mnest); + alloc_masks(info, &socket_info, 1); + alloc_masks(info, &book_info, 2); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (smp_cpu_get_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); +} + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_mask; +} + +static const struct cpumask *cpu_book_mask(int cpu) +{ + return &cpu_topology[cpu].book_mask; +} + +static struct sched_domain_topology_level s390_topology[] = { + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static int __init topology_init(void) +{ + if (!MACHINE_HAS_TOPOLOGY) { + topology_update_polarization_simple(); + goto out; + } + set_topology_timer(); +out: + + set_sched_topology(s390_topology); + + return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } +device_initcall(topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index b5a4a739b47..c5762324d9e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/traps.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * @@ -14,266 +12,34 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/ptrace.h> #include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/tracehook.h> -#include <linux/timer.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/seq_file.h> -#include <linux/delay.h> -#include <linux/module.h> -#include <linux/kdebug.h> -#include <linux/kallsyms.h> -#include <linux/reboot.h> -#include <linux/kprobes.h> -#include <linux/bug.h> -#include <linux/utsname.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/mathemu.h> -#include <asm/cpcmd.h> -#include <asm/s390_ext.h> -#include <asm/lowcore.h> -#include <asm/debug.h> #include "entry.h" -pgm_check_handler_t *pgm_check_table[128]; - -int show_unhandled_signals; - -extern pgm_check_handler_t do_protection_exception; -extern pgm_check_handler_t do_dat_exception; -extern pgm_check_handler_t do_asce_exception; - -#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) - -#ifndef CONFIG_64BIT -#define LONG "%08lx " -#define FOURLONG "%08lx %08lx %08lx %08lx\n" -static int kstack_depth_to_print = 12; -#else /* CONFIG_64BIT */ -#define LONG "%016lx " -#define FOURLONG "%016lx %016lx %016lx %016lx\n" -static int kstack_depth_to_print = 20; -#endif /* CONFIG_64BIT */ - -/* - * For show_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; - } -} - -static void show_trace(struct task_struct *task, unsigned long *stack) -{ - register unsigned long __r15 asm ("15"); - unsigned long sp; - - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); -#ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); -#endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); - if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); - else - __show_trace(sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); - if (!task) - task = current; - debug_show_held_locks(task); -} +int show_unhandled_signals = 1; -void show_stack(struct task_struct *task, unsigned long *sp) -{ - register unsigned long * __r15 asm ("15"); - unsigned long *stack; - int i; - - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - - for (i = 0; i < kstack_depth_to_print; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i * sizeof (long) % 32) == 0)) - printk("\n "); - printk(LONG, *stack++); - } - printk("\n"); - show_trace(task, sp); -} - -static void show_last_breaking_event(struct pt_regs *regs) +static inline void __user *get_trap_ip(struct pt_regs *regs) { #ifdef CONFIG_64BIT - printk("Last Breaking-Event-Address:\n"); - printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN); - print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN); -#endif -} + unsigned long address; -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - -void show_registers(struct pt_regs *regs) -{ - char *mode; - - mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl"; - printk("%s PSW : %p %p", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); - printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); -#ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); -#endif - printk("\n%s GPRS: " FOURLONG, mode, - regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); - printk(" " FOURLONG, - regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); - printk(" " FOURLONG, - regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); - printk(" " FOURLONG, - regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); - - show_code(regs); -} - -void show_regs(struct pt_regs *regs) -{ - print_modules(); - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_registers(regs); - /* Show stack backtrace if pt_regs is from kernel mode */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); - show_last_breaking_event(regs); -} - -static DEFINE_SPINLOCK(die_lock); - -void die(const char * str, struct pt_regs * regs, long err) -{ - static int die_counter; - - oops_enter(); - debug_stop_all(); - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); + if (regs->int_code & 0x200) + address = *(unsigned long *)(current->thread.trap_tdb + 24); + else + address = regs->psw.addr; + return (void __user *) + ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#else + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); #endif - printk("\n"); - notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); - show_regs(regs); - bust_spinlocks(0); - add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception: panic_on_oops"); - oops_exit(); - do_exit(SIGSEGV); } -static void inline report_user_fault(struct pt_regs *regs, long int_code, - int signr) +static inline void report_user_fault(struct pt_regs *regs, int signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -281,7 +47,7 @@ static void inline report_user_fault(struct pt_regs *regs, long int_code, return; if (!printk_ratelimit()) return; - printk("User process fault: interruption code 0x%lX ", int_code); + printk("User process fault: interruption code 0x%X ", regs->int_code); print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); printk("\n"); show_regs(regs); @@ -292,70 +58,67 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, - struct pt_regs *regs, siginfo_t *info) +static void __kprobes do_trap(struct pt_regs *regs, + int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, pgm_int_code, - pgm_int_code, signr) == NOTIFY_STOP) - return; + siginfo_t info; - if (regs->psw.mask & PSW_MASK_PSTATE) { - struct task_struct *tsk = current; + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; - tsk->thread.trap_no = pgm_int_code & 0xffff; - force_sig_info(signr, info, tsk); - report_user_fault(regs, pgm_int_code, signr); + if (user_mode(regs)) { + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = get_trap_ip(regs); + force_sig_info(si_signo, &info, current); + report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; else { enum bug_trap_type btt; btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); if (btt == BUG_TRAP_TYPE_WARN) return; - die(str, regs, pgm_int_code); + die(regs, str); } } } -static inline void __user *get_psw_address(struct pt_regs *regs, - long pgm_int_code) -{ - return (void __user *) - ((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN); -} - void __kprobes do_per_trap(struct pt_regs *regs) { + siginfo_t info; + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; - if (tracehook_consider_fatal_signal(current, SIGTRAP)) - force_sig(SIGTRAP, current); + if (!current->ptrace) + return; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = + (void __force __user *) current->thread.per_event.address; + force_sig_info(SIGTRAP, &info, current); } -static void default_trap_handler(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void default_trap_handler(struct pt_regs *regs) { - if (regs->psw.mask & PSW_MASK_PSTATE) { - report_user_fault(regs, pgm_int_code, SIGSEGV); + if (user_mode(regs)) { + report_user_fault(regs, SIGSEGV); do_exit(SIGSEGV); } else - die("Unknown program exception", regs, pgm_int_code); + die(regs, "Unknown program exception"); } #define DO_ERROR_INFO(name, signr, sicode, str) \ -static void name(struct pt_regs *regs, long pgm_int_code, \ - unsigned long trans_exc_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = get_psw_address(regs, pgm_int_code); \ - do_trap(pgm_int_code, signr, str, regs, &info); \ +void name(struct pt_regs *regs) \ +{ \ + do_trap(regs, signr, sicode, str); \ } DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, @@ -385,50 +148,51 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, "translation exception") -static inline void do_fp_trap(struct pt_regs *regs, void __user *location, - int fpc, long pgm_int_code) -{ - siginfo_t si; +#ifdef CONFIG_64BIT +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, + "transaction constraint exception") +#endif - si.si_signo = SIGFPE; - si.si_errno = 0; - si.si_addr = location; - si.si_code = 0; +static inline void do_fp_trap(struct pt_regs *regs, int fpc) +{ + int si_code = 0; /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ if (fpc & 0x8000) /* invalid fp operation */ - si.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; else if (fpc & 0x4000) /* div by 0 */ - si.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; else if (fpc & 0x2000) /* overflow */ - si.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; else if (fpc & 0x1000) /* underflow */ - si.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; else if (fpc & 0x0800) /* inexact */ - si.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; } - do_trap(pgm_int_code, SIGFPE, - "floating point exception", regs, &si); + do_trap(regs, SIGFPE, si_code, "floating point exception"); } -static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes illegal_op(struct pt_regs *regs) { siginfo_t info; __u8 opcode[6]; __u16 __user *location; int signal = 0; - location = get_psw_address(regs, pgm_int_code); + location = get_trap_ip(regs); - if (regs->psw.mask & PSW_MASK_PSTATE) { + if (user_mode(regs)) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (tracehook_consider_fatal_signal(current, SIGTRAP)) - force_sig(SIGTRAP, current); - else + if (current->ptrace) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = location; + force_sig_info(SIGTRAP, &info, current); + } else signal = SIGILL; #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { @@ -460,47 +224,33 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, * If we get an illegal op in kernel mode, send it through the * kprobes notifier. If kprobes doesn't pick it up, SIGILL */ - if (notify_die(DIE_BPT, "bpt", regs, pgm_int_code, + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } #ifdef CONFIG_MATHEMU if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal == SIGSEGV) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) location; - do_trap(pgm_int_code, signal, - "user address fault", regs, &info); - } else + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal == SIGSEGV) + do_trap(regs, signal, SEGV_MAPERR, "user address fault"); + else #endif - if (signal) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *) location; - do_trap(pgm_int_code, signal, - "illegal operation", regs, &info); - } + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); } #ifdef CONFIG_MATHEMU -asmlinkage void specification_exception(struct pt_regs *regs, - long pgm_int_code, - unsigned long trans_exc_code) +void specification_exception(struct pt_regs *regs) { __u8 opcode[6]; __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_psw_address(regs, pgm_int_code); + location = (__u16 __user *) get_trap_ip(regs); - if (regs->psw.mask & PSW_MASK_PSTATE) { + if (user_mode(regs)) { get_user(*((__u16 *) opcode), location); switch (opcode[0]) { case 0x28: /* LDR Rx,Ry */ @@ -533,36 +283,27 @@ asmlinkage void specification_exception(struct pt_regs *regs, signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(pgm_int_code, signal, - "specification exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "specification exception"); } #else DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); #endif -static void data_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void data_exception(struct pt_regs *regs) { __u16 __user *location; int signal = 0; - location = get_psw_address(regs, pgm_int_code); + location = get_trap_ip(regs); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); #ifdef CONFIG_MATHEMU - else if (regs->psw.mask & PSW_MASK_PSTATE) { + else if (user_mode(regs)) { __u8 opcode[6]; get_user(*((__u16 *) opcode), location); switch (opcode[0]) { @@ -621,35 +362,21 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code, else signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(pgm_int_code, signal, "data exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "data exception"); } -static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void space_switch_exception(struct pt_regs *regs) { - siginfo_t info; - /* Set user psw back to home space mode. */ - if (regs->psw.mask & PSW_MASK_PSTATE) + if (user_mode(regs)) regs->psw.mask |= PSW_ASC_HOME; /* Send SIGILL. */ - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = get_psw_address(regs, pgm_int_code); - do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } -asmlinkage void __kprobes kernel_stack_overflow(struct pt_regs * regs) +void __kprobes kernel_stack_overflow(struct pt_regs * regs) { bust_spinlocks(1); printk("Kernel stack overflow.\n"); @@ -658,42 +385,7 @@ asmlinkage void __kprobes kernel_stack_overflow(struct pt_regs * regs) panic("Corrupt kernel stack, can't continue."); } -/* init is done in lowcore.S and head.S */ - void __init trap_init(void) { - int i; - - for (i = 0; i < 128; i++) - pgm_check_table[i] = &default_trap_handler; - pgm_check_table[1] = &illegal_op; - pgm_check_table[2] = &privileged_op; - pgm_check_table[3] = &execute_exception; - pgm_check_table[4] = &do_protection_exception; - pgm_check_table[5] = &addressing_exception; - pgm_check_table[6] = &specification_exception; - pgm_check_table[7] = &data_exception; - pgm_check_table[8] = &overflow_exception; - pgm_check_table[9] = ÷_exception; - pgm_check_table[0x0A] = &overflow_exception; - pgm_check_table[0x0B] = ÷_exception; - pgm_check_table[0x0C] = &hfp_overflow_exception; - pgm_check_table[0x0D] = &hfp_underflow_exception; - pgm_check_table[0x0E] = &hfp_significance_exception; - pgm_check_table[0x0F] = &hfp_divide_exception; - pgm_check_table[0x10] = &do_dat_exception; - pgm_check_table[0x11] = &do_dat_exception; - pgm_check_table[0x12] = &translation_exception; - pgm_check_table[0x13] = &special_op_exception; -#ifdef CONFIG_64BIT - pgm_check_table[0x38] = &do_asce_exception; - pgm_check_table[0x39] = &do_dat_exception; - pgm_check_table[0x3A] = &do_dat_exception; - pgm_check_table[0x3B] = &do_dat_exception; -#endif /* CONFIG_64BIT */ - pgm_check_table[0x15] = &operand_exception; - pgm_check_table[0x1C] = &space_switch_exception; - pgm_check_table[0x1D] = &hfp_sqrt_exception; - /* Enable machine checks early. */ local_mcck_enable(); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f438d74dedb..61364909678 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -25,12 +25,12 @@ #include <linux/compat.h> #include <asm/asm-offsets.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/sections.h> #include <asm/vdso.h> +#include <asm/facility.h> #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) extern char vdso32_start, vdso32_end; @@ -63,7 +63,7 @@ static int __init vdso_setup(char *s) else if (strncmp(s, "off", 4) == 0) vdso_enabled = 0; else { - rc = strict_strtoul(s, 0, &val); + rc = kstrtoul(s, 0, &val); vdso_enabled = rc ? 0 : !!val; } return !rc; @@ -84,23 +84,16 @@ struct vdso_data *vdso_data = &vdso_data_store.data; */ static void vdso_init_data(struct vdso_data *vd) { - vd->ectg_available = user_mode != HOME_SPACE_MODE && test_facility(31); + vd->ectg_available = test_facility(31); } #ifdef CONFIG_64BIT /* - * Setup per cpu vdso data page. - */ -static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) -{ -} - -/* * Allocate/free per cpu vdso data. */ #define SEGMENT_ORDER 2 -int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +int vdso_alloc_per_cpu(struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -108,7 +101,7 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) lowcore->vdso_per_cpu_data = __LC_PASTE; - if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + if (!vdso_enabled) return 0; segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); @@ -119,11 +112,11 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) page_table, _PAGE_INVALID, 256*sizeof(unsigned long)); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_RO + page_frame; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; psal = (u32 *) (page_table + 256*sizeof(unsigned long)); aste = psal + 32; @@ -132,14 +125,13 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) psal[i] = 0x80000000; lowcore->paste[4] = (u32)(addr_t) psal; - psal[0] = 0x20000000; + psal[0] = 0x02000000; psal[2] = (u32)(addr_t) aste; *(unsigned long *) (aste + 2) = segment_table + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; aste[4] = (u32)(addr_t) psal; lowcore->vdso_per_cpu_data = page_frame; - vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); return 0; out: @@ -149,12 +141,12 @@ out: return -ENOMEM; } -void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +void vdso_free_per_cpu(struct _lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; - if (user_mode == HOME_SPACE_MODE || !vdso_enabled) + if (!vdso_enabled) return; psal = (u32 *)(addr_t) lowcore->paste[4]; @@ -168,19 +160,15 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) free_pages(segment_table, SEGMENT_ORDER); } -static void __vdso_init_cr5(void *dummy) +static void vdso_init_cr5(void) { unsigned long cr5; + if (!vdso_enabled) + return; cr5 = offsetof(struct _lowcore, paste); __ctl_load(cr5, 5, 5); } - -static void vdso_init_cr5(void) -{ - if (user_mode != HOME_SPACE_MODE && vdso_enabled) - on_each_cpu(__vdso_init_cr5, NULL, 1); -} #endif /* CONFIG_64BIT */ /* @@ -253,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * on the "data" page of the vDSO or you'll stop getting kernel * updates and your nice userland gettimeofday will be totally dead. * It's fine to use that for setting breakpoints in the vDSO code - * pages though - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. + * pages though. */ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); if (rc) current->mm->context.vdso_base = 0; @@ -322,10 +304,8 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; -#ifndef CONFIG_SMP - if (vdso_alloc_per_cpu(0, &S390_lowcore)) + if (vdso_alloc_per_cpu(&S390_lowcore)) BUG(); -#endif vdso_init_cr5(); #endif /* CONFIG_64BIT */ @@ -335,19 +315,19 @@ static int __init vdso_init(void) return 0; } -arch_initcall(vdso_init); +early_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore new file mode 100644 index 00000000000..e45fba9d0ce --- /dev/null +++ b/arch/s390/kernel/vdso32/.gitignore @@ -0,0 +1 @@ +vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index d13e8755a8c..8ad2b34ad15 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,6 +22,9 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index b2224e0b974..65fc3979c2f 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -38,25 +38,21 @@ __kernel_clock_gettime: sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,2f ahi %r0,-1 -2: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ +2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ lr %r2,%r0 - l %r0,__VDSO_NTP_MULT(%r5) + l %r0,__VDSO_TK_MULT(%r5) ltr %r1,%r1 mr %r0,%r0 jnm 3f - a %r0,__VDSO_NTP_MULT(%r5) + a %r0,__VDSO_TK_MULT(%r5) 3: alr %r0,%r2 - srdl %r0,12 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,4f - ahi %r0,1 -4: l %r2,__VDSO_XTIME_SEC+4(%r5) - al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + al %r0,__VDSO_WTOM_NSEC(%r5) al %r1,__VDSO_WTOM_NSEC+4(%r5) brc 12,5f ahi %r0,1 -5: al %r2,__VDSO_WTOM_SEC+4(%r5) +5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r2) /* >> tk->shift */ + l %r2,__VDSO_WTOM_SEC+4(%r5) cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ jne 1b basr %r5,0 @@ -86,20 +82,21 @@ __kernel_clock_gettime: sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,12f ahi %r0,-1 -12: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ +12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ lr %r2,%r0 - l %r0,__VDSO_NTP_MULT(%r5) + l %r0,__VDSO_TK_MULT(%r5) ltr %r1,%r1 mr %r0,%r0 jnm 13f - a %r0,__VDSO_NTP_MULT(%r5) + a %r0,__VDSO_TK_MULT(%r5) 13: alr %r0,%r2 - srdl %r0,12 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ al %r1,__VDSO_XTIME_NSEC+4(%r5) brc 12,14f ahi %r0,1 -14: l %r2,__VDSO_XTIME_SEC+4(%r5) +14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r2) /* >> tk->shift */ + l %r2,__VDSO_XTIME_SEC+4(%r5) cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ jne 11b basr %r5,0 diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 2d3633175e3..fd621a950f7 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -35,15 +35,14 @@ __kernel_gettimeofday: sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,3f ahi %r0,-1 -3: ms %r0,__VDSO_NTP_MULT(%r5) /* cyc2ns(clock,cycle_delta) */ +3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ st %r0,24(%r15) - l %r0,__VDSO_NTP_MULT(%r5) + l %r0,__VDSO_TK_MULT(%r5) ltr %r1,%r1 mr %r0,%r0 jnm 4f - a %r0,__VDSO_NTP_MULT(%r5) + a %r0,__VDSO_TK_MULT(%r5) 4: al %r0,24(%r15) - srdl %r0,12 al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ al %r1,__VDSO_XTIME_NSEC+4(%r5) brc 12,5f @@ -51,6 +50,8 @@ __kernel_gettimeofday: 5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ jne 1b + l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r4) /* >> tk->shift */ l %r4,24(%r15) /* get tv_sec from stack */ basr %r5,0 6: ltr %r0,%r0 diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore new file mode 100644 index 00000000000..3fd18cf9fec --- /dev/null +++ b/arch/s390/kernel/vdso64/.gitignore @@ -0,0 +1 @@ +vdso64.lds diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 449352dda9c..2a8ddfd12a5 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -22,6 +22,9 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 176e1f75f9a..34deba7c7ed 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -23,7 +23,9 @@ __kernel_clock_getres: je 0f cghi %r2,__CLOCK_MONOTONIC je 0f - cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + cghi %r2,__CLOCK_THREAD_CPUTIME_ID + je 0f + cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ jne 2f larl %r5,_vdso_data icm %r0,15,__LC_ECTG_OK(%r5) diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index d46c95ed5f1..91940ed33a4 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -22,7 +22,9 @@ __kernel_clock_gettime: larl %r5,_vdso_data cghi %r2,__CLOCK_REALTIME je 4f - cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + cghi %r2,__CLOCK_THREAD_CPUTIME_ID + je 9f + cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ je 9f cghi %r2,__CLOCK_MONOTONIC jne 12f @@ -34,14 +36,13 @@ __kernel_clock_gettime: tmll %r4,0x0001 /* pending update ? loop */ jnz 0b stck 48(%r15) /* Store TOD clock */ + lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + lg %r0,__VDSO_WTOM_SEC(%r5) lg %r1,48(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ - srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - lg %r0,__VDSO_XTIME_SEC(%r5) - alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ - alg %r0,__VDSO_WTOM_SEC(%r5) + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_WTOM_NSEC(%r5) + srlg %r1,%r1,0(%r2) /* >> tk->shift */ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 0b larl %r5,13f @@ -62,12 +63,13 @@ __kernel_clock_gettime: tmll %r4,0x0001 /* pending update ? loop */ jnz 5b stck 48(%r15) /* Store TOD clock */ + lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ lg %r1,48(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ - srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - lg %r0,__VDSO_XTIME_SEC(%r5) + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + srlg %r1,%r1,0(%r2) /* >> tk->shift */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 5b larl %r5,13f diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index 36ee674722e..d0860d1d0cc 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -31,12 +31,13 @@ __kernel_gettimeofday: stck 48(%r15) /* Store TOD clock */ lg %r1,48(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_NTP_MULT(%r5) /* * NTP adjustment */ - srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */ - lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 0b + lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srlg %r1,%r1,0(%r5) /* >> tk->shift */ larl %r5,5f 2: clg %r1,0(%r5) jl 3f diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a68ac10213b..35b13ed0af5 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -8,13 +8,13 @@ #ifndef CONFIG_64BIT OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390) -ENTRY(_start) +OUTPUT_ARCH(s390:31-bit) +ENTRY(startup) jiffies = jiffies_64 + 4; #else OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(_start) +ENTRY(startup) jiffies = jiffies_64; #endif @@ -43,7 +43,9 @@ SECTIONS NOTES :text :note - RODATA + .dummy : { *(.dummy) } :data + + RO_DATA_SECTION(PAGE_SIZE) #ifdef CONFIG_SHARED_KERNEL . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ @@ -73,11 +75,15 @@ SECTIONS EXIT_TEXT } + .exit.data : { + EXIT_DATA + } + /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) - PERCPU(PAGE_SIZE) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 1ccdf4d8aa8..8c34363d6f1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,70 +1,83 @@ /* - * arch/s390/kernel/vtime.c * Virtual cpu timer based timer functions. * - * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2004, 2012 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> */ -#include <linux/module.h> +#include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/export.h> #include <linux/kernel.h> -#include <linux/time.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/types.h> #include <linux/timex.h> -#include <linux/notifier.h> -#include <linux/kernel_stat.h> -#include <linux/rcupdate.h> -#include <linux/posix-timers.h> +#include <linux/types.h> +#include <linux/time.h> #include <linux/cpu.h> -#include <linux/kprobes.h> +#include <linux/smp.h> -#include <asm/s390_ext.h> -#include <asm/timer.h> #include <asm/irq_regs.h> #include <asm/cputime.h> +#include <asm/vtimer.h> +#include <asm/vtime.h> +#include <asm/irq.h> +#include "entry.h" -static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); +static void virt_timer_expire(void); DEFINE_PER_CPU(struct s390_idle_data, s390_idle); -static inline __u64 get_vtimer(void) +static LIST_HEAD(virt_timer_list); +static DEFINE_SPINLOCK(virt_timer_lock); +static atomic64_t virt_timer_current; +static atomic64_t virt_timer_elapsed; + +static inline u64 get_vtimer(void) { - __u64 timer; + u64 timer; - asm volatile("STPT %0" : "=m" (timer)); + asm volatile("stpt %0" : "=m" (timer)); return timer; } -static inline void set_vtimer(__u64 expires) +static inline void set_vtimer(u64 expires) { - __u64 timer; + u64 timer; - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " spt %1" /* Set new value imm. afterwards */ + : "=m" (timer) : "m" (expires)); S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; S390_lowcore.last_update_timer = expires; } +static inline int virt_timer_forward(u64 elapsed) +{ + BUG_ON(!irqs_disabled()); + + if (list_empty(&virt_timer_list)) + return 0; + elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed); + return elapsed >= atomic64_read(&virt_timer_current); +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) +static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) { struct thread_info *ti = task_thread_info(tsk); - __u64 timer, clock, user, system, steal; + u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " STCK %1" /* Store current tod clock value */ - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock) ); + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stck %1" /* Store current tod clock value */ + : "=m" (S390_lowcore.last_update_timer), + "=m" (S390_lowcore.last_update_clock)); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; @@ -83,9 +96,11 @@ static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) S390_lowcore.steal_timer = 0; account_steal_time(steal); } + + return virt_timer_forward(user + system); } -void account_vtime(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *ti; @@ -93,24 +108,32 @@ void account_vtime(struct task_struct *prev, struct task_struct *next) ti = task_thread_info(prev); ti->user_timer = S390_lowcore.user_timer; ti->system_timer = S390_lowcore.system_timer; - ti = task_thread_info(next); + ti = task_thread_info(current); S390_lowcore.user_timer = ti->user_timer; S390_lowcore.system_timer = ti->system_timer; } -void account_process_tick(struct task_struct *tsk, int user_tick) +/* + * In s390, accounting pending user time also implies + * accounting system time in order to correctly compute + * the stolen time accounting. + */ +void vtime_account_user(struct task_struct *tsk) { - do_account_vtime(tsk, HARDIRQ_OFFSET); + if (do_account_vtime(tsk, HARDIRQ_OFFSET)) + virt_timer_expire(); } /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_system_vtime(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - __u64 timer, system; + u64 timer, system; + + WARN_ON_ONCE(!irqs_disabled()); timer = S390_lowcore.last_update_timer; S390_lowcore.last_update_timer = get_vtimer(); @@ -120,153 +143,56 @@ void account_system_vtime(struct task_struct *tsk) S390_lowcore.steal_timer -= system; ti->system_timer = S390_lowcore.system_timer; account_system_time(tsk, 0, system, system); + + virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(account_system_vtime); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); -void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer) +void vtime_account_system(struct task_struct *tsk) +__attribute__((alias("vtime_account_irq_enter"))); +EXPORT_SYMBOL_GPL(vtime_account_system); + +void __kprobes vtime_stop_cpu(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - __u64 idle_time, expires; + unsigned long long idle_time; + unsigned long psw_mask; - if (idle->idle_enter == 0ULL) - return; + trace_hardirqs_on(); - /* Account time spent with enabled wait psw loaded as idle time. */ - idle_time = int_clock - idle->idle_enter; - account_idle_time(idle_time); - S390_lowcore.steal_timer += - idle->idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = int_clock; - - /* Account system time spent going idle. */ - S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; - S390_lowcore.last_update_timer = enter_timer; - - /* Restart vtime CPU timer */ - if (vq->do_spt) { - /* Program old expire value but first save progress. */ - expires = vq->idle - enter_timer; - expires += get_vtimer(); - set_vtimer(expires); - } else { - /* Don't account the CPU timer delta while the cpu was idle. */ - vq->elapsed -= vq->idle - enter_timer; - } + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + idle->nohz_delay = 0; + + /* Call the assembler magic in entry.S */ + psw_idle(idle, psw_mask); + /* Account time spent with enabled wait psw loaded as idle time. */ idle->sequence++; smp_wmb(); + idle_time = idle->clock_idle_exit - idle->clock_idle_enter; + idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; idle->idle_time += idle_time; - idle->idle_enter = 0ULL; idle->idle_count++; + account_idle_time(idle_time); smp_wmb(); idle->sequence++; } -void __kprobes vtime_stop_cpu(void) -{ - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - psw_t psw; - - /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; - - idle->nohz_delay = 0; - - /* Check if the CPU timer needs to be reprogrammed. */ - if (vq->do_spt) { - __u64 vmax = VTIMER_MAX_SLICE; - /* - * The inline assembly is equivalent to - * vq->idle = get_cpu_timer(); - * set_cpu_timer(VTIMER_MAX_SLICE); - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); - * The difference is that the inline assembly makes sure that - * the last three instruction are stpt, stck and lpsw in that - * order. This is done to increase the precision. - */ - asm volatile( -#ifndef CONFIG_64BIT - " basr 1,0\n" - "0: ahi 1,1f-0b\n" - " st 1,4(%2)\n" -#else /* CONFIG_64BIT */ - " larl 1,1f\n" - " stg 1,8(%2)\n" -#endif /* CONFIG_64BIT */ - " stpt 0(%4)\n" - " spt 0(%5)\n" - " stck 0(%3)\n" -#ifndef CONFIG_64BIT - " lpsw 0(%2)\n" -#else /* CONFIG_64BIT */ - " lpswe 0(%2)\n" -#endif /* CONFIG_64BIT */ - "1:" - : "=m" (idle->idle_enter), "=m" (vq->idle) - : "a" (&psw), "a" (&idle->idle_enter), - "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) - : "memory", "cc", "1"); - } else { - /* - * The inline assembly is equivalent to - * vq->idle = get_cpu_timer(); - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); - * The difference is that the inline assembly makes sure that - * the last three instruction are stpt, stck and lpsw in that - * order. This is done to increase the precision. - */ - asm volatile( -#ifndef CONFIG_64BIT - " basr 1,0\n" - "0: ahi 1,1f-0b\n" - " st 1,4(%2)\n" -#else /* CONFIG_64BIT */ - " larl 1,1f\n" - " stg 1,8(%2)\n" -#endif /* CONFIG_64BIT */ - " stpt 0(%4)\n" - " stck 0(%3)\n" -#ifndef CONFIG_64BIT - " lpsw 0(%2)\n" -#else /* CONFIG_64BIT */ - " lpswe 0(%2)\n" -#endif /* CONFIG_64BIT */ - "1:" - : "=m" (idle->idle_enter), "=m" (vq->idle) - : "a" (&psw), "a" (&idle->idle_enter), - "a" (&vq->idle), "m" (psw) - : "memory", "cc", "1"); - } -} - cputime64_t s390_get_idle_time(int cpu) { - struct s390_idle_data *idle; - unsigned long long now, idle_time, idle_enter; + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; unsigned int sequence; - idle = &per_cpu(s390_idle, cpu); - - now = get_clock(); -repeat: - sequence = idle->sequence; - smp_rmb(); - if (sequence & 1) - goto repeat; - idle_time = 0; - idle_enter = idle->idle_enter; - if (idle_enter != 0ULL && idle_enter < now) - idle_time = now - idle_enter; - smp_rmb(); - if (idle->sequence != sequence) - goto repeat; - return idle_time; + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; } /* @@ -275,11 +201,11 @@ repeat: */ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) { - struct vtimer_list *event; + struct vtimer_list *tmp; - list_for_each_entry(event, head, entry) { - if (event->expires > timer->expires) { - list_add_tail(&timer->entry, &event->entry); + list_for_each_entry(tmp, head, entry) { + if (tmp->expires > timer->expires) { + list_add_tail(&timer->entry, &tmp->entry); return; } } @@ -287,84 +213,45 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) } /* - * Do the callback functions of expired vtimer events. - * Called from within the interrupt handler. + * Handler for expired virtual CPU timer. */ -static void do_callbacks(struct list_head *cb_list) +static void virt_timer_expire(void) { - struct vtimer_queue *vq; - struct vtimer_list *event, *tmp; - - if (list_empty(cb_list)) - return; - - vq = &__get_cpu_var(virt_cpu_timer); - - list_for_each_entry_safe(event, tmp, cb_list, entry) { - list_del_init(&event->entry); - (event->function)(event->data); - if (event->interval) { - /* Recharge interval timer */ - event->expires = event->interval + vq->elapsed; - spin_lock(&vq->lock); - list_add_sorted(event, &vq->list); - spin_unlock(&vq->lock); - } - } -} - -/* - * Handler for the virtual CPU timer. - */ -static void do_cpu_timer_interrupt(unsigned int ext_int_code, - unsigned int param32, unsigned long param64) -{ - struct vtimer_queue *vq; - struct vtimer_list *event, *tmp; - struct list_head cb_list; /* the callback queue */ - __u64 elapsed, next; - - kstat_cpu(smp_processor_id()).irqs[EXTINT_TMR]++; - INIT_LIST_HEAD(&cb_list); - vq = &__get_cpu_var(virt_cpu_timer); - - /* walk timer list, fire all expired events */ - spin_lock(&vq->lock); - - elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); - BUG_ON((s64) elapsed < 0); - vq->elapsed = 0; - list_for_each_entry_safe(event, tmp, &vq->list, entry) { - if (event->expires < elapsed) + struct vtimer_list *timer, *tmp; + unsigned long elapsed; + LIST_HEAD(cb_list); + + /* walk timer list, fire all expired timers */ + spin_lock(&virt_timer_lock); + elapsed = atomic64_read(&virt_timer_elapsed); + list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) { + if (timer->expires < elapsed) /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + list_move_tail(&timer->entry, &cb_list); else - event->expires -= elapsed; + timer->expires -= elapsed; + } + if (!list_empty(&virt_timer_list)) { + timer = list_first_entry(&virt_timer_list, + struct vtimer_list, entry); + atomic64_set(&virt_timer_current, timer->expires); + } + atomic64_sub(elapsed, &virt_timer_elapsed); + spin_unlock(&virt_timer_lock); + + /* Do callbacks and recharge periodic timers */ + list_for_each_entry_safe(timer, tmp, &cb_list, entry) { + list_del_init(&timer->entry); + timer->function(timer->data); + if (timer->interval) { + /* Recharge interval timer */ + timer->expires = timer->interval + + atomic64_read(&virt_timer_elapsed); + spin_lock(&virt_timer_lock); + list_add_sorted(timer, &virt_timer_list); + spin_unlock(&virt_timer_lock); + } } - spin_unlock(&vq->lock); - - vq->do_spt = list_empty(&cb_list); - do_callbacks(&cb_list); - - /* next event is first in list */ - next = VTIMER_MAX_SLICE; - spin_lock(&vq->lock); - if (!list_empty(&vq->list)) { - event = list_first_entry(&vq->list, struct vtimer_list, entry); - next = event->expires; - } else - vq->do_spt = 0; - spin_unlock(&vq->lock); - /* - * To improve precision add the time spent by the - * interrupt handler to the elapsed time. - * Note: CPU timer counts down and we got an interrupt, - * the current content is negative - */ - elapsed = S390_lowcore.async_enter_timer - get_vtimer(); - set_vtimer(next - elapsed); - vq->timer = next - elapsed; - vq->elapsed = elapsed; } void init_virt_timer(struct vtimer_list *timer) @@ -376,179 +263,108 @@ EXPORT_SYMBOL(init_virt_timer); static inline int vtimer_pending(struct vtimer_list *timer) { - return (!list_empty(&timer->entry)); + return !list_empty(&timer->entry); } -/* - * this function should only run on the specified CPU - */ static void internal_add_vtimer(struct vtimer_list *timer) { - struct vtimer_queue *vq; - unsigned long flags; - __u64 left, expires; - - vq = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vq->lock, flags); - - BUG_ON(timer->cpu != smp_processor_id()); - - if (list_empty(&vq->list)) { - /* First timer on this cpu, just program it. */ - list_add(&timer->entry, &vq->list); - set_vtimer(timer->expires); - vq->timer = timer->expires; - vq->elapsed = 0; + if (list_empty(&virt_timer_list)) { + /* First timer, just program it. */ + atomic64_set(&virt_timer_current, timer->expires); + atomic64_set(&virt_timer_elapsed, 0); + list_add(&timer->entry, &virt_timer_list); } else { - /* Check progress of old timers. */ - expires = timer->expires; - left = get_vtimer(); - if (likely((s64) expires < (s64) left)) { + /* Update timer against current base. */ + timer->expires += atomic64_read(&virt_timer_elapsed); + if (likely((s64) timer->expires < + (s64) atomic64_read(&virt_timer_current))) /* The new timer expires before the current timer. */ - set_vtimer(expires); - vq->elapsed += vq->timer - left; - vq->timer = expires; - } else { - vq->elapsed += vq->timer - left; - vq->timer = left; - } - /* Insert new timer into per cpu list. */ - timer->expires += vq->elapsed; - list_add_sorted(timer, &vq->list); + atomic64_set(&virt_timer_current, timer->expires); + /* Insert new timer into the list. */ + list_add_sorted(timer, &virt_timer_list); } - - spin_unlock_irqrestore(&vq->lock, flags); - /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ - put_cpu(); } -static inline void prepare_vtimer(struct vtimer_list *timer) +static void __add_vtimer(struct vtimer_list *timer, int periodic) { - BUG_ON(!timer->function); - BUG_ON(!timer->expires || timer->expires > VTIMER_MAX_SLICE); - BUG_ON(vtimer_pending(timer)); - timer->cpu = get_cpu(); + unsigned long flags; + + timer->interval = periodic ? timer->expires : 0; + spin_lock_irqsave(&virt_timer_lock, flags); + internal_add_vtimer(timer); + spin_unlock_irqrestore(&virt_timer_lock, flags); } /* * add_virt_timer - add an oneshot virtual CPU timer */ -void add_virt_timer(void *new) +void add_virt_timer(struct vtimer_list *timer) { - struct vtimer_list *timer; - - timer = (struct vtimer_list *)new; - prepare_vtimer(timer); - timer->interval = 0; - internal_add_vtimer(timer); + __add_vtimer(timer, 0); } EXPORT_SYMBOL(add_virt_timer); /* * add_virt_timer_int - add an interval virtual CPU timer */ -void add_virt_timer_periodic(void *new) +void add_virt_timer_periodic(struct vtimer_list *timer) { - struct vtimer_list *timer; - - timer = (struct vtimer_list *)new; - prepare_vtimer(timer); - timer->interval = timer->expires; - internal_add_vtimer(timer); + __add_vtimer(timer, 1); } EXPORT_SYMBOL(add_virt_timer_periodic); -int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic) { - struct vtimer_queue *vq; unsigned long flags; - int cpu; + int rc; BUG_ON(!timer->function); - BUG_ON(!expires || expires > VTIMER_MAX_SLICE); if (timer->expires == expires && vtimer_pending(timer)) return 1; - - cpu = get_cpu(); - vq = &per_cpu(virt_cpu_timer, cpu); - - /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vq->lock, flags); - - /* if timer isn't pending add it on the current CPU */ - if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vq->lock, flags); - - if (periodic) - timer->interval = expires; - else - timer->interval = 0; - timer->expires = expires; - timer->cpu = cpu; - internal_add_vtimer(timer); - return 0; - } - - /* check if we run on the right CPU */ - BUG_ON(timer->cpu != cpu); - - list_del_init(&timer->entry); + spin_lock_irqsave(&virt_timer_lock, flags); + rc = vtimer_pending(timer); + if (rc) + list_del_init(&timer->entry); + timer->interval = periodic ? expires : 0; timer->expires = expires; - if (periodic) - timer->interval = expires; - - /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vq->lock, flags); internal_add_vtimer(timer); - return 1; + spin_unlock_irqrestore(&virt_timer_lock, flags); + return rc; } /* - * If we change a pending timer the function must be called on the CPU - * where the timer is running on. - * * returns whether it has modified a pending timer (1) or not (0) */ -int mod_virt_timer(struct vtimer_list *timer, __u64 expires) +int mod_virt_timer(struct vtimer_list *timer, u64 expires) { return __mod_vtimer(timer, expires, 0); } EXPORT_SYMBOL(mod_virt_timer); /* - * If we change a pending timer the function must be called on the CPU - * where the timer is running on. - * * returns whether it has modified a pending timer (1) or not (0) */ -int mod_virt_timer_periodic(struct vtimer_list *timer, __u64 expires) +int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires) { return __mod_vtimer(timer, expires, 1); } EXPORT_SYMBOL(mod_virt_timer_periodic); /* - * delete a virtual timer + * Delete a virtual timer. * * returns whether the deleted timer was pending (1) or not (0) */ int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vq; - /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - - vq = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vq->lock, flags); - - /* we don't interrupt a running timer, just let it expire! */ + spin_lock_irqsave(&virt_timer_lock, flags); list_del_init(&timer->entry); - - spin_unlock_irqrestore(&vq->lock, flags); + spin_unlock_irqrestore(&virt_timer_lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -558,27 +374,19 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vq; - - /* initialize per cpu vtimer structure */ - vq = &__get_cpu_var(virt_cpu_timer); - INIT_LIST_HEAD(&vq->list); - spin_lock_init(&vq->lock); - - /* enable cpu timer interrupts */ - __ctl_set_bit(0,10); + /* set initial cpu timer */ + set_vtimer(VTIMER_MAX_SLICE); } -static int __cpuinit s390_nohz_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int s390_nohz_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { struct s390_idle_data *idle; long cpu = (long) hcpu; idle = &per_cpu(s390_idle, cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DYING: - case CPU_DYING_FROZEN: idle->nohz_delay = 0; default: break; @@ -588,12 +396,7 @@ static int __cpuinit s390_nohz_notify(struct notifier_block *self, void __init vtime_init(void) { - /* request the cpu timer external interrupt */ - if (register_external_interrupt(0x1005, do_cpu_timer_interrupt)) - panic("Couldn't request external interrupt 0x1005"); - /* Enable cpu timer interrupts on the boot cpu. */ init_cpu_vtimer(); cpu_notifier(s390_nohz_notify, 0); } - diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index f66a1bdbb61..10d529ac982 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -5,7 +5,7 @@ source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION def_bool y - prompt "Virtualization" + prompt "KVM" ---help--- Say Y here to get to see options for using your Linux host to run other operating systems inside virtual machines (guests). @@ -18,9 +18,15 @@ if VIRTUALIZATION config KVM def_tristate y prompt "Kernel-based Virtual Machine (KVM) support" - depends on HAVE_KVM && EXPERIMENTAL + depends on HAVE_KVM select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD + select KVM_ASYNC_PF + select KVM_ASYNC_PF_SYNC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work @@ -34,9 +40,17 @@ config KVM If unsure, say N. +config KVM_S390_UCONTROL + bool "Userspace controlled virtual machines" + depends on KVM + ---help--- + Allow CAP_SYS_ADMIN users to create KVM virtual machines that are + controlled by userspace. + + If unsure, say N. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/vhost/Kconfig -source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index e5221ec0b8e..b3b55346965 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,9 +6,12 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o -EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm +ccflags-y := -Ivirt/kvm -Iarch/s390/kvm + +kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o +kvm-objs += diag.o gaccess.o guestdbg.o -kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9e4c84187cf..0161675878a 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,7 +1,7 @@ /* - * diag.c - handling diagnose instructions + * handling diagnose instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2011 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -13,22 +13,156 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <asm/pgalloc.h> +#include <asm/virtio-ccw.h> #include "kvm-s390.h" +#include "trace.h" +#include "trace-s390.h" +#include "gaccess.h" + +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = kvm_s390_get_prefix(vcpu); + + start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + +static int __diag_page_ref_service(struct kvm_vcpu *vcpu) +{ + struct prs_parm { + u16 code; + u16 subcode; + u16 parm_len; + u16 parm_version; + u64 token_addr; + u64 select_mask; + u64 compare_mask; + u64 zarch; + }; + struct prs_parm parm; + int rc; + u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; + u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); + + if (vcpu->run->s.regs.gprs[rx] & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (parm.subcode) { + case 0: /* TOKEN */ + if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { + /* + * If the pagefault handshake is already activated, + * the token must not be changed. We have to return + * decimal 8 instead, as mandated in SC24-6084. + */ + vcpu->run->s.regs.gprs[ry] = 8; + return 0; + } + + if ((parm.compare_mask & parm.select_mask) != parm.compare_mask || + parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + vcpu->arch.pfault_token = parm.token_addr; + vcpu->arch.pfault_select = parm.select_mask; + vcpu->arch.pfault_compare = parm.compare_mask; + vcpu->run->s.regs.gprs[ry] = 0; + rc = 0; + break; + case 1: /* + * CANCEL + * Specification allows to let already pending tokens survive + * the cancel, therefore to reduce code complexity, we assume + * all outstanding tokens are already pending. + */ + if (parm.token_addr || parm.select_mask || + parm.compare_mask || parm.zarch) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + vcpu->run->s.regs.gprs[ry] = 0; + /* + * If the pfault handling was not established or is already + * canceled SC24-6084 requests to return decimal 4. + */ + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + vcpu->run->s.regs.gprs[ry] = 4; + else + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + + rc = 0; + break; + default: + rc = -EOPNOTSUPP; + break; + } + + return rc; +} static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - vcpu_put(vcpu); - yield(); - vcpu_load(vcpu); + kvm_vcpu_on_spin(vcpu); + return 0; +} + +static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tcpu; + int tid; + int i; + + tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + vcpu->stat.diagnose_9c++; + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); + + if (tid == vcpu->vcpu_id) + return 0; + + kvm_for_each_vcpu(i, tcpu, kvm) + if (tcpu->vcpu_id == tid) { + kvm_vcpu_yield_to(tcpu); + break; + } + return 0; } static int __diag_ipl_functions(struct kvm_vcpu *vcpu) { unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; - unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; + unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); switch (subcode) { @@ -42,25 +176,68 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + kvm_s390_vcpu_stop(vcpu); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; vcpu->run->exit_reason = KVM_EXIT_S390_RESET; VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx", vcpu->run->s390_reset_flags); + trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags); return -EREMOTE; } +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret; + + /* No virtio-ccw notification? Get out quickly. */ + if (!vcpu->kvm->arch.css_support || + (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + * - gpr 4 contains the index on the bus (optionally) + */ + ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2] & 0xffffffff, + 8, &vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); + + /* + * Return cookie in gpr 2, but don't overwrite the register if the + * diagnose will be handled by userspace. + */ + if (ret != -EOPNOTSUPP) + vcpu->run->s.regs.gprs[2] = ret; + /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ + return ret < 0 ? ret : 0; +} + int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { - int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + trace_kvm_s390_handle_diag(vcpu, code); switch (code) { + case 0x10: + return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); + case 0x9c: + return __diag_time_slice_end_directed(vcpu); + case 0x258: + return __diag_page_ref_service(vcpu); case 0x308: return __diag_ipl_functions(vcpu); + case 0x500: + return __diag_virtio_hypercall(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c new file mode 100644 index 00000000000..4653ac6e182 --- /dev/null +++ b/arch/s390/kvm/gaccess.c @@ -0,0 +1,726 @@ +/* + * guest access functions + * + * Copyright IBM Corp. 2014 + * + */ + +#include <linux/vmalloc.h> +#include <linux/err.h> +#include <asm/pgtable.h> +#include "kvm-s390.h" +#include "gaccess.h" + +union asce { + unsigned long val; + struct { + unsigned long origin : 52; /* Region- or Segment-Table Origin */ + unsigned long : 2; + unsigned long g : 1; /* Subspace Group Control */ + unsigned long p : 1; /* Private Space Control */ + unsigned long s : 1; /* Storage-Alteration-Event Control */ + unsigned long x : 1; /* Space-Switch-Event Control */ + unsigned long r : 1; /* Real-Space Control */ + unsigned long : 1; + unsigned long dt : 2; /* Designation-Type Control */ + unsigned long tl : 2; /* Region- or Segment-Table Length */ + }; +}; + +enum { + ASCE_TYPE_SEGMENT = 0, + ASCE_TYPE_REGION3 = 1, + ASCE_TYPE_REGION2 = 2, + ASCE_TYPE_REGION1 = 3 +}; + +union region1_table_entry { + unsigned long val; + struct { + unsigned long rto: 52;/* Region-Table Origin */ + unsigned long : 2; + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Region-Second-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long : 1; + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Region-Second-Table Length */ + }; +}; + +union region2_table_entry { + unsigned long val; + struct { + unsigned long rto: 52;/* Region-Table Origin */ + unsigned long : 2; + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Region-Third-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long : 1; + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Region-Third-Table Length */ + }; +}; + +struct region3_table_entry_fc0 { + unsigned long sto: 52;/* Segment-Table Origin */ + unsigned long : 1; + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Segment-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Segment-Table Length */ +}; + +struct region3_table_entry_fc1 { + unsigned long rfaa : 33; /* Region-Frame Absolute Address */ + unsigned long : 14; + unsigned long av : 1; /* ACCF-Validity Control */ + unsigned long acc: 4; /* Access-Control Bits */ + unsigned long f : 1; /* Fetch-Protection Bit */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long co : 1; /* Change-Recording Override */ + unsigned long : 2; + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +union region3_table_entry { + unsigned long val; + struct region3_table_entry_fc0 fc0; + struct region3_table_entry_fc1 fc1; + struct { + unsigned long : 53; + unsigned long fc : 1; /* Format-Control */ + unsigned long : 4; + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; + }; +}; + +struct segment_entry_fc0 { + unsigned long pto: 53;/* Page-Table Origin */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 3; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +struct segment_entry_fc1 { + unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ + unsigned long : 3; + unsigned long av : 1; /* ACCF-Validity Control */ + unsigned long acc: 4; /* Access-Control Bits */ + unsigned long f : 1; /* Fetch-Protection Bit */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long co : 1; /* Change-Recording Override */ + unsigned long : 2; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +union segment_table_entry { + unsigned long val; + struct segment_entry_fc0 fc0; + struct segment_entry_fc1 fc1; + struct { + unsigned long : 53; + unsigned long fc : 1; /* Format-Control */ + unsigned long : 4; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; + }; +}; + +enum { + TABLE_TYPE_SEGMENT = 0, + TABLE_TYPE_REGION3 = 1, + TABLE_TYPE_REGION2 = 2, + TABLE_TYPE_REGION1 = 3 +}; + +union page_table_entry { + unsigned long val; + struct { + unsigned long pfra : 52; /* Page-Frame Real Address */ + unsigned long z : 1; /* Zero Bit */ + unsigned long i : 1; /* Page-Invalid Bit */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long co : 1; /* Change-Recording Override */ + unsigned long : 8; + }; +}; + +/* + * vaddress union in order to easily decode a virtual address into its + * region first index, region second index etc. parts. + */ +union vaddress { + unsigned long addr; + struct { + unsigned long rfx : 11; + unsigned long rsx : 11; + unsigned long rtx : 11; + unsigned long sx : 11; + unsigned long px : 8; + unsigned long bx : 12; + }; + struct { + unsigned long rfx01 : 2; + unsigned long : 9; + unsigned long rsx01 : 2; + unsigned long : 9; + unsigned long rtx01 : 2; + unsigned long : 9; + unsigned long sx01 : 2; + unsigned long : 29; + }; +}; + +/* + * raddress union which will contain the result (real or absolute address) + * after a page table walk. The rfaa, sfaa and pfra members are used to + * simply assign them the value of a region, segment or page table entry. + */ +union raddress { + unsigned long addr; + unsigned long rfaa : 33; /* Region-Frame Absolute Address */ + unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ + unsigned long pfra : 52; /* Page-Frame Real Address */ +}; + +static int ipte_lock_count; +static DEFINE_MUTEX(ipte_mutex); + +int ipte_lock_held(struct kvm_vcpu *vcpu) +{ + union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + + if (vcpu->arch.sie_block->eca & 1) + return ic->kh != 0; + return ipte_lock_count != 0; +} + +static void ipte_lock_simple(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + mutex_lock(&ipte_mutex); + ipte_lock_count++; + if (ipte_lock_count > 1) + goto out; + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + old = ACCESS_ONCE(*ic); + while (old.k) { + cond_resched(); + old = ACCESS_ONCE(*ic); + } + new = old; + new.k = 1; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); +out: + mutex_unlock(&ipte_mutex); +} + +static void ipte_unlock_simple(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + mutex_lock(&ipte_mutex); + ipte_lock_count--; + if (ipte_lock_count) + goto out; + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + new = old = ACCESS_ONCE(*ic); + new.k = 0; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + if (!ipte_lock_count) + wake_up(&vcpu->kvm->arch.ipte_wq); +out: + mutex_unlock(&ipte_mutex); +} + +static void ipte_lock_siif(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + old = ACCESS_ONCE(*ic); + while (old.kg) { + cond_resched(); + old = ACCESS_ONCE(*ic); + } + new = old; + new.k = 1; + new.kh++; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); +} + +static void ipte_unlock_siif(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + new = old = ACCESS_ONCE(*ic); + new.kh--; + if (!new.kh) + new.k = 0; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + if (!new.kh) + wake_up(&vcpu->kvm->arch.ipte_wq); +} + +void ipte_lock(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->eca & 1) + ipte_lock_siif(vcpu); + else + ipte_lock_simple(vcpu); +} + +void ipte_unlock(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->eca & 1) + ipte_unlock_siif(vcpu); + else + ipte_unlock_simple(vcpu); +} + +static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu) +{ + switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { + case PSW_AS_PRIMARY: + return vcpu->arch.sie_block->gcr[1]; + case PSW_AS_SECONDARY: + return vcpu->arch.sie_block->gcr[7]; + case PSW_AS_HOME: + return vcpu->arch.sie_block->gcr[13]; + } + return 0; +} + +static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) +{ + return kvm_read_guest(kvm, gpa, val, sizeof(*val)); +} + +/** + * guest_translate - translate a guest virtual into a guest absolute address + * @vcpu: virtual cpu + * @gva: guest virtual address + * @gpa: points to where guest physical (absolute) address should be stored + * @write: indicates if access is a write access + * + * Translate a guest virtual address into a guest absolute address by means + * of dynamic address translation as specified by the architecuture. + * If the resulting absolute address is not available in the configuration + * an addressing exception is indicated and @gpa will not be changed. + * + * Returns: - zero on success; @gpa contains the resulting absolute address + * - a negative value if guest access failed due to e.g. broken + * guest mapping + * - a positve value if an access exception happened. In this case + * the returned value is the program interruption code as defined + * by the architecture + */ +static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write) +{ + union vaddress vaddr = {.addr = gva}; + union raddress raddr = {.addr = gva}; + union page_table_entry pte; + int dat_protection = 0; + union ctlreg0 ctlreg0; + unsigned long ptr; + int edat1, edat2; + union asce asce; + + ctlreg0.val = vcpu->arch.sie_block->gcr[0]; + edat1 = ctlreg0.edat && test_vfacility(8); + edat2 = edat1 && test_vfacility(78); + asce.val = get_vcpu_asce(vcpu); + if (asce.r) + goto real_address; + ptr = asce.origin * 4096; + switch (asce.dt) { + case ASCE_TYPE_REGION1: + if (vaddr.rfx01 > asce.tl) + return PGM_REGION_FIRST_TRANS; + ptr += vaddr.rfx * 8; + break; + case ASCE_TYPE_REGION2: + if (vaddr.rfx) + return PGM_ASCE_TYPE; + if (vaddr.rsx01 > asce.tl) + return PGM_REGION_SECOND_TRANS; + ptr += vaddr.rsx * 8; + break; + case ASCE_TYPE_REGION3: + if (vaddr.rfx || vaddr.rsx) + return PGM_ASCE_TYPE; + if (vaddr.rtx01 > asce.tl) + return PGM_REGION_THIRD_TRANS; + ptr += vaddr.rtx * 8; + break; + case ASCE_TYPE_SEGMENT: + if (vaddr.rfx || vaddr.rsx || vaddr.rtx) + return PGM_ASCE_TYPE; + if (vaddr.sx01 > asce.tl) + return PGM_SEGMENT_TRANSLATION; + ptr += vaddr.sx * 8; + break; + } + switch (asce.dt) { + case ASCE_TYPE_REGION1: { + union region1_table_entry rfte; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &rfte.val)) + return -EFAULT; + if (rfte.i) + return PGM_REGION_FIRST_TRANS; + if (rfte.tt != TABLE_TYPE_REGION1) + return PGM_TRANSLATION_SPEC; + if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl) + return PGM_REGION_SECOND_TRANS; + if (edat1) + dat_protection |= rfte.p; + ptr = rfte.rto * 4096 + vaddr.rsx * 8; + } + /* fallthrough */ + case ASCE_TYPE_REGION2: { + union region2_table_entry rste; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &rste.val)) + return -EFAULT; + if (rste.i) + return PGM_REGION_SECOND_TRANS; + if (rste.tt != TABLE_TYPE_REGION2) + return PGM_TRANSLATION_SPEC; + if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl) + return PGM_REGION_THIRD_TRANS; + if (edat1) + dat_protection |= rste.p; + ptr = rste.rto * 4096 + vaddr.rtx * 8; + } + /* fallthrough */ + case ASCE_TYPE_REGION3: { + union region3_table_entry rtte; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &rtte.val)) + return -EFAULT; + if (rtte.i) + return PGM_REGION_THIRD_TRANS; + if (rtte.tt != TABLE_TYPE_REGION3) + return PGM_TRANSLATION_SPEC; + if (rtte.cr && asce.p && edat2) + return PGM_TRANSLATION_SPEC; + if (rtte.fc && edat2) { + dat_protection |= rtte.fc1.p; + raddr.rfaa = rtte.fc1.rfaa; + goto absolute_address; + } + if (vaddr.sx01 < rtte.fc0.tf) + return PGM_SEGMENT_TRANSLATION; + if (vaddr.sx01 > rtte.fc0.tl) + return PGM_SEGMENT_TRANSLATION; + if (edat1) + dat_protection |= rtte.fc0.p; + ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8; + } + /* fallthrough */ + case ASCE_TYPE_SEGMENT: { + union segment_table_entry ste; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &ste.val)) + return -EFAULT; + if (ste.i) + return PGM_SEGMENT_TRANSLATION; + if (ste.tt != TABLE_TYPE_SEGMENT) + return PGM_TRANSLATION_SPEC; + if (ste.cs && asce.p) + return PGM_TRANSLATION_SPEC; + if (ste.fc && edat1) { + dat_protection |= ste.fc1.p; + raddr.sfaa = ste.fc1.sfaa; + goto absolute_address; + } + dat_protection |= ste.fc0.p; + ptr = ste.fc0.pto * 2048 + vaddr.px * 8; + } + } + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &pte.val)) + return -EFAULT; + if (pte.i) + return PGM_PAGE_TRANSLATION; + if (pte.z) + return PGM_TRANSLATION_SPEC; + if (pte.co && !edat1) + return PGM_TRANSLATION_SPEC; + dat_protection |= pte.p; + raddr.pfra = pte.pfra; +real_address: + raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); +absolute_address: + if (write && dat_protection) + return PGM_PROTECTION; + if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) + return PGM_ADDRESSING; + *gpa = raddr.addr; + return 0; +} + +static inline int is_low_address(unsigned long ga) +{ + /* Check for address ranges 0..511 and 4096..4607 */ + return (ga & ~0x11fful) == 0; +} + +static int low_address_protection_enabled(struct kvm_vcpu *vcpu) +{ + union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + union asce asce; + + if (!ctlreg0.lap) + return 0; + asce.val = get_vcpu_asce(vcpu); + if (psw_bits(*psw).t && asce.p) + return 0; + return 1; +} + +struct trans_exc_code_bits { + unsigned long addr : 52; /* Translation-exception Address */ + unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ + unsigned long : 7; + unsigned long b61 : 1; + unsigned long as : 2; /* ASCE Identifier */ +}; + +enum { + FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ + FSI_STORE = 1, /* Exception was due to store operation */ + FSI_FETCH = 2 /* Exception was due to fetch operation */ +}; + +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, + unsigned long *pages, unsigned long nr_pages, + int write) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec_bits; + int lap_enabled, rc; + + memset(pgm, 0, sizeof(*pgm)); + tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw_bits(*psw).as; + lap_enabled = low_address_protection_enabled(vcpu); + while (nr_pages) { + ga = kvm_s390_logical_to_effective(vcpu, ga); + tec_bits->addr = ga >> PAGE_SHIFT; + if (write && lap_enabled && is_low_address(ga)) { + pgm->code = PGM_PROTECTION; + return pgm->code; + } + ga &= PAGE_MASK; + if (psw_bits(*psw).t) { + rc = guest_translate(vcpu, ga, pages, write); + if (rc < 0) + return rc; + if (rc == PGM_PROTECTION) + tec_bits->b61 = 1; + if (rc) + pgm->code = rc; + } else { + *pages = kvm_s390_real_to_abs(vcpu, ga); + if (kvm_is_error_gpa(vcpu->kvm, *pages)) + pgm->code = PGM_ADDRESSING; + } + if (pgm->code) + return pgm->code; + ga += PAGE_SIZE; + pages++; + nr_pages--; + } + return 0; +} + +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len, int write) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long _len, nr_pages, gpa, idx; + unsigned long pages_array[2]; + unsigned long *pages; + int need_ipte_lock; + union asce asce; + int rc; + + if (!len) + return 0; + /* Access register mode is not supported yet. */ + if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) + return -EOPNOTSUPP; + nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; + pages = pages_array; + if (nr_pages > ARRAY_SIZE(pages_array)) + pages = vmalloc(nr_pages * sizeof(unsigned long)); + if (!pages) + return -ENOMEM; + asce.val = get_vcpu_asce(vcpu); + need_ipte_lock = psw_bits(*psw).t && !asce.r; + if (need_ipte_lock) + ipte_lock(vcpu); + rc = guest_page_range(vcpu, ga, pages, nr_pages, write); + for (idx = 0; idx < nr_pages && !rc; idx++) { + gpa = *(pages + idx) + (ga & ~PAGE_MASK); + _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); + if (write) + rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); + else + rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); + len -= _len; + ga += _len; + data += _len; + } + if (need_ipte_lock) + ipte_unlock(vcpu); + if (nr_pages > ARRAY_SIZE(pages_array)) + vfree(pages); + return rc; +} + +int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, + void *data, unsigned long len, int write) +{ + unsigned long _len, gpa; + int rc = 0; + + while (len && !rc) { + gpa = kvm_s390_real_to_abs(vcpu, gra); + _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); + if (write) + rc = write_guest_abs(vcpu, gpa, data, _len); + else + rc = read_guest_abs(vcpu, gpa, data, _len); + len -= _len; + gra += _len; + data += _len; + } + return rc; +} + +/** + * guest_translate_address - translate guest logical into guest absolute address + * + * Parameter semantics are the same as the ones from guest_translate. + * The memory contents at the guest address are not changed. + * + * Note: The IPTE lock is not taken during this function, so the caller + * has to take care of this. + */ +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec; + union asce asce; + int rc; + + /* Access register mode is not supported yet. */ + if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) + return -EOPNOTSUPP; + + gva = kvm_s390_logical_to_effective(vcpu, gva); + memset(pgm, 0, sizeof(*pgm)); + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec->as = psw_bits(*psw).as; + tec->fsi = write ? FSI_STORE : FSI_FETCH; + tec->addr = gva >> PAGE_SHIFT; + if (is_low_address(gva) && low_address_protection_enabled(vcpu)) { + if (write) { + rc = pgm->code = PGM_PROTECTION; + return rc; + } + } + + asce.val = get_vcpu_asce(vcpu); + if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ + rc = guest_translate(vcpu, gva, gpa, write); + if (rc > 0) { + if (rc == PGM_PROTECTION) + tec->b61 = 1; + pgm->code = rc; + } + } else { + rc = 0; + *gpa = kvm_s390_real_to_abs(vcpu, gva); + if (kvm_is_error_gpa(vcpu->kvm, *gpa)) + rc = pgm->code = PGM_ADDRESSING; + } + + return rc; +} + +/** + * kvm_s390_check_low_addr_protection - check for low-address protection + * @ga: Guest address + * + * Checks whether an address is subject to low-address protection and set + * up vcpu->arch.pgm accordingly if necessary. + * + * Return: 0 if no protection exception, or PGM_PROTECTION if protected. + */ +int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec_bits; + + if (!is_low_address(ga) || !low_address_protection_enabled(vcpu)) + return 0; + + memset(pgm, 0, sizeof(*pgm)); + tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec_bits->fsi = FSI_STORE; + tec_bits->as = psw_bits(*psw).as; + tec_bits->addr = ga >> PAGE_SHIFT; + pgm->code = PGM_PROTECTION; + + return pgm->code; +} diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 03c716a0f01..0149cf15058 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -1,7 +1,7 @@ /* - * gaccess.h - access guest memory + * access guest memory * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,265 +15,321 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <linux/ptrace.h> #include "kvm-s390.h" -static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, - unsigned long guestaddr) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - unsigned long origin = vcpu->arch.sie_block->gmsor; - unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu); - - if (guestaddr < 2 * PAGE_SIZE) - guestaddr += prefix; - else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) - guestaddr -= prefix; - - if (guestaddr > memsize) - return (void __user __force *) ERR_PTR(-EFAULT); - - guestaddr += origin; - - return (void __user *) guestaddr; -} - -static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (unsigned long __user *) uptr); -} - -static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 *result) +/** + * kvm_s390_real_to_abs - convert guest real address to guest absolute address + * @vcpu - guest virtual cpu + * @gra - guest real address + * + * Returns the guest absolute address that corresponds to the passed guest real + * address @gra of a virtual guest cpu by applying its prefix. + */ +static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, + unsigned long gra) { - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); + unsigned long prefix = kvm_s390_get_prefix(vcpu); - return get_user(*result, (u32 __user *) uptr); + if (gra < 2 * PAGE_SIZE) + gra += prefix; + else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE) + gra -= prefix; + return gra; } -static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 *result) +/** + * kvm_s390_logical_to_effective - convert guest logical to effective address + * @vcpu: guest virtual cpu + * @ga: guest logical address + * + * Convert a guest vcpu logical address to a guest vcpu effective address by + * applying the rules of the vcpu's addressing mode defined by PSW bits 31 + * and 32 (extendended/basic addressing mode). + * + * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing + * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode) + * of @ga will be zeroed and the remaining bits will be returned. + */ +static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu, + unsigned long ga) { - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + psw_t *psw = &vcpu->arch.sie_block->gpsw; - BUG_ON(guestaddr & 1); - - if (IS_ERR(uptr)) - return PTR_ERR(uptr); - - return get_user(*result, (u16 __user *) uptr); + if (psw_bits(*psw).eaba == PSW_AMODE_64BIT) + return ga; + if (psw_bits(*psw).eaba == PSW_AMODE_31BIT) + return ga & ((1UL << 31) - 1); + return ga & ((1UL << 24) - 1); } -static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u8 __user *) uptr); -} +/* + * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions + * which shall only be used to access the lowcore of a vcpu. + * These functions should be used for e.g. interrupt handlers where no + * guest memory access protection facilities, like key or low address + * protection, are applicable. + * At a later point guest vcpu lowcore access should happen via pinned + * prefix pages, so that these pages can be accessed directly via the + * kernel mapping. All of these *_lc functions can be removed then. + */ -static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 value) +/** + * put_guest_lc - write a simple variable to a guest vcpu's lowcore + * @vcpu: virtual cpu + * @x: value to copy to guest + * @gra: vcpu's destination guest real address + * + * Copies a simple value from kernel space to a guest vcpu's lowcore. + * The size of the variable may be 1, 2, 4 or 8 bytes. The destination + * must be located in the vcpu's lowcore. Otherwise the result is undefined. + * + * Returns zero on success or -EFAULT on error. + * + * Note: an error indicates that either the kernel is out of memory or + * the guest memory mapping is broken. In any case the best solution + * would be to terminate the guest. + * It is wrong to inject a guest exception. + */ +#define put_guest_lc(vcpu, x, gra) \ +({ \ + struct kvm_vcpu *__vcpu = (vcpu); \ + __typeof__(*(gra)) __x = (x); \ + unsigned long __gpa; \ + \ + __gpa = (unsigned long)(gra); \ + __gpa += kvm_s390_get_prefix(__vcpu); \ + kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \ +}) + +/** + * write_guest_lc - copy data from kernel space to guest vcpu's lowcore + * @vcpu: virtual cpu + * @gra: vcpu's source guest real address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy data from kernel space to guest vcpu's lowcore. The entire range must + * be located within the vcpu's lowcore, otherwise the result is undefined. + * + * Returns zero on success or -EFAULT on error. + * + * Note: an error indicates that either the kernel is out of memory or + * the guest memory mapping is broken. In any case the best solution + * would be to terminate the guest. + * It is wrong to inject a guest exception. + */ +static inline __must_check +int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) { - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + unsigned long gpa = gra + kvm_s390_get_prefix(vcpu); - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u64 __user *) uptr); + return kvm_write_guest(vcpu->kvm, gpa, data, len); } -static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 value) +/** + * read_guest_lc - copy data from guest vcpu's lowcore to kernel space + * @vcpu: virtual cpu + * @gra: vcpu's source guest real address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy data from guest vcpu's lowcore to kernel space. The entire range must + * be located within the vcpu's lowcore, otherwise the result is undefined. + * + * Returns zero on success or -EFAULT on error. + * + * Note: an error indicates that either the kernel is out of memory or + * the guest memory mapping is broken. In any case the best solution + * would be to terminate the guest. + * It is wrong to inject a guest exception. + */ +static inline __must_check +int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) { - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + unsigned long gpa = gra + kvm_s390_get_prefix(vcpu); - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u32 __user *) uptr); + return kvm_read_guest(vcpu->kvm, gpa, data, len); } -static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write); - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len, int write); - return put_user(value, (u16 __user *) uptr); -} +int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, + void *data, unsigned long len, int write); -static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 value) +/** + * write_guest - copy data from kernel space to guest space + * @vcpu: virtual cpu + * @ga: guest address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @data (kernel space) to @ga (guest address). + * In order to copy data to guest space the PSW of the vcpu is inspected: + * If DAT is off data will be copied to guest real or absolute memory. + * If DAT is on data will be copied to the address space as specified by + * the address space bits of the PSW: + * Primary, secondory or home space (access register mode is currently not + * implemented). + * The addressing mode of the PSW is also inspected, so that address wrap + * around is taken into account for 24-, 31- and 64-bit addressing mode, + * if the to be copied data crosses page boundaries in guest address space. + * In addition also low address and DAT protection are inspected before + * copying any data (key protection is currently not implemented). + * + * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu. + * In case of an access exception (e.g. protection exception) pgm will contain + * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()' + * will inject a correct exception into the guest. + * If no access exception happened, the contents of pgm are undefined when + * this function returns. + * + * Returns: - zero on success + * - a negative value if e.g. the guest mapping is broken or in + * case of out-of-memory. In this case the contents of pgm are + * undefined. Also parts of @data may have been copied to guest + * space. + * - a positive value if an access exception happened. In this case + * the returned value is the program interruption code and the + * contents of pgm may be used to inject an exception into the + * guest. No data has been copied to guest space. + * + * Note: in case an access exception is recognized no data has been copied to + * guest space (this is also true, if the to be copied data would cross + * one or more page boundaries in guest space). + * Therefore this function may be used for nullifying and suppressing + * instruction emulation. + * It may also be used for terminating instructions, if it is undefined + * if data has been changed in guest space in case of an exception. + */ +static inline __must_check +int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len) { - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u8 __user *) uptr); + return access_guest(vcpu, ga, data, len, 1); } - -static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, - unsigned long guestdest, - const void *from, unsigned long n) +/** + * read_guest - copy data from guest space to kernel space + * @vcpu: virtual cpu + * @ga: guest address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @ga (guest address) to @data (kernel space). + * + * The behaviour of read_guest is identical to write_guest, except that + * data will be copied from guest space to kernel space. + */ +static inline __must_check +int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len) { - int rc; - unsigned long i; - const u8 *data = from; - - for (i = 0; i < n; i++) { - rc = put_guest_u8(vcpu, guestdest++, *(data++)); - if (rc < 0) - return rc; - } - return 0; + return access_guest(vcpu, ga, data, len, 0); } -static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, - const void *from, unsigned long n) +/** + * write_guest_abs - copy data from kernel space to guest space absolute + * @vcpu: virtual cpu + * @gpa: guest physical (absolute) address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest low address and key protection are not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to guest memory. + */ +static inline __must_check +int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, + unsigned long len) { - unsigned long prefix = vcpu->arch.sie_block->prefix; - unsigned long origin = vcpu->arch.sie_block->gmsor; - unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu); - - if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestdest < prefix) && (guestdest + n > prefix)) - goto slowpath; - - if ((guestdest < prefix + 2 * PAGE_SIZE) - && (guestdest + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestdest < 2 * PAGE_SIZE) - guestdest += prefix; - else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) - guestdest -= prefix; - - if (guestdest + n > memsize) - return -EFAULT; - - if (guestdest + n < guestdest) - return -EFAULT; - - guestdest += origin; - - return copy_to_user((void __user *) guestdest, from, n); -slowpath: - return __copy_to_guest_slow(vcpu, guestdest, from, n); + return kvm_write_guest(vcpu->kvm, gpa, data, len); } -static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) +/** + * read_guest_abs - copy data from guest space absolute to kernel space + * @vcpu: virtual cpu + * @gpa: guest physical (absolute) address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest key protection is not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to kernel space. + */ +static inline __must_check +int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, + unsigned long len) { - int rc; - unsigned long i; - u8 *data = to; - - for (i = 0; i < n; i++) { - rc = get_guest_u8(vcpu, guestsrc++, data++); - if (rc < 0) - return rc; - } - return 0; + return kvm_read_guest(vcpu->kvm, gpa, data, len); } -static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, unsigned long n) +/** + * write_guest_real - copy data from kernel space to guest space real + * @vcpu: virtual cpu + * @gra: guest real address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @data (kernel space) to @gra (guest real address). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest low address and key protection are not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to guest memory. + */ +static inline __must_check +int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) { - unsigned long prefix = vcpu->arch.sie_block->prefix; - unsigned long origin = vcpu->arch.sie_block->gmsor; - unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu); - - if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestsrc < prefix) && (guestsrc + n > prefix)) - goto slowpath; - - if ((guestsrc < prefix + 2 * PAGE_SIZE) - && (guestsrc + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestsrc < 2 * PAGE_SIZE) - guestsrc += prefix; - else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) - guestsrc -= prefix; - - if (guestsrc + n > memsize) - return -EFAULT; - - if (guestsrc + n < guestsrc) - return -EFAULT; - - guestsrc += origin; - - return copy_from_user(to, (void __user *) guestsrc, n); -slowpath: - return __copy_from_guest_slow(vcpu, to, guestsrc, n); + return access_guest_real(vcpu, gra, data, len, 1); } -static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, - unsigned long guestdest, - const void *from, unsigned long n) +/** + * read_guest_real - copy data from guest space real to kernel space + * @vcpu: virtual cpu + * @gra: guest real address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @gra (guest real address) to @data (kernel space). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest key protection is not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to kernel space. + */ +static inline __must_check +int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) { - unsigned long origin = vcpu->arch.sie_block->gmsor; - unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu); - - if (guestdest + n > memsize) - return -EFAULT; - - if (guestdest + n < guestdest) - return -EFAULT; - - guestdest += origin; - - return copy_to_user((void __user *) guestdest, from, n); + return access_guest_real(vcpu, gra, data, len, 0); } -static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - unsigned long origin = vcpu->arch.sie_block->gmsor; - unsigned long memsize = kvm_s390_vcpu_get_memsize(vcpu); - - if (guestsrc + n > memsize) - return -EFAULT; +void ipte_lock(struct kvm_vcpu *vcpu); +void ipte_unlock(struct kvm_vcpu *vcpu); +int ipte_lock_held(struct kvm_vcpu *vcpu); +int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga); - if (guestsrc + n < guestsrc) - return -EFAULT; - - guestsrc += origin; - - return copy_from_user(to, (void __user *) guestsrc, n); -} -#endif +#endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c new file mode 100644 index 00000000000..3e8d4092ce3 --- /dev/null +++ b/arch/s390/kvm/guestdbg.c @@ -0,0 +1,482 @@ +/* + * kvm guest debug support + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> + */ +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include "kvm-s390.h" +#include "gaccess.h" + +/* + * Extends the address range given by *start and *stop to include the address + * range starting with estart and the length len. Takes care of overflowing + * intervals and tries to minimize the overall intervall size. + */ +static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) +{ + u64 estop; + + if (len > 0) + len--; + else + len = 0; + + estop = estart + len; + + /* 0-0 range represents "not set" */ + if ((*start == 0) && (*stop == 0)) { + *start = estart; + *stop = estop; + } else if (*start <= *stop) { + /* increase the existing range */ + if (estart < *start) + *start = estart; + if (estop > *stop) + *stop = estop; + } else { + /* "overflowing" interval, whereby *stop > *start */ + if (estart <= *stop) { + if (estop > *stop) + *stop = estop; + } else if (estop > *start) { + if (estart < *start) + *start = estart; + } + /* minimize the range */ + else if ((estop - *stop) < (*start - estart)) + *stop = estop; + else + *start = estart; + } +} + +#define MAX_INST_SIZE 6 + +static void enable_all_hw_bp(struct kvm_vcpu *vcpu) +{ + unsigned long start, len; + u64 *cr9 = &vcpu->arch.sie_block->gcr[9]; + u64 *cr10 = &vcpu->arch.sie_block->gcr[10]; + u64 *cr11 = &vcpu->arch.sie_block->gcr[11]; + int i; + + if (vcpu->arch.guestdbg.nr_hw_bp <= 0 || + vcpu->arch.guestdbg.hw_bp_info == NULL) + return; + + /* + * If the guest is not interrested in branching events, we can savely + * limit them to the PER address range. + */ + if (!(*cr9 & PER_EVENT_BRANCH)) + *cr9 |= PER_CONTROL_BRANCH_ADDRESS; + *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) { + start = vcpu->arch.guestdbg.hw_bp_info[i].addr; + len = vcpu->arch.guestdbg.hw_bp_info[i].len; + + /* + * The instruction in front of the desired bp has to + * report instruction-fetching events + */ + if (start < MAX_INST_SIZE) { + len += start; + start = 0; + } else { + start -= MAX_INST_SIZE; + len += MAX_INST_SIZE; + } + + extend_address_range(cr10, cr11, start, len); + } +} + +static void enable_all_hw_wp(struct kvm_vcpu *vcpu) +{ + unsigned long start, len; + u64 *cr9 = &vcpu->arch.sie_block->gcr[9]; + u64 *cr10 = &vcpu->arch.sie_block->gcr[10]; + u64 *cr11 = &vcpu->arch.sie_block->gcr[11]; + int i; + + if (vcpu->arch.guestdbg.nr_hw_wp <= 0 || + vcpu->arch.guestdbg.hw_wp_info == NULL) + return; + + /* if host uses storage alternation for special address + * spaces, enable all events and give all to the guest */ + if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) { + *cr9 &= ~PER_CONTROL_ALTERATION; + *cr10 = 0; + *cr11 = PSW_ADDR_INSN; + } else { + *cr9 &= ~PER_CONTROL_ALTERATION; + *cr9 |= PER_EVENT_STORE; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) { + start = vcpu->arch.guestdbg.hw_wp_info[i].addr; + len = vcpu->arch.guestdbg.hw_wp_info[i].len; + + extend_address_range(cr10, cr11, start, len); + } + } +} + +void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu) +{ + vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0]; + vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9]; + vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10]; + vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11]; +} + +void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0; + vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9; + vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10; + vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11; +} + +void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu) +{ + /* + * TODO: if guest psw has per enabled, otherwise 0s! + * This reduces the amount of reported events. + * Need to intercept all psw changes! + */ + + if (guestdbg_sstep_enabled(vcpu)) { + /* disable timer (clock-comparator) interrupts */ + vcpu->arch.sie_block->gcr[0] &= ~0x800ul; + vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH; + vcpu->arch.sie_block->gcr[10] = 0; + vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN; + } + + if (guestdbg_hw_bp_enabled(vcpu)) { + enable_all_hw_bp(vcpu); + enable_all_hw_wp(vcpu); + } + + /* TODO: Instruction-fetching-nullification not allowed for now */ + if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION) + vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION; +} + +#define MAX_WP_SIZE 100 + +static int __import_wp_info(struct kvm_vcpu *vcpu, + struct kvm_hw_breakpoint *bp_data, + struct kvm_hw_wp_info_arch *wp_info) +{ + int ret = 0; + wp_info->len = bp_data->len; + wp_info->addr = bp_data->addr; + wp_info->phys_addr = bp_data->phys_addr; + wp_info->old_data = NULL; + + if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE) + return -EINVAL; + + wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL); + if (!wp_info->old_data) + return -ENOMEM; + /* try to backup the original value */ + ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data, + wp_info->len); + if (ret) { + kfree(wp_info->old_data); + wp_info->old_data = NULL; + } + + return ret; +} + +#define MAX_BP_COUNT 50 + +int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + int ret = 0, nr_wp = 0, nr_bp = 0, i, size; + struct kvm_hw_breakpoint *bp_data = NULL; + struct kvm_hw_wp_info_arch *wp_info = NULL; + struct kvm_hw_bp_info_arch *bp_info = NULL; + + if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp) + return 0; + else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT) + return -EINVAL; + + size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint); + bp_data = kmalloc(size, GFP_KERNEL); + if (!bp_data) { + ret = -ENOMEM; + goto error; + } + + if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) { + ret = -EFAULT; + goto error; + } + + for (i = 0; i < dbg->arch.nr_hw_bp; i++) { + switch (bp_data[i].type) { + case KVM_HW_WP_WRITE: + nr_wp++; + break; + case KVM_HW_BP: + nr_bp++; + break; + default: + break; + } + } + + size = nr_wp * sizeof(struct kvm_hw_wp_info_arch); + if (size > 0) { + wp_info = kmalloc(size, GFP_KERNEL); + if (!wp_info) { + ret = -ENOMEM; + goto error; + } + } + size = nr_bp * sizeof(struct kvm_hw_bp_info_arch); + if (size > 0) { + bp_info = kmalloc(size, GFP_KERNEL); + if (!bp_info) { + ret = -ENOMEM; + goto error; + } + } + + for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) { + switch (bp_data[i].type) { + case KVM_HW_WP_WRITE: + ret = __import_wp_info(vcpu, &bp_data[i], + &wp_info[nr_wp]); + if (ret) + goto error; + nr_wp++; + break; + case KVM_HW_BP: + bp_info[nr_bp].len = bp_data[i].len; + bp_info[nr_bp].addr = bp_data[i].addr; + nr_bp++; + break; + } + } + + vcpu->arch.guestdbg.nr_hw_bp = nr_bp; + vcpu->arch.guestdbg.hw_bp_info = bp_info; + vcpu->arch.guestdbg.nr_hw_wp = nr_wp; + vcpu->arch.guestdbg.hw_wp_info = wp_info; + return 0; +error: + kfree(bp_data); + kfree(wp_info); + kfree(bp_info); + return ret; +} + +void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_hw_wp_info_arch *hw_wp_info = NULL; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) { + hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i]; + kfree(hw_wp_info->old_data); + hw_wp_info->old_data = NULL; + } + kfree(vcpu->arch.guestdbg.hw_wp_info); + vcpu->arch.guestdbg.hw_wp_info = NULL; + + kfree(vcpu->arch.guestdbg.hw_bp_info); + vcpu->arch.guestdbg.hw_bp_info = NULL; + + vcpu->arch.guestdbg.nr_hw_wp = 0; + vcpu->arch.guestdbg.nr_hw_bp = 0; +} + +static inline int in_addr_range(u64 addr, u64 a, u64 b) +{ + if (a <= b) + return (addr >= a) && (addr <= b); + else + /* "overflowing" interval */ + return (addr <= a) && (addr >= b); +} + +#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1) + +static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu, + unsigned long addr) +{ + struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info; + int i; + + if (vcpu->arch.guestdbg.nr_hw_bp == 0) + return NULL; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) { + /* addr is directly the start or in the range of a bp */ + if (addr == bp_info->addr) + goto found; + if (bp_info->len > 0 && + in_addr_range(addr, bp_info->addr, end_of_range(bp_info))) + goto found; + + bp_info++; + } + + return NULL; +found: + return bp_info; +} + +static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_hw_wp_info_arch *wp_info = NULL; + void *temp = NULL; + + if (vcpu->arch.guestdbg.nr_hw_wp == 0) + return NULL; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) { + wp_info = &vcpu->arch.guestdbg.hw_wp_info[i]; + if (!wp_info || !wp_info->old_data || wp_info->len <= 0) + continue; + + temp = kmalloc(wp_info->len, GFP_KERNEL); + if (!temp) + continue; + + /* refetch the wp data and compare it to the old value */ + if (!read_guest(vcpu, wp_info->phys_addr, temp, + wp_info->len)) { + if (memcmp(temp, wp_info->old_data, wp_info->len)) { + kfree(temp); + return wp_info; + } + } + kfree(temp); + temp = NULL; + } + + return NULL; +} + +void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu) +{ + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING; +} + +#define per_bp_event(code) \ + (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH)) +#define per_write_wp_event(code) \ + (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL)) + +static int debug_exit_required(struct kvm_vcpu *vcpu) +{ + u32 perc = (vcpu->arch.sie_block->perc << 24); + struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch; + struct kvm_hw_wp_info_arch *wp_info = NULL; + struct kvm_hw_bp_info_arch *bp_info = NULL; + unsigned long addr = vcpu->arch.sie_block->gpsw.addr; + unsigned long peraddr = vcpu->arch.sie_block->peraddr; + + if (guestdbg_hw_bp_enabled(vcpu)) { + if (per_write_wp_event(perc) && + vcpu->arch.guestdbg.nr_hw_wp > 0) { + wp_info = any_wp_changed(vcpu); + if (wp_info) { + debug_exit->addr = wp_info->addr; + debug_exit->type = KVM_HW_WP_WRITE; + goto exit_required; + } + } + if (per_bp_event(perc) && + vcpu->arch.guestdbg.nr_hw_bp > 0) { + bp_info = find_hw_bp(vcpu, addr); + /* remove duplicate events if PC==PER address */ + if (bp_info && (addr != peraddr)) { + debug_exit->addr = addr; + debug_exit->type = KVM_HW_BP; + vcpu->arch.guestdbg.last_bp = addr; + goto exit_required; + } + /* breakpoint missed */ + bp_info = find_hw_bp(vcpu, peraddr); + if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) { + debug_exit->addr = peraddr; + debug_exit->type = KVM_HW_BP; + goto exit_required; + } + } + } + if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) { + debug_exit->addr = addr; + debug_exit->type = KVM_SINGLESTEP; + goto exit_required; + } + + return 0; +exit_required: + return 1; +} + +#define guest_per_enabled(vcpu) \ + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) + +static void filter_guest_per_event(struct kvm_vcpu *vcpu) +{ + u32 perc = vcpu->arch.sie_block->perc << 24; + u64 peraddr = vcpu->arch.sie_block->peraddr; + u64 addr = vcpu->arch.sie_block->gpsw.addr; + u64 cr9 = vcpu->arch.sie_block->gcr[9]; + u64 cr10 = vcpu->arch.sie_block->gcr[10]; + u64 cr11 = vcpu->arch.sie_block->gcr[11]; + /* filter all events, demanded by the guest */ + u32 guest_perc = perc & cr9 & PER_EVENT_MASK; + + if (!guest_per_enabled(vcpu)) + guest_perc = 0; + + /* filter "successful-branching" events */ + if (guest_perc & PER_EVENT_BRANCH && + cr9 & PER_CONTROL_BRANCH_ADDRESS && + !in_addr_range(addr, cr10, cr11)) + guest_perc &= ~PER_EVENT_BRANCH; + + /* filter "instruction-fetching" events */ + if (guest_perc & PER_EVENT_IFETCH && + !in_addr_range(peraddr, cr10, cr11)) + guest_perc &= ~PER_EVENT_IFETCH; + + /* All other PER events will be given to the guest */ + /* TODO: Check alterated address/address space */ + + vcpu->arch.sie_block->perc = guest_perc >> 24; + + if (!guest_perc) + vcpu->arch.sie_block->iprcc &= ~PGM_PER; +} + +void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) +{ + if (debug_exit_required(vcpu)) + vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; + + filter_guest_per_event(vcpu); +} diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f7b6df45d8b..a0b586c1913 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -1,7 +1,7 @@ /* - * intercept.c - in-kernel handling for sie intercepts + * in-kernel handling for sie intercepts * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -16,96 +16,26 @@ #include <linux/pagemap.h> #include <asm/kvm_host.h> +#include <asm/asm-offsets.h> +#include <asm/irq.h> #include "kvm-s390.h" #include "gaccess.h" +#include "trace.h" +#include "trace-s390.h" -static int handle_lctlg(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + - ((vcpu->arch.sie_block->ipb & 0xff00) << 4); - u64 useraddr; - int reg, rc; - - vcpu->stat.instruction_lctlg++; - if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) - return -EOPNOTSUPP; - useraddr = disp2; - if (base2) - useraddr += vcpu->arch.guest_gprs[base2]; - - if (useraddr & 7) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - reg = reg1; - - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, - disp2); - - do { - rc = get_guest_u64(vcpu, useraddr, - &vcpu->arch.sie_block->gcr[reg]); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } - useraddr += 8; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static int handle_lctl(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); - u64 useraddr; - u32 val = 0; - int reg, rc; - - vcpu->stat.instruction_lctl++; - - useraddr = disp2; - if (base2) - useraddr += vcpu->arch.guest_gprs[base2]; - - if (useraddr & 3) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, - disp2); - - reg = reg1; - do { - rc = get_guest_u32(vcpu, useraddr, &val); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } - vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - useraddr += 4; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static intercept_handler_t instruction_handlers[256] = { +static const intercept_handler_t instruction_handlers[256] = { + [0x01] = kvm_s390_handle_01, + [0x82] = kvm_s390_handle_lpsw, [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, - [0xb7] = handle_lctl, - [0xeb] = handle_lctlg, + [0xb6] = kvm_s390_handle_stctl, + [0xb7] = kvm_s390_handle_lctl, + [0xb9] = kvm_s390_handle_b9, + [0xe5] = kvm_s390_handle_e5, + [0xeb] = kvm_s390_handle_eb, }; static int handle_noop(struct kvm_vcpu *vcpu) @@ -117,9 +47,6 @@ static int handle_noop(struct kvm_vcpu *vcpu) case 0x10: vcpu->stat.exit_external_request++; break; - case 0x14: - vcpu->stat.exit_external_interrupt++; - break; default: break; /* nothing */ } @@ -131,54 +58,40 @@ static int handle_stop(struct kvm_vcpu *vcpu) int rc = 0; vcpu->stat.exit_stop_request++; - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); spin_lock_bh(&vcpu->arch.local_int.lock); - if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_NOADDR); - if (rc >= 0) - rc = -EOPNOTSUPP; - } - if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { - vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; - rc = SIE_INTERCEPT_RERUNVCPU; - vcpu->run->exit_reason = KVM_EXIT_INTR; - } + trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + kvm_s390_vcpu_stop(vcpu); vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); rc = -EOPNOTSUPP; } - spin_unlock_bh(&vcpu->arch.local_int.lock); + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + /* store status must be called unlocked. Since local_int.lock + * only protects local_int.* and not guest memory we can give + * up the lock here */ + spin_unlock_bh(&vcpu->arch.local_int.lock); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -EOPNOTSUPP; + } else + spin_unlock_bh(&vcpu->arch.local_int.lock); return rc; } static int handle_validity(struct kvm_vcpu *vcpu) { int viwhy = vcpu->arch.sie_block->ipb >> 16; - int rc; vcpu->stat.exit_validity++; - if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix - <= kvm_s390_vcpu_get_memsize(vcpu) - 2*PAGE_SIZE)) { - rc = fault_in_pages_writeable((char __user *) - vcpu->arch.sie_block->gmsor + - vcpu->arch.sie_block->prefix, - 2*PAGE_SIZE); - if (rc) - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - } else - rc = -EOPNOTSUPP; - - if (rc) - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return rc; + trace_kvm_s390_intercept_validity(vcpu, viwhy); + WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); + return -EOPNOTSUPP; } static int handle_instruction(struct kvm_vcpu *vcpu) @@ -186,16 +99,129 @@ static int handle_instruction(struct kvm_vcpu *vcpu) intercept_handler_t handler; vcpu->stat.exit_instruction++; + trace_kvm_s390_intercept_instruction(vcpu, + vcpu->arch.sie_block->ipa, + vcpu->arch.sie_block->ipb); handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; if (handler) return handler(vcpu); return -EOPNOTSUPP; } +static void __extract_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ + memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); + pgm_info->code = vcpu->arch.sie_block->iprcc; + + switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { + case PGM_AFX_TRANSLATION: + case PGM_ASX_TRANSLATION: + case PGM_EX_TRANSLATION: + case PGM_LFX_TRANSLATION: + case PGM_LSTE_SEQUENCE: + case PGM_LSX_TRANSLATION: + case PGM_LX_TRANSLATION: + case PGM_PRIMARY_AUTHORITY: + case PGM_SECONDARY_AUTHORITY: + case PGM_SPACE_SWITCH: + pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + break; + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_INSTANCE: + case PGM_ASTE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_EXTENDED_AUTHORITY: + pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + break; + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info->op_access_id = vcpu->arch.sie_block->oai; + break; + case PGM_MONITOR: + pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; + pgm_info->mon_code = vcpu->arch.sie_block->tecmc; + break; + case PGM_DATA: + pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; + break; + case PGM_PROTECTION: + pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + break; + default: + break; + } + + if (vcpu->arch.sie_block->iprcc & PGM_PER) { + pgm_info->per_code = vcpu->arch.sie_block->perc; + pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; + pgm_info->per_address = vcpu->arch.sie_block->peraddr; + pgm_info->per_access_id = vcpu->arch.sie_block->peraid; + } +} + +/* + * restore ITDB to program-interruption TDB in guest lowcore + * and set TX abort indication if required +*/ +static int handle_itdb(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_itdb *itdb; + int rc; + + if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu)) + return 0; + if (current->thread.per_flags & PER_FLAG_NO_TE) + return 0; + itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba; + rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb)); + if (rc) + return rc; + memset(itdb, 0, sizeof(*itdb)); + + return 0; +} + +#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER) + static int handle_prog(struct kvm_vcpu *vcpu) { + struct kvm_s390_pgm_info pgm_info; + psw_t psw; + int rc; + vcpu->stat.exit_program_interruption++; - return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); + + if (guestdbg_enabled(vcpu) && per_event(vcpu)) { + kvm_s390_handle_per_event(vcpu); + /* the interrupt might have been filtered out completely */ + if (vcpu->arch.sie_block->iprcc == 0) + return 0; + } + + trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc); + if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) { + rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t)); + if (rc) + return rc; + /* Avoid endless loops of specification exceptions */ + if (!is_valid_psw(&psw)) + return -EOPNOTSUPP; + } + rc = handle_itdb(vcpu); + if (rc) + return rc; + + __extract_prog_irq(vcpu, &pgm_info); + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) @@ -213,16 +239,110 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) return rc2; } +/** + * handle_external_interrupt - used for external interruption interceptions + * + * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if + * the new PSW does not have external interrupts disabled. In the first case, + * we've got to deliver the interrupt manually, and in the second case, we + * drop to userspace to handle the situation there. + */ +static int handle_external_interrupt(struct kvm_vcpu *vcpu) +{ + u16 eic = vcpu->arch.sie_block->eic; + struct kvm_s390_interrupt irq; + psw_t newpsw; + int rc; + + vcpu->stat.exit_external_interrupt++; + + rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t)); + if (rc) + return rc; + /* We can not handle clock comparator or timer interrupt with bad PSW */ + if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) && + (newpsw.mask & PSW_MASK_EXT)) + return -EOPNOTSUPP; + + switch (eic) { + case EXT_IRQ_CLK_COMP: + irq.type = KVM_S390_INT_CLOCK_COMP; + break; + case EXT_IRQ_CPU_TIMER: + irq.type = KVM_S390_INT_CPU_TIMER; + break; + case EXT_IRQ_EXTERNAL_CALL: + if (kvm_s390_si_ext_call_pending(vcpu)) + return 0; + irq.type = KVM_S390_INT_EXTERNAL_CALL; + irq.parm = vcpu->arch.sie_block->extcpuaddr; + break; + default: + return -EOPNOTSUPP; + } + + return kvm_s390_inject_vcpu(vcpu, &irq); +} + +/** + * Handle MOVE PAGE partial execution interception. + * + * This interception can only happen for guests with DAT disabled and + * addresses that are currently not mapped in the host. Thus we try to + * set up the mappings for the corresponding user pages here (or throw + * addressing exceptions in case of illegal guest addresses). + */ +static int handle_mvpg_pei(struct kvm_vcpu *vcpu) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long srcaddr, dstaddr; + int reg1, reg2, rc; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + /* Make sure that the source is paged-in */ + srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]); + if (kvm_is_error_gpa(vcpu->kvm, srcaddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); + if (rc != 0) + return rc; + + /* Make sure that the destination is paged-in */ + dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]); + if (kvm_is_error_gpa(vcpu->kvm, dstaddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); + if (rc != 0) + return rc; + + psw->addr = __rewind_psw(*psw, 4); + + return 0; +} + +static int handle_partial_execution(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */ + return handle_mvpg_pei(vcpu); + if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */ + return kvm_s390_handle_sigp_pei(vcpu); + + return -EOPNOTSUPP; +} + static const intercept_handler_t intercept_funcs[] = { [0x00 >> 2] = handle_noop, [0x04 >> 2] = handle_instruction, [0x08 >> 2] = handle_prog, [0x0C >> 2] = handle_instruction_and_prog, [0x10 >> 2] = handle_noop, - [0x14 >> 2] = handle_noop, + [0x14 >> 2] = handle_external_interrupt, + [0x18 >> 2] = handle_noop, [0x1C >> 2] = kvm_s390_handle_wait, [0x20 >> 2] = handle_validity, [0x28 >> 2] = handle_stop, + [0x38 >> 2] = handle_partial_execution, }; int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 35c21bf910c..90c8de22a2a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,7 +1,7 @@ /* - * interrupt.c - handling kvm guest interrupts + * handling kvm guest interrupts * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -13,18 +13,42 @@ #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> +#include <linux/mmu_context.h> #include <linux/signal.h> #include <linux/slab.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> #include "kvm-s390.h" #include "gaccess.h" +#include "trace-s390.h" -static int psw_extint_disabled(struct kvm_vcpu *vcpu) +#define IOINT_SCHID_MASK 0x0000ffff +#define IOINT_SSID_MASK 0x00030000 +#define IOINT_CSSID_MASK 0x03fc0000 +#define IOINT_AI_MASK 0x04000000 + +static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); + +static int is_ioint(u64 type) +{ + return ((type & 0xfffe0000u) != 0xfffe0000u); +} + +int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); } +static int psw_ioint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); +} + +static int psw_mchk_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK); +} + static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) { if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || @@ -34,22 +58,50 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) return 1; } +static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) +{ + if (psw_extint_disabled(vcpu) || + !(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + return 0; + if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu)) + /* No timer interrupts when single stepping */ + return 0; + return 1; +} + +static u64 int_word_to_isc_bits(u32 int_word) +{ + u8 isc = (int_word & 0x38000000) >> 27; + + return (0x80 >> isc) << 24; +} + static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) + return 1; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) return 1; return 0; - case KVM_S390_INT_SERVICE: + case KVM_S390_INT_CLOCK_COMP: + return ckc_interrupts_enabled(vcpu); + case KVM_S390_INT_CPU_TIMER: if (psw_extint_disabled(vcpu)) return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + if (vcpu->arch.sie_block->gcr[0] & 0x400ul) return 1; return 0; + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: if (psw_extint_disabled(vcpu)) return 0; @@ -61,7 +113,22 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_SET_PREFIX: case KVM_S390_RESTART: return 1; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) + return 1; + return 0; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[6] & + int_word_to_isc_bits(inti->io.io_int_word)) + return 1; + return 0; default: + printk(KERN_WARNING "illegal interrupt type %llx\n", + inti->type); BUG(); } return 0; @@ -69,24 +136,28 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, static void __set_cpu_idle(struct kvm_vcpu *vcpu) { - BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { - BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) { - atomic_clear_mask(CPUSTAT_ECALL_PEND | - CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, - &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); vcpu->arch.sie_block->lctl = 0x0000; + vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT); + + if (guestdbg_enabled(vcpu)) { + vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 | + LCTL_CR10 | LCTL_CR11); + vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); + } } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) @@ -98,9 +169,14 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: if (psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); else @@ -109,94 +185,240 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_STOP: __set_cpuflag(vcpu, CPUSTAT_STOP_INT); break; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; + break; default: BUG(); } } +static int __deliver_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc = 0; + + switch (pgm_info->code & ~PGM_PER) { + case PGM_AFX_TRANSLATION: + case PGM_ASX_TRANSLATION: + case PGM_EX_TRANSLATION: + case PGM_LFX_TRANSLATION: + case PGM_LSTE_SEQUENCE: + case PGM_LSX_TRANSLATION: + case PGM_LX_TRANSLATION: + case PGM_PRIMARY_AUTHORITY: + case PGM_SECONDARY_AUTHORITY: + case PGM_SPACE_SWITCH: + rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + (u64 *)__LC_TRANS_EXC_CODE); + break; + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_INSTANCE: + case PGM_ASTE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_EXTENDED_AUTHORITY: + rc = put_guest_lc(vcpu, pgm_info->exc_access_id, + (u8 *)__LC_EXC_ACCESS_ID); + break; + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + (u64 *)__LC_TRANS_EXC_CODE); + rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + (u8 *)__LC_EXC_ACCESS_ID); + rc |= put_guest_lc(vcpu, pgm_info->op_access_id, + (u8 *)__LC_OP_ACCESS_ID); + break; + case PGM_MONITOR: + rc = put_guest_lc(vcpu, pgm_info->mon_class_nr, + (u64 *)__LC_MON_CLASS_NR); + rc |= put_guest_lc(vcpu, pgm_info->mon_code, + (u64 *)__LC_MON_CODE); + break; + case PGM_DATA: + rc = put_guest_lc(vcpu, pgm_info->data_exc_code, + (u32 *)__LC_DATA_EXC_CODE); + break; + case PGM_PROTECTION: + rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + (u64 *)__LC_TRANS_EXC_CODE); + rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + (u8 *)__LC_EXC_ACCESS_ID); + break; + } + + if (pgm_info->code & PGM_PER) { + rc |= put_guest_lc(vcpu, pgm_info->per_code, + (u8 *) __LC_PER_CODE); + rc |= put_guest_lc(vcpu, pgm_info->per_atmid, + (u8 *)__LC_PER_ATMID); + rc |= put_guest_lc(vcpu, pgm_info->per_address, + (u64 *) __LC_PER_ADDRESS); + rc |= put_guest_lc(vcpu, pgm_info->per_access_id, + (u8 *) __LC_PER_ACCESS_ID); + } + + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* last instruction only stored for these icptcodes */ + rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14], + (u16 *) __LC_PGM_ILC); + break; + case ICPT_PROGI: + rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc, + (u16 *) __LC_PGM_ILC); + break; + default: + rc |= put_guest_lc(vcpu, 0, + (u16 *) __LC_PGM_ILC); + } + + rc |= put_guest_lc(vcpu, pgm_info->code, + (u16 *)__LC_PGM_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + + return rc; +} + static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; - int rc, exception = 0; + int rc = 0; switch (inti->type) { case KVM_S390_INT_EMERGENCY: VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); vcpu->stat.deliver_emergency_signal++; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->emerg.code, 0); + rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, inti->emerg.code, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + break; + case KVM_S390_INT_EXTERNAL_CALL: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->extcall.code, 0); + rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, inti->extcall.code, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + break; + case KVM_S390_INT_CLOCK_COMP: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + deliver_ckc_interrupt(vcpu); + break; + case KVM_S390_INT_CPU_TIMER: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, + (u16 *)__LC_EXT_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); break; - case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); vcpu->stat.deliver_service_signal++; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + break; + case KVM_S390_INT_PFAULT_INIT: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *) __LC_EXT_PARAMS2); + break; + case KVM_S390_INT_PFAULT_DONE: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); break; - case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); vcpu->stat.deliver_virtio_interrupt++; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, - inti->ext.ext_params2); - if (rc == -EFAULT) - exception = 1; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); break; - case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); vcpu->stat.deliver_stop_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + 0, 0); __set_intercept_indicator(vcpu, inti); break; @@ -204,87 +426,117 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", inti->prefix.address); vcpu->stat.deliver_prefix_signal++; - vcpu->arch.sie_block->prefix = inti->prefix.address; - vcpu->arch.sie_block->ihcpu = 0xffff; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->prefix.address, 0); + kvm_s390_set_prefix(vcpu, inti->prefix.address); break; case KVM_S390_RESTART: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); vcpu->stat.deliver_restart_signal++; - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, - restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + 0, 0); + rc = write_guest_lc(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); break; - case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", inti->pgm.code, table[vcpu->arch.sie_block->ipa >> 14]); vcpu->stat.deliver_program_int++; - rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_PGM_ILC, - table[vcpu->arch.sie_block->ipa >> 14]); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->pgm.code, 0); + rc = __deliver_prog_irq(vcpu, &inti->pgm); + break; - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_PGM_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + inti->mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->mchk.cr14, + inti->mchk.mcic); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + { + __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr; + __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word; + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + param0, param1); + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + break; + } default: BUG(); } - if (exception) { + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); + "delivery, killing userspace\n"); do_exit(SIGKILL); } } -static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { - int rc, exception = 0; - - if (psw_extint_disabled(vcpu)) - return 0; - if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) - return 0; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); - if (rc == -EFAULT) - exception = 1; - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - if (exception) { + int rc; + + rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " "delivery, killing userspace\n"); do_exit(SIGKILL); } - return 1; } -static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +/* Check whether SIGP interpretation facility has an external call pending */ +int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) +{ + atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl; + + if (!psw_extint_disabled(vcpu) && + (vcpu->arch.sie_block->gcr[0] & 0x2000ul) && + (atomic_read(sigp_ctrl) & SIGP_CTRL_C) && + (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 1; + + return 0; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; @@ -311,19 +563,23 @@ static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) spin_unlock(&fi->lock); } - if ((!rc) && (vcpu->arch.sie_block->ckc < - get_clock() + vcpu->arch.sie_block->epoch)) { - if ((!psw_extint_disabled(vcpu)) && - (vcpu->arch.sie_block->gcr[0] & 0x800ul)) - rc = 1; - } + if (!rc && kvm_cpu_has_pending_timer(vcpu)) + rc = 1; + + if (!rc && kvm_s390_si_ext_call_pending(vcpu)) + rc = 1; return rc; } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return 0; + if (!(vcpu->arch.sie_block->ckc < + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + return 0; + if (!ckc_interrupts_enabled(vcpu)) + return 0; + return 1; } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) @@ -346,44 +602,45 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } - if (psw_extint_disabled(vcpu) || - (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) { + if (!ckc_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); goto no_timer; } - now = get_clock() + vcpu->arch.sie_block->epoch; + now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; if (vcpu->arch.sie_block->ckc < now) { __unset_cpu_idle(vcpu); return 0; } - sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->arch.local_int.wq, &wait); + add_wait_queue(&vcpu->wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && (!vcpu->arch.local_int.timer_due) && - !signal_pending(current)) { + !signal_pending(current) && + !kvm_s390_si_ext_call_pending(vcpu)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); - vcpu_put(vcpu); schedule(); - vcpu_load(vcpu); spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); return 0; } @@ -394,8 +651,8 @@ void kvm_s390_tasklet(unsigned long parm) spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->arch.local_int.wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock(&vcpu->arch.local_int.lock); } @@ -408,11 +665,31 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + vcpu->preempted = true; tasklet_schedule(&vcpu->arch.tasklet); return HRTIMER_NORESTART; } +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + list_del(&inti->list); + kfree(inti); + } + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + + /* clear pending external calls set by sigp interpretation facility */ + atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(SIGP_CTRL_C, + &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); +} + void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -443,9 +720,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } while (deliver); } - if ((vcpu->arch.sie_block->ckc < - get_clock() + vcpu->arch.sie_block->epoch)) - __try_deliver_ckc_interrupt(vcpu); + if (kvm_cpu_has_pending_timer(vcpu)) + deliver_ckc_interrupt(vcpu); if (atomic_read(&fi->active)) { do { @@ -454,6 +730,63 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) list_for_each_entry_safe(inti, n, &fi->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); + fi->irq_count--; + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + fi->irq_count--; deliver = 1; break; } @@ -483,51 +816,113 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) inti->pgm.code = code; VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); spin_lock_bh(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); - BUG_ON(waitqueue_active(&li->wq)); + BUG_ON(waitqueue_active(li->wq)); spin_unlock_bh(&li->lock); return 0; } -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int) +int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) { - struct kvm_s390_local_interrupt *li; - struct kvm_s390_float_interrupt *fi; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_interrupt_info *inti; - int sigcpu; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) return -ENOMEM; - switch (s390int->type) { - case KVM_S390_INT_VIRTIO: - VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", - s390int->parm, s390int->parm64); - inti->type = s390int->type; - inti->ext.ext_params = s390int->parm; - inti->ext.ext_params2 = s390int->parm64; - break; - case KVM_S390_INT_SERVICE: - VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); - inti->type = s390int->type; - inti->ext.ext_params = s390int->parm; + VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", + pgm_info->code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + pgm_info->code, 0, 1); + + inti->type = KVM_S390_PROGRAM_INT; + memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); + spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti, *iter; + + if ((!schid && !cr6) || (schid && cr6)) + return NULL; + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + inti = NULL; + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (cr6 && + ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) + continue; + if (schid) { + if (((schid & 0x00000000ffff0000) >> 16) != + iter->io.subchannel_id) + continue; + if ((schid & 0x000000000000ffff) != + iter->io.subchannel_nr) + continue; + } + inti = iter; break; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_INT_EMERGENCY: - default: - kfree(inti); - return -EINVAL; } + if (inti) { + list_del_init(&inti->list); + fi->irq_count--; + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return inti; +} + +static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *iter; + struct kvm_vcpu *dst_vcpu = NULL; + int sigcpu; + int rc = 0; mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_add_tail(&inti->list, &fi->list); + if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + rc = -EINVAL; + goto unlock_fi; + } + fi->irq_count++; + if (!is_ioint(inti->type)) { + list_add_tail(&inti->list, &fi->list); + } else { + u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (int_word_to_isc_bits(iter->io.io_int_word) + <= isc_bits) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } atomic_set(&fi->active, 1); sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { @@ -535,17 +930,80 @@ int kvm_s390_inject_vm(struct kvm *kvm, sigcpu = fi->next_rr_cpu++; if (sigcpu == KVM_MAX_VCPUS) sigcpu = fi->next_rr_cpu = 0; - } while (fi->local_int[sigcpu] == NULL); + } while (kvm_get_vcpu(kvm, sigcpu) == NULL); } - li = fi->local_int[sigcpu]; + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + kvm_get_vcpu(kvm, sigcpu)->preempted = true; spin_unlock_bh(&li->lock); +unlock_fi: spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); - return 0; + return rc; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = s390int->type; + switch (inti->type) { + case KVM_S390_INT_VIRTIO: + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", + s390int->parm, s390int->parm64); + inti->ext.ext_params = s390int->parm; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_SERVICE: + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + inti->ext.ext_params = s390int->parm; + break; + case KVM_S390_INT_PFAULT_DONE: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_MCHK: + VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ + inti->mchk.mcic = s390int->parm64; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (inti->type & IOINT_AI_MASK) + VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", + s390int->type & IOINT_CSSID_MASK, + s390int->type & IOINT_SSID_MASK, + s390int->type & IOINT_SCHID_MASK); + inti->io.subchannel_id = s390int->parm >> 16; + inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; + inti->io.io_int_parm = s390int->parm64 >> 32; + inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; + break; + default: + kfree(inti); + return -EINVAL; + } + trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, + 2); + + return __inject_vm(kvm, inti); +} + +void kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + __inject_vm(kvm, inti); } int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, @@ -577,16 +1035,49 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_SIGP_STOP: case KVM_S390_RESTART: - case KVM_S390_INT_EMERGENCY: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); inti->type = s390int->type; break; + case KVM_S390_INT_EXTERNAL_CALL: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + s390int->parm); + inti->type = s390int->type; + inti->extcall.code = s390int->parm; + break; + case KVM_S390_INT_EMERGENCY: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm); + inti->type = s390int->type; + inti->emerg.code = s390int->parm; + break; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->type = s390int->type; + inti->mchk.mcic = s390int->parm64; + break; + case KVM_S390_INT_PFAULT_INIT: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: default: kfree(inti); return -EINVAL; } + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, + s390int->parm64, 2); mutex_lock(&vcpu->kvm->lock); li = &vcpu->arch.local_int; @@ -599,9 +1090,530 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + vcpu->preempted = true; spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; } + +void kvm_s390_clear_float_irqs(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + list_del(&inti->list); + kfree(inti); + } + fi->irq_count = 0; + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, + u8 *addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + struct kvm_s390_irq irq = {0}; + + irq.type = inti->type; + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + irq.u.ext = inti->ext; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + irq.u.io = inti->io; + break; + case KVM_S390_MCHK: + irq.u.mchk = inti->mchk; + break; + default: + return -EINVAL; + } + + if (copy_to_user(uptr, &irq, sizeof(irq))) + return -EFAULT; + + return 0; +} + +static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +{ + struct kvm_s390_interrupt_info *inti; + struct kvm_s390_float_interrupt *fi; + int ret = 0; + int n = 0; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + + list_for_each_entry(inti, &fi->list, list) { + if (len < sizeof(struct kvm_s390_irq)) { + /* signal userspace to try again */ + ret = -ENOMEM; + break; + } + ret = copy_irq_to_user(inti, buf); + if (ret) + break; + buf += sizeof(struct kvm_s390_irq); + len -= sizeof(struct kvm_s390_irq); + n++; + } + + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + + return ret < 0 ? ret : n; +} + +static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_FLIC_GET_ALL_IRQS: + r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + attr->attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, + u64 addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + void *target = NULL; + void __user *source; + u64 size; + + if (get_user(inti->type, (u64 __user *)addr)) + return -EFAULT; + + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + target = (void *) &inti->ext; + source = &uptr->u.ext; + size = sizeof(inti->ext); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + target = (void *) &inti->io; + source = &uptr->u.io; + size = sizeof(inti->io); + break; + case KVM_S390_MCHK: + target = (void *) &inti->mchk; + source = &uptr->u.mchk; + size = sizeof(inti->mchk); + break; + default: + return -EINVAL; + } + + if (copy_from_user(target, source, size)) + return -EFAULT; + + return 0; +} + +static int enqueue_floating_irq(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int r = 0; + int len = attr->attr; + + if (len % sizeof(struct kvm_s390_irq) != 0) + return -EINVAL; + else if (len > KVM_S390_FLIC_MAX_BUFFER) + return -EINVAL; + + while (len >= sizeof(struct kvm_s390_irq)) { + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + r = copy_irq_from_user(inti, attr->addr); + if (r) { + kfree(inti); + return r; + } + r = __inject_vm(dev->kvm, inti); + if (r) { + kfree(inti); + return r; + } + len -= sizeof(struct kvm_s390_irq); + attr->addr += sizeof(struct kvm_s390_irq); + } + + return r; +} + +static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id) +{ + if (id >= MAX_S390_IO_ADAPTERS) + return NULL; + return kvm->arch.adapters[id]; +} + +static int register_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct s390_io_adapter *adapter; + struct kvm_s390_io_adapter adapter_info; + + if (copy_from_user(&adapter_info, + (void __user *)attr->addr, sizeof(adapter_info))) + return -EFAULT; + + if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) || + (dev->kvm->arch.adapters[adapter_info.id] != NULL)) + return -EINVAL; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + INIT_LIST_HEAD(&adapter->maps); + init_rwsem(&adapter->maps_lock); + atomic_set(&adapter->nr_maps, 0); + adapter->id = adapter_info.id; + adapter->isc = adapter_info.isc; + adapter->maskable = adapter_info.maskable; + adapter->masked = false; + adapter->swap = adapter_info.swap; + dev->kvm->arch.adapters[adapter->id] = adapter; + + return 0; +} + +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked) +{ + int ret; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter || !adapter->maskable) + return -EINVAL; + ret = adapter->masked; + adapter->masked = masked; + return ret; +} + +static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map; + int ret; + + if (!adapter || !addr) + return -EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&map->list); + map->guest_addr = addr; + map->addr = gmap_translate(addr, kvm->arch.gmap); + if (map->addr == -EFAULT) { + ret = -EFAULT; + goto out; + } + ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + if (ret < 0) + goto out; + BUG_ON(ret != 1); + down_write(&adapter->maps_lock); + if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) { + list_add_tail(&map->list, &adapter->maps); + ret = 0; + } else { + put_page(map->page); + ret = -EINVAL; + } + up_write(&adapter->maps_lock); +out: + if (ret) + kfree(map); + return ret; +} + +static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map, *tmp; + int found = 0; + + if (!adapter || !addr) + return -EINVAL; + + down_write(&adapter->maps_lock); + list_for_each_entry_safe(map, tmp, &adapter->maps, list) { + if (map->guest_addr == addr) { + found = 1; + atomic_dec(&adapter->nr_maps); + list_del(&map->list); + put_page(map->page); + kfree(map); + break; + } + } + up_write(&adapter->maps_lock); + + return found ? 0 : -EINVAL; +} + +void kvm_s390_destroy_adapters(struct kvm *kvm) +{ + int i; + struct s390_map_info *map, *tmp; + + for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) { + if (!kvm->arch.adapters[i]) + continue; + list_for_each_entry_safe(map, tmp, + &kvm->arch.adapters[i]->maps, list) { + list_del(&map->list); + put_page(map->page); + kfree(map); + } + kfree(kvm->arch.adapters[i]); + } +} + +static int modify_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_io_adapter_req req; + struct s390_io_adapter *adapter; + int ret; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + adapter = get_io_adapter(dev->kvm, req.id); + if (!adapter) + return -EINVAL; + switch (req.type) { + case KVM_S390_IO_ADAPTER_MASK: + ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask); + if (ret > 0) + ret = 0; + break; + case KVM_S390_IO_ADAPTER_MAP: + ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr); + break; + case KVM_S390_IO_ADAPTER_UNMAP: + ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r = 0; + unsigned int i; + struct kvm_vcpu *vcpu; + + switch (attr->group) { + case KVM_DEV_FLIC_ENQUEUE: + r = enqueue_floating_irq(dev, attr); + break; + case KVM_DEV_FLIC_CLEAR_IRQS: + r = 0; + kvm_s390_clear_float_irqs(dev->kvm); + break; + case KVM_DEV_FLIC_APF_ENABLE: + dev->kvm->arch.gmap->pfault_enabled = 1; + break; + case KVM_DEV_FLIC_APF_DISABLE_WAIT: + dev->kvm->arch.gmap->pfault_enabled = 0; + /* + * Make sure no async faults are in transition when + * clearing the queues. So we don't need to worry + * about late coming workers. + */ + synchronize_srcu(&dev->kvm->srcu); + kvm_for_each_vcpu(i, vcpu, dev->kvm) + kvm_clear_async_pf_completion_queue(vcpu); + break; + case KVM_DEV_FLIC_ADAPTER_REGISTER: + r = register_io_adapter(dev, attr); + break; + case KVM_DEV_FLIC_ADAPTER_MODIFY: + r = modify_io_adapter(dev, attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static int flic_create(struct kvm_device *dev, u32 type) +{ + if (!dev) + return -EINVAL; + if (dev->kvm->arch.flic) + return -EINVAL; + dev->kvm->arch.flic = dev; + return 0; +} + +static void flic_destroy(struct kvm_device *dev) +{ + dev->kvm->arch.flic = NULL; + kfree(dev); +} + +/* s390 floating irq controller (flic) */ +struct kvm_device_ops kvm_flic_ops = { + .name = "kvm-flic", + .get_attr = flic_get_attr, + .set_attr = flic_set_attr, + .create = flic_create, + .destroy = flic_destroy, +}; + +static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) +{ + unsigned long bit; + + bit = bit_nr + (addr % PAGE_SIZE) * 8; + + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; +} + +static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter, + u64 addr) +{ + struct s390_map_info *map; + + if (!adapter) + return NULL; + + list_for_each_entry(map, &adapter->maps, list) { + if (map->guest_addr == addr) + return map; + } + return NULL; +} + +static int adapter_indicators_set(struct kvm *kvm, + struct s390_io_adapter *adapter, + struct kvm_s390_adapter_int *adapter_int) +{ + unsigned long bit; + int summary_set, idx; + struct s390_map_info *info; + void *map; + + info = get_map_info(adapter, adapter_int->ind_addr); + if (!info) + return -1; + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap); + set_bit(bit, map); + idx = srcu_read_lock(&kvm->srcu); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + info = get_map_info(adapter, adapter_int->summary_addr); + if (!info) { + srcu_read_unlock(&kvm->srcu, idx); + return -1; + } + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->summary_offset, + adapter->swap); + summary_set = test_and_set_bit(bit, map); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + srcu_read_unlock(&kvm->srcu, idx); + return summary_set ? 0 : 1; +} + +/* + * < 0 - not injected due to error + * = 0 - coalesced, summary indicator already active + * > 0 - injected interrupt + */ +static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + int ret; + struct s390_io_adapter *adapter; + + /* We're only interested in the 0->1 transition. */ + if (!level) + return 0; + adapter = get_io_adapter(kvm, e->adapter.adapter_id); + if (!adapter) + return -1; + down_read(&adapter->maps_lock); + ret = adapter_indicators_set(kvm, adapter, &e->adapter); + up_read(&adapter->maps_lock); + if ((ret > 0) && !adapter->masked) { + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + ret = kvm_s390_inject_vm(kvm, &s390int); + if (ret == 0) + ret = 1; + } + return ret; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int ret; + + switch (ue->type) { + case KVM_IRQ_ROUTING_S390_ADAPTER: + e->set = set_adapter_int; + e->adapter.summary_addr = ue->u.adapter.summary_addr; + e->adapter.ind_addr = ue->u.adapter.ind_addr; + e->adapter.summary_offset = ue->u.adapter.summary_offset; + e->adapter.ind_offset = ue->u.adapter.ind_offset; + e->adapter.adapter_id = ue->u.adapter.adapter_id; + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + return -EINVAL; +} diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h new file mode 100644 index 00000000000..d98e4159643 --- /dev/null +++ b/arch/s390/kvm/irq.h @@ -0,0 +1,22 @@ +/* + * s390 irqchip routines + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_IRQ_H +#define __KVM_IRQ_H + +#include <linux/kvm_host.h> + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} + +#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bade533ba28..2f3e14fe91a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1,7 +1,7 @@ /* - * s390host.c -- hosting zSeries kernel virtual machines + * hosting zSeries kernel virtual machines * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -11,6 +11,7 @@ * Christian Borntraeger <borntraeger@de.ibm.com> * Heiko Carstens <heiko.carstens@de.ibm.com> * Christian Ehrhardt <ehrhardt@de.ibm.com> + * Jason J. Herne <jjherne@us.ibm.com> */ #include <linux/compiler.h> @@ -27,10 +28,16 @@ #include <asm/lowcore.h> #include <asm/pgtable.h> #include <asm/nmi.h> -#include <asm/system.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" +#define CREATE_TRACE_POINTS +#include "trace.h" +#include "trace-s390.h" + #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -45,7 +52,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, + { "instruction_stctl", VCPU_STAT(instruction_stctl) }, + { "instruction_stctg", VCPU_STAT(instruction_stctg) }, { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_external_call", VCPU_STAT(deliver_external_call) }, { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, @@ -53,26 +63,41 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, { "instruction_stidp", VCPU_STAT(instruction_stidp) }, { "instruction_spx", VCPU_STAT(instruction_spx) }, { "instruction_stpx", VCPU_STAT(instruction_stpx) }, { "instruction_stap", VCPU_STAT(instruction_stap) }, { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, { "instruction_stsch", VCPU_STAT(instruction_stsch) }, { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_essa", VCPU_STAT(instruction_essa) }, { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, + { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, + { "diagnose_9c", VCPU_STAT(diagnose_9c) }, { NULL } }; -static unsigned long long *facilities; +unsigned long *vfacilities; +static struct gmap_notifier gmap_notifier; + +/* test availability of vfacility */ +int test_vfacility(unsigned long nr) +{ + return __test_facility(nr, (void *) vfacilities); +} /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) @@ -85,13 +110,18 @@ void kvm_arch_hardware_disable(void *garbage) { } +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); + int kvm_arch_hardware_setup(void) { + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_ipte_notifier(&gmap_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { + gmap_unregister_ipte_notifier(&gmap_notifier); } void kvm_arch_check_processor_compat(void *rtn) @@ -122,14 +152,58 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_CAP_S390_UCONTROL: +#endif + case KVM_CAP_ASYNC_PF: + case KVM_CAP_SYNC_REGS: + case KVM_CAP_ONE_REG: + case KVM_CAP_ENABLE_CAP: + case KVM_CAP_S390_CSS_SUPPORT: + case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_VM_ATTRIBUTES: r = 1; break; + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; + case KVM_CAP_S390_COW: + r = MACHINE_HAS_ESOP; + break; default: r = 0; } return r; } +static void kvm_s390_sync_dirty_log(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + gfn_t cur_gfn, last_gfn; + unsigned long address; + struct gmap *gmap = kvm->arch.gmap; + + down_read(&gmap->mm->mmap_sem); + /* Loop over all guest pages */ + last_gfn = memslot->base_gfn + memslot->npages; + for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { + address = gfn_to_hva_memslot(memslot, cur_gfn); + + if (gmap_test_and_clear_dirty(address, gmap)) + mark_page_dirty(kvm, cur_gfn); + } + up_read(&gmap->mm->mmap_sem); +} + /* Section: vm related */ /* * Get (and clear) the dirty memory log for a memory slot. @@ -137,7 +211,129 @@ int kvm_dev_ioctl_check_extension(long ext) int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - return 0; + int r; + unsigned long n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + mutex_lock(&kvm->slots_lock); + + r = -EINVAL; + if (log->slot >= KVM_USER_MEM_SLOTS) + goto out; + + memslot = id_to_memslot(kvm->memslots, log->slot); + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + kvm_s390_sync_dirty_log(kvm, memslot); + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* Clear the dirty log */ + if (is_dirty) { + n = kvm_dirty_bitmap_bytes(memslot); + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + mutex_unlock(&kvm->slots_lock); + return r; +} + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_S390_IRQCHIP: + kvm->arch.use_irqchip = 1; + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + unsigned int idx; + switch (attr->attr) { + case KVM_S390_VM_MEM_ENABLE_CMMA: + ret = -EBUSY; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) == 0) { + kvm->arch.use_cmma = 1; + ret = 0; + } + mutex_unlock(&kvm->lock); + break; + case KVM_S390_VM_MEM_CLR_CMMA: + mutex_lock(&kvm->lock); + idx = srcu_read_lock(&kvm->srcu); + page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false); + srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->lock); + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_S390_VM_MEM_CTRL: + ret = kvm_s390_mem_control(kvm, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_S390_VM_MEM_CTRL: + switch (attr->attr) { + case KVM_S390_VM_MEM_ENABLE_CMMA: + case KVM_S390_VM_MEM_CLR_CMMA: + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + break; + default: + ret = -ENXIO; + break; + } + + return ret; } long kvm_arch_vm_ioctl(struct file *filp, @@ -145,6 +341,7 @@ long kvm_arch_vm_ioctl(struct file *filp, { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; int r; switch (ioctl) { @@ -157,6 +354,47 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_inject_vm(kvm, &s390int); break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } + case KVM_CREATE_IRQCHIP: { + struct kvm_irq_routing_entry routing; + + r = -EINVAL; + if (kvm->arch.use_irqchip) { + /* Set up dummy routing. */ + memset(&routing, 0, sizeof(routing)); + kvm_set_irq_routing(kvm, &routing, 0, 0); + r = 0; + } + break; + } + case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_s390_vm_set_attr(kvm, &attr); + break; + } + case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_s390_vm_get_attr(kvm, &attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_s390_vm_has_attr(kvm, &attr); + break; + } default: r = -ENOTTY; } @@ -164,18 +402,36 @@ long kvm_arch_vm_ioctl(struct file *filp, return r; } -int kvm_arch_init_vm(struct kvm *kvm) +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int rc; char debug_name[16]; + static unsigned long sca_offset; + + rc = -EINVAL; +#ifdef CONFIG_KVM_S390_UCONTROL + if (type & ~KVM_VM_S390_UCONTROL) + goto out_err; + if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) + goto out_err; +#else + if (type) + goto out_err; +#endif rc = s390_enable_sie(); if (rc) goto out_err; + rc = -ENOMEM; + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; + spin_lock(&kvm_lock); + sca_offset = (sca_offset + 16) & 0x7f0; + kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -185,11 +441,29 @@ int kvm_arch_init_vm(struct kvm *kvm) spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); + init_waitqueue_head(&kvm->arch.ipte_wq); debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); + if (type & KVM_VM_S390_UCONTROL) { + kvm->arch.gmap = NULL; + } else { + kvm->arch.gmap = gmap_alloc(current->mm); + if (!kvm->arch.gmap) + goto out_nogmap; + kvm->arch.gmap->private = kvm; + kvm->arch.gmap->pfault_enabled = 0; + } + + kvm->arch.css_support = 0; + kvm->arch.use_irqchip = 0; + + spin_lock_init(&kvm->arch.start_stop_lock); + return 0; +out_nogmap: + debug_unregister(kvm->arch.dbf); out_nodbf: free_page((unsigned long)(kvm->arch.sca)); out_err: @@ -199,14 +473,27 @@ out_err: void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); - clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); + kvm_s390_clear_local_irqs(vcpu); + kvm_clear_async_pf_completion_queue(vcpu); + if (!kvm_is_ucontrol(vcpu->kvm)) { + clear_bit(63 - vcpu->vcpu_id, + (unsigned long *) &vcpu->kvm->arch.sca->mcn); + if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == + (__u64) vcpu->arch.sie_block) + vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + } smp_mb(); + + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_free(vcpu->arch.gmap); + + if (kvm_s390_cmma_enabled(vcpu->kvm)) + kvm_s390_vcpu_unsetup_cmma(vcpu); free_page((unsigned long)(vcpu->arch.sie_block)); + kvm_vcpu_uninit(vcpu); - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -234,11 +521,30 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); free_page((unsigned long)(kvm->arch.sca)); debug_unregister(kvm->arch.dbf); + if (!kvm_is_ucontrol(kvm)) + gmap_free(kvm->arch.gmap); + kvm_s390_destroy_adapters(kvm); + kvm_s390_clear_float_irqs(kvm); } /* Section: vcpu related */ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); + if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->arch.gmap = gmap_alloc(current->mm); + if (!vcpu->arch.gmap) + return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; + return 0; + } + + vcpu->arch.gmap = vcpu->kvm->arch.gmap; + vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | + KVM_SYNC_GPRS | + KVM_SYNC_ACRS | + KVM_SYNC_CRS; return 0; } @@ -249,18 +555,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - save_fp_regs(&vcpu->arch.host_fpregs); + save_fp_ctl(&vcpu->arch.host_fpregs.fpc); + save_fp_regs(vcpu->arch.host_fpregs.fprs); save_access_regs(vcpu->arch.host_acrs); - vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; - restore_fp_regs(&vcpu->arch.guest_fpregs); - restore_access_regs(vcpu->arch.guest_acrs); + restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + restore_access_regs(vcpu->run->s.regs.acrs); + gmap_enable(vcpu->arch.gmap); + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - save_fp_regs(&vcpu->arch.guest_fpregs); - save_access_regs(vcpu->arch.guest_acrs); - restore_fp_regs(&vcpu->arch.host_fpregs); + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + gmap_disable(vcpu->arch.gmap); + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); + restore_fp_regs(vcpu->arch.host_fpregs.fprs); restore_access_regs(vcpu->arch.host_acrs); } @@ -269,8 +582,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) /* this equals initial cpu reset in pop, but we don't switch to ESA */ vcpu->arch.sie_block->gpsw.mask = 0UL; vcpu->arch.sie_block->gpsw.addr = 0UL; - vcpu->arch.sie_block->prefix = 0UL; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_s390_set_prefix(vcpu, 0); vcpu->arch.sie_block->cputm = 0UL; vcpu->arch.sie_block->ckc = 0UL; vcpu->arch.sie_block->todpr = 0; @@ -280,78 +592,283 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.guest_fpregs.fpc = 0; asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); + kvm_s390_vcpu_stop(vcpu); + kvm_s390_clear_local_irqs(vcpu); +} + +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) +{ + free_page(vcpu->arch.sie_block->cbrlo); + vcpu->arch.sie_block->cbrlo = 0; +} + +int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); + if (!vcpu->arch.sie_block->cbrlo) + return -ENOMEM; + + vcpu->arch.sie_block->ecb2 |= 0x80; + vcpu->arch.sie_block->ecb2 &= ~0x08; + return 0; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); - set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); + int rc = 0; + + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED | + CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; - vcpu->arch.sie_block->eca = 0xC1002001U; - vcpu->arch.sie_block->fac = (int) (long) facilities; + if (test_vfacility(50) && test_vfacility(73)) + vcpu->arch.sie_block->ecb |= 0x10; + + vcpu->arch.sie_block->ecb2 = 8; + vcpu->arch.sie_block->eca = 0xD1002000U; + if (sclp_has_siif()) + vcpu->arch.sie_block->eca |= 1; + vcpu->arch.sie_block->fac = (int) (long) vfacilities; + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | + ICTL_TPROT; + + if (kvm_s390_cmma_enabled(vcpu->kvm)) { + rc = kvm_s390_vcpu_setup_cmma(vcpu); + if (rc) + return rc; + } hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; - return 0; + return rc; } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); - int rc = -ENOMEM; + struct kvm_vcpu *vcpu; + struct sie_page *sie_page; + int rc = -EINVAL; - if (!vcpu) - goto out_nomem; + if (id >= KVM_MAX_VCPUS) + goto out; - vcpu->arch.sie_block = (struct kvm_s390_sie_block *) - get_zeroed_page(GFP_KERNEL); + rc = -ENOMEM; - if (!vcpu->arch.sie_block) + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) + goto out; + + sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); + if (!sie_page) goto out_free_cpu; + vcpu->arch.sie_block = &sie_page->sie_block; + vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.sie_block->icpua = id; - BUG_ON(!kvm->arch.sca); - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + if (!kvm_is_ucontrol(kvm)) { + if (!kvm->arch.sca) { + WARN_ON_ONCE(1); + goto out_free_cpu; + } + if (!kvm->arch.sca->cpu[id].sda) + kvm->arch.sca->cpu[id].sda = + (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = + (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + } spin_lock_init(&vcpu->arch.local_int.lock); INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; - spin_lock(&kvm->arch.float_int.lock); - kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; - init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - spin_unlock(&kvm->arch.float_int.lock); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); + trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); return vcpu; out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: - kfree(vcpu); -out_nomem: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: return ERR_PTR(rc); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + return kvm_cpu_has_interrupt(vcpu); +} + +void s390_vcpu_block(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +/* + * Kick a guest cpu out of SIE and wait until SIE is not running. + * If the CPU is not running (e.g. waiting as idle) the function will + * return immediately. */ +void exit_sie(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* Kick a guest cpu out of SIE and prevent SIE-reentry */ +void exit_sie_sync(struct kvm_vcpu *vcpu) +{ + s390_vcpu_block(vcpu); + exit_sie(vcpu); +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +{ + int i; + struct kvm *kvm = gmap->private; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* match against both prefix pages */ + if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + exit_sie_sync(vcpu); + } + } +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ /* kvm common code refers to this, but never calls it */ BUG(); return 0; } +static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_S390_TODPR: + r = put_user(vcpu->arch.sie_block->todpr, + (u32 __user *)reg->addr); + break; + case KVM_REG_S390_EPOCHDIFF: + r = put_user(vcpu->arch.sie_block->epoch, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CPU_TIMER: + r = put_user(vcpu->arch.sie_block->cputm, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CLOCK_COMP: + r = put_user(vcpu->arch.sie_block->ckc, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFTOKEN: + r = put_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = put_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = put_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PP: + r = put_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_GBEA: + r = put_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_S390_TODPR: + r = get_user(vcpu->arch.sie_block->todpr, + (u32 __user *)reg->addr); + break; + case KVM_REG_S390_EPOCHDIFF: + r = get_user(vcpu->arch.sie_block->epoch, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CPU_TIMER: + r = get_user(vcpu->arch.sie_block->cputm, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CLOCK_COMP: + r = get_user(vcpu->arch.sie_block->ckc, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFTOKEN: + r = get_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = get_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = get_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PP: + r = get_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_GBEA: + r = get_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) { kvm_s390_vcpu_initial_reset(vcpu); @@ -360,36 +877,41 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); + memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); return 0; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); + memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + restore_access_regs(vcpu->run->s.regs.acrs); return 0; } int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); + memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); return 0; } int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + if (test_fp_ctl(fpu->fpc)) + return -EINVAL; memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); vcpu->arch.guest_fpregs.fpc = fpu->fpc; + restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + restore_fp_regs(vcpu->arch.guest_fpregs.fprs); return 0; } @@ -404,7 +926,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -419,10 +941,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ + KVM_GUESTDBG_USE_HW_BP | \ + KVM_GUESTDBG_ENABLE) + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; /* not implemented yet */ + int rc = 0; + + vcpu->guest_debug = 0; + kvm_s390_clear_bp_data(vcpu); + + if (dbg->control & ~VALID_GUESTDBG_FLAGS) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + /* enforce guest PER */ + atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + + if (dbg->control & KVM_GUESTDBG_USE_HW_BP) + rc = kvm_s390_import_bp_data(vcpu, dbg); + } else { + atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + vcpu->arch.guestdbg.last_bp = 0; + } + + if (rc) { + vcpu->guest_debug = 0; + kvm_s390_clear_bp_data(vcpu); + atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + } + + return rc; } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, @@ -437,35 +989,286 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -static void __vcpu_run(struct kvm_vcpu *vcpu) +bool kvm_s390_cmma_enabled(struct kvm *kvm) +{ + if (!MACHINE_IS_LPAR) + return false; + /* only enable for z10 and later */ + if (!MACHINE_HAS_EDAT1) + return false; + if (!kvm->arch.use_cmma) + return false; + return true; +} + +static bool ibs_enabled(struct kvm_vcpu *vcpu) { - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; +} + +static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) +{ +retry: + s390_vcpu_unblock(vcpu); + /* + * We use MMU_RELOAD just to re-arm the ipte notifier for the + * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * This ensures that the ipte instruction for this request has + * already finished. We might race against a second unmapper that + * wants to set the blocking bit. Lets just retry the request loop. + */ + if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + int rc; + rc = gmap_ipte_notify(vcpu->arch.gmap, + kvm_s390_get_prefix(vcpu), + PAGE_SIZE * 2); + if (rc) + return rc; + goto retry; + } + + if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { + if (!ibs_enabled(vcpu)) { + trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); + atomic_set_mask(CPUSTAT_IBS, + &vcpu->arch.sie_block->cpuflags); + } + goto retry; + } + + if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { + if (ibs_enabled(vcpu)) { + trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); + atomic_clear_mask(CPUSTAT_IBS, + &vcpu->arch.sie_block->cpuflags); + } + goto retry; + } + + return 0; +} + +/** + * kvm_arch_fault_in_page - fault-in guest page if necessary + * @vcpu: The corresponding virtual cpu + * @gpa: Guest physical address + * @writable: Whether the page should be writable or not + * + * Make sure that a guest page has been faulted-in on the host. + * + * Return: Zero on success, negative error code otherwise. + */ +long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) +{ + struct mm_struct *mm = current->mm; + hva_t hva; + long rc; + + hva = gmap_fault(gpa, vcpu->arch.gmap); + if (IS_ERR_VALUE(hva)) + return (long)hva; + down_read(&mm->mmap_sem); + rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); + up_read(&mm->mmap_sem); + + return rc < 0 ? rc : 0; +} + +static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, + unsigned long token) +{ + struct kvm_s390_interrupt inti; + inti.parm64 = token; + + if (start_token) { + inti.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + } else { + inti.type = KVM_S390_INT_PFAULT_DONE; + WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); + } +} + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); +} + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); +} + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + /* s390 will always inject the page directly */ +} + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +{ + /* + * s390 will always inject the page directly, + * but we still want check_async_completion to cleanup + */ + return true; +} + +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) +{ + hva_t hva; + struct kvm_arch_async_pf arch; + int rc; + + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + return 0; + if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != + vcpu->arch.pfault_compare) + return 0; + if (psw_extint_disabled(vcpu)) + return 0; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + return 0; + if (!vcpu->arch.gmap->pfault_enabled) + return 0; + + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); + hva += current->thread.gmap_addr & ~PAGE_MASK; + if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) + return 0; + + rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); + return rc; +} + +static int vcpu_pre_run(struct kvm_vcpu *vcpu) +{ + int rc, cpuflags; + + /* + * On s390 notifications for arriving pages will be delivered directly + * to the guest but the house keeping for completed pfaults is + * handled outside the worker. + */ + kvm_check_async_pf_completion(vcpu); + + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); if (need_resched()) schedule(); - if (test_thread_flag(TIF_MCCK_PENDING)) + if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); - kvm_s390_deliver_pending_interrupts(vcpu); + if (!kvm_is_ucontrol(vcpu->kvm)) + kvm_s390_deliver_pending_interrupts(vcpu); - vcpu->arch.sie_block->icptcode = 0; - local_irq_disable(); - kvm_guest_enter(); - local_irq_enable(); - VCPU_EVENT(vcpu, 6, "entering sie flags %x", - atomic_read(&vcpu->arch.sie_block->cpuflags)); - if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = kvm_s390_handle_requests(vcpu); + if (rc) + return rc; + + if (guestdbg_enabled(vcpu)) { + kvm_s390_backup_guest_per_regs(vcpu); + kvm_s390_patch_guest_per_regs(vcpu); } + + vcpu->arch.sie_block->icptcode = 0; + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); + trace_kvm_s390_sie_enter(vcpu, cpuflags); + + return 0; +} + +static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) +{ + int rc = -1; + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); - local_irq_disable(); - kvm_guest_exit(); - local_irq_enable(); + trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); + + if (guestdbg_enabled(vcpu)) + kvm_s390_restore_guest_per_regs(vcpu); + + if (exit_reason >= 0) { + rc = 0; + } else if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + vcpu->run->s390_ucontrol.pgm_code = 0x10; + rc = -EREMOTE; + + } else if (current->thread.gmap_pfault) { + trace_kvm_s390_major_guest_pfault(vcpu); + current->thread.gmap_pfault = 0; + if (kvm_arch_setup_async_pf(vcpu)) { + rc = 0; + } else { + gpa_t gpa = current->thread.gmap_addr; + rc = kvm_arch_fault_in_page(vcpu, gpa, 1); + } + } + + if (rc == -1) { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + + if (rc == 0) { + if (kvm_is_ucontrol(vcpu->kvm)) + /* Don't exit for host interrupts. */ + rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; + else + rc = kvm_handle_sie_intercept(vcpu); + } - memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); + return rc; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu) +{ + int rc, exit_reason; + + /* + * We try to hold kvm->srcu during most of vcpu_run (except when run- + * ning the guest), so that memslots (and other stuff) are protected + */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + do { + rc = vcpu_pre_run(vcpu); + if (rc) + break; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + /* + * As PF_VCPU will be used in fault handler, between + * guest_enter and guest_exit should be no uaccess. + */ + preempt_disable(); + kvm_guest_enter(); + preempt_enable(); + exit_reason = sie64a(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs); + kvm_guest_exit(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + rc = vcpu_post_run(vcpu, exit_reason); + } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + return rc; } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -473,30 +1276,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int rc; sigset_t sigsaved; -rerun_vcpu: - if (vcpu->requests) - if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) - kvm_s390_vcpu_set_mem(vcpu); - - /* verify, that memory has been registered */ - if (!vcpu->arch.sie_block->gmslm) { - vcpu_put(vcpu); - VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu"); - return -EINVAL; + if (guestdbg_exit_pending(vcpu)) { + kvm_s390_prepare_debug_exit(vcpu); + return 0; } if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); - - BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); + kvm_s390_vcpu_start(vcpu); switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: case KVM_EXIT_UNKNOWN: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: + case KVM_EXIT_S390_UCONTROL: + case KVM_EXIT_S390_TSCH: + case KVM_EXIT_DEBUG: break; default: BUG(); @@ -504,22 +1301,29 @@ rerun_vcpu: vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } might_fault(); - - do { - __vcpu_run(vcpu); - rc = kvm_handle_sie_intercept(vcpu); - } while (!signal_pending(current) && !rc); - - if (rc == SIE_INTERCEPT_RERUNVCPU) - goto rerun_vcpu; + rc = __vcpu_run(vcpu); if (signal_pending(current) && !rc) { kvm_run->exit_reason = KVM_EXIT_INTR; rc = -EINTR; } + if (guestdbg_exit_pending(vcpu) && !rc) { + kvm_s390_prepare_debug_exit(vcpu); + rc = 0; + } + if (rc == -EOPNOTSUPP) { /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; @@ -537,6 +1341,8 @@ rerun_vcpu: kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); + memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -545,81 +1351,192 @@ rerun_vcpu: return rc; } -static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, - unsigned long n, int prefix) -{ - if (prefix) - return copy_to_guest(vcpu, guestdest, from, n); - else - return copy_to_guest_absolute(vcpu, guestdest, from, n); -} - /* * store status at address * we use have two special cases: * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit * KVM_S390_STORE_STATUS_PREFIXED: -> prefix */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) { - const unsigned char archmode = 1; - int prefix; + unsigned char archmode = 1; + unsigned int px; + u64 clkcomp; + int rc; - if (addr == KVM_S390_STORE_STATUS_NOADDR) { - if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) + if (gpa == KVM_S390_STORE_STATUS_NOADDR) { + if (write_guest_abs(vcpu, 163, &archmode, 1)) return -EFAULT; - addr = SAVE_AREA_BASE; - prefix = 0; - } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { - if (copy_to_guest(vcpu, 163ul, &archmode, 1)) + gpa = SAVE_AREA_BASE; + } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { + if (write_guest_real(vcpu, 163, &archmode, 1)) return -EFAULT; - addr = SAVE_AREA_BASE; - prefix = 1; - } else - prefix = 0; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), - vcpu->arch.guest_fpregs.fprs, 128, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), - vcpu->arch.guest_gprs, 128, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), - &vcpu->arch.sie_block->gpsw, 16, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg), - &vcpu->arch.sie_block->prefix, 4, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, - addr + offsetof(struct save_area, fp_ctrl_reg), - &vcpu->arch.guest_fpregs.fpc, 4, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg), - &vcpu->arch.sie_block->todpr, 4, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer), - &vcpu->arch.sie_block->cputm, 8, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), - &vcpu->arch.sie_block->ckc, 8, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), - &vcpu->arch.guest_acrs, 64, prefix)) - return -EFAULT; - - if (__guestcopy(vcpu, - addr + offsetof(struct save_area, ctrl_regs), - &vcpu->arch.sie_block->gcr, 128, prefix)) - return -EFAULT; - return 0; + gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); + } + rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), + vcpu->arch.guest_fpregs.fprs, 128); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), + vcpu->run->s.regs.gprs, 128); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), + &vcpu->arch.sie_block->gpsw, 16); + px = kvm_s390_get_prefix(vcpu); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), + &px, 4); + rc |= write_guest_abs(vcpu, + gpa + offsetof(struct save_area, fp_ctrl_reg), + &vcpu->arch.guest_fpregs.fpc, 4); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), + &vcpu->arch.sie_block->todpr, 4); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), + &vcpu->arch.sie_block->cputm, 8); + clkcomp = vcpu->arch.sie_block->ckc >> 8; + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), + &clkcomp, 8); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), + &vcpu->run->s.regs.acrs, 64); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), + &vcpu->arch.sie_block->gcr, 128); + return rc ? -EFAULT : 0; +} + +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + /* + * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy + * copying in vcpu load/put. Lets update our copies before we save + * it into the save area + */ + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + + return kvm_s390_store_status_unloaded(vcpu, addr); +} + +static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; +} + +static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) +{ + kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); + kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu); + exit_sie_sync(vcpu); +} + +static void __disable_ibs_on_all_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + __disable_ibs_on_vcpu(vcpu); + } +} + +static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) +{ + kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); + kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu); + exit_sie_sync(vcpu); +} + +void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) +{ + int i, online_vcpus, started_vcpus = 0; + + if (!is_vcpu_stopped(vcpu)) + return; + + trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); + /* Only one cpu at a time may enter/leave the STOPPED state. */ + spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + + for (i = 0; i < online_vcpus; i++) { + if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) + started_vcpus++; + } + + if (started_vcpus == 0) { + /* we're the only active VCPU -> speed it up */ + __enable_ibs_on_vcpu(vcpu); + } else if (started_vcpus == 1) { + /* + * As we are starting a second VCPU, we have to disable + * the IBS facility on all VCPUs to remove potentially + * oustanding ENABLE requests. + */ + __disable_ibs_on_all_vcpus(vcpu->kvm); + } + + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + /* + * Another VCPU might have used IBS while we were offline. + * Let's play safe and flush the VCPU at startup. + */ + vcpu->arch.sie_block->ihcpu = 0xffff; + spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + return; +} + +void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) +{ + int i, online_vcpus, started_vcpus = 0; + struct kvm_vcpu *started_vcpu = NULL; + + if (is_vcpu_stopped(vcpu)) + return; + + trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); + /* Only one cpu at a time may enter/leave the STOPPED state. */ + spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + __disable_ibs_on_vcpu(vcpu); + + for (i = 0; i < online_vcpus; i++) { + if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { + started_vcpus++; + started_vcpu = vcpu->kvm->vcpus[i]; + } + } + + if (started_vcpus == 1) { + /* + * As we only have one VCPU left, we want to enable the + * IBS facility for that VCPU to speed it up. + */ + __enable_ibs_on_vcpu(started_vcpu); + } + + spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + return; +} + +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_S390_CSS_SUPPORT: + if (!vcpu->kvm->arch.css_support) { + vcpu->kvm->arch.css_support = 1; + trace_kvm_s390_enable_css(vcpu->kvm); + } + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; } long kvm_arch_vcpu_ioctl(struct file *filp, @@ -627,6 +1544,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; + int idx; long r; switch (ioctl) { @@ -640,7 +1558,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_S390_STORE_STATUS: + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_s390_vcpu_store_status(vcpu, arg); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { psw_t psw; @@ -654,39 +1574,118 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_S390_INITIAL_RESET: r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); break; + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®); + else + r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®); + break; + } +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_S390_UCAS_MAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, + ucasmap.vcpu_addr, ucasmap.length); + break; + } + case KVM_S390_UCAS_UNMAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, + ucasmap.length); + break; + } +#endif + case KVM_S390_VCPU_FAULT: { + r = gmap_fault(arg, vcpu->arch.gmap); + if (!IS_ERR_VALUE(r)) + r = 0; + break; + } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: - r = -EINVAL; + r = -ENOTTY; } return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + enum kvm_mr_change change) { - /* A few sanity checks. We can have exactly one memory slot which has - to start at guest virtual zero and which has to be located at a - page boundary in userland and which has to end at a page boundary. - The memory in userland is ok to be fragmented into various different - vmas. It is okay to mmap() and munmap() stuff in this slot after - doing this call at any time */ + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ - if (mem->slot) + if (mem->userspace_addr & 0xffffful) return -EINVAL; - if (mem->guest_phys_addr) - return -EINVAL; - - if (mem->userspace_addr & (PAGE_SIZE - 1)) - return -EINVAL; - - if (mem->memory_size & (PAGE_SIZE - 1)) - return -EINVAL; - - if (!user_alloc) + if (mem->memory_size & 0xffffful) return -EINVAL; return 0; @@ -694,21 +1693,35 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { - int i; - struct kvm_vcpu *vcpu; + int rc; - /* request update of sie control block for all available vcpus */ - kvm_for_each_vcpu(i, vcpu, kvm) { - if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) - continue; - kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); - } + /* If the basics of the memslot do not change, we do not want + * to update the gmap. Every update causes several unnecessary + * segment translation exceptions. This is usually handled just + * fine by the normal fault handler + gmap, but it will also + * cause faults on the prefix page of running guest CPUs. + */ + if (old->userspace_addr == mem->userspace_addr && + old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && + old->npages * PAGE_SIZE == mem->memory_size) + return; + + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + if (rc) + printk(KERN_WARNING "kvm-s390: failed to commit memory region\n"); + return; } -void kvm_arch_flush_shadow(struct kvm *kvm) +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) { } @@ -721,24 +1734,34 @@ static int __init kvm_s390_init(void) /* * guests can ask for up to 255+1 double words, we need a full page - * to hold the maximum amount of facilites. On the other hand, we + * to hold the maximum amount of facilities. On the other hand, we * only set facilities that are known to work in KVM. */ - facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); - if (!facilities) { + vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!vfacilities) { kvm_exit(); return -ENOMEM; } - memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff00fff3f47c0000ULL; + memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); + vfacilities[0] &= 0xff82fff3f4fc2000UL; + vfacilities[1] &= 0x005c000000000000UL; return 0; } static void __exit kvm_s390_exit(void) { - free_page((unsigned long) facilities); + free_page((unsigned long) vfacilities); kvm_exit(); } module_init(kvm_s390_init); module_exit(kvm_s390_exit); + +/* + * Enable autoloading of the kvm module. + * Note that we add the module alias here instead of virt/kvm/kvm_main.c + * since x86 takes a different approach. + */ +#include <linux/miscdevice.h> +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a7b7586626d..a8655ed3161 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -1,7 +1,7 @@ /* - * kvm_s390.h - definition for kvm on s390 + * definition for kvm on s390 * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -19,15 +19,18 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> -/* The current code can have up to 256 pages for virtio */ -#define VIRTIODESCSPACE (256ul * 4096ul) - typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); -/* negativ values are error codes, positive values for internal conditions */ -#define SIE_INTERCEPT_RERUNVCPU (1<<0) +/* declare vfacilities extern */ +extern unsigned long *vfacilities; + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); +/* Transactional Memory Execution related macros */ +#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) +#define TDB_FORMAT1 1 +#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) + #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ do { \ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ @@ -47,54 +50,187 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; } -int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); -enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -void kvm_s390_tasklet(unsigned long parm); -void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); - -static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu) +static inline int kvm_is_ucontrol(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if (kvm->arch.gmap) + return 0; + return 1; +#else + return 0; +#endif +} + +#define GUEST_PREFIX_SHIFT 13 +static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT; +} + +static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) +{ + vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; + vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); +} + +static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) +{ + u32 base2 = vcpu->arch.sie_block->ipb >> 28; + u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; +} + +static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, + u64 *address1, u64 *address2) { - return vcpu->arch.sie_block->gmslm - - vcpu->arch.sie_block->gmsor - - VIRTIODESCSPACE + 1ul; + u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; + u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; + u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; + u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff; + + *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; + *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } -static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) +static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) { - int idx; - struct kvm_memory_slot *mem; - struct kvm_memslots *memslots; + if (r1) + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + if (r2) + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; +} - idx = srcu_read_lock(&vcpu->kvm->srcu); - memslots = kvm_memslots(vcpu->kvm); +static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) +{ + u32 base2 = vcpu->arch.sie_block->ipb >> 28; + u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4); + /* The displacement is a 20bit _SIGNED_ value */ + if (disp2 & 0x80000) + disp2+=0xfff00000; + + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; +} - mem = &memslots->memslots[0]; +static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) +{ + u32 base2 = vcpu->arch.sie_block->ipb >> 28; + u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); - vcpu->arch.sie_block->gmsor = mem->userspace_addr; - vcpu->arch.sie_block->gmslm = - mem->userspace_addr + - (mem->npages << PAGE_SHIFT) + - VIRTIODESCSPACE - 1ul; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; +} - srcu_read_unlock(&vcpu->kvm->srcu, idx); +/* Set the condition code in the guest program status word */ +static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) +{ + vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); + vcpu->arch.sie_block->gpsw.mask |= cc << 44; } +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); +void kvm_s390_tasklet(unsigned long parm); +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); +void kvm_s390_clear_float_irqs(struct kvm *kvm); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid); +void kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti); +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); + /* implemented in priv.c */ +int is_valid_psw(psw_t *psw); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); +int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); +int kvm_s390_handle_01(struct kvm_vcpu *vcpu); +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); +int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); +int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, - unsigned long addr); +long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); +void s390_vcpu_block(struct kvm_vcpu *vcpu); +void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void exit_sie(struct kvm_vcpu *vcpu); +void exit_sie_sync(struct kvm_vcpu *vcpu); +int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); +/* is cmma enabled */ +bool kvm_s390_cmma_enabled(struct kvm *kvm); +int test_vfacility(unsigned long nr); + /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); +/* implemented in interrupt.c */ +int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info); + +/** + * kvm_s390_inject_prog_cond - conditionally inject a program check + * @vcpu: virtual cpu + * @rc: original return/error code + * + * This function is supposed to be used after regular guest access functions + * failed, to conditionally inject a program check to a vcpu. The typical + * pattern would look like + * + * rc = write_guest(vcpu, addr, data, len); + * if (rc) + * return kvm_s390_inject_prog_cond(vcpu, rc); + * + * A negative return code from guest access functions implies an internal error + * like e.g. out of memory. In these cases no program check should be injected + * to the guest. + * A positive value implies that an exception happened while accessing a guest's + * memory. In this case all data belonging to the corresponding program check + * has been stored in vcpu->arch.pgm and can be injected with + * kvm_s390_inject_prog_irq(). + * + * Returns: - the original @rc value if @rc was negative (internal error) + * - zero if @rc was already zero + * - zero or error code from injecting if @rc was positive + * (program check injected to @vcpu) + */ +static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc) +{ + if (rc <= 0) + return rc; + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); +} + +/* implemented in interrupt.c */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int psw_extint_disabled(struct kvm_vcpu *vcpu); +void kvm_s390_destroy_adapters(struct kvm *kvm); +int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); + +/* implemented in guestdbg.c */ +void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); +void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu); +void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu); +int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); +void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); +void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); +void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9194a4b52b2..f89c1cd6775 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,7 +1,7 @@ /* - * priv.c - handling privileged instructions + * handling privileged instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -14,205 +14,456 @@ #include <linux/kvm.h> #include <linux/gfp.h> #include <linux/errno.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> +#include <asm/facility.h> #include <asm/current.h> #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> +#include <asm/ptrace.h> +#include <asm/compat.h> #include "gaccess.h" #include "kvm-s390.h" +#include "trace.h" + +/* Handle SCK (SET CLOCK) interception */ +static int handle_set_clock(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *cpup; + s64 hostclk, val; + int i, rc; + u64 op2; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + op2 = kvm_s390_get_base_disp_s(vcpu); + if (op2 & 7) /* Operand must be on a doubleword boundary */ + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + rc = read_guest(vcpu, op2, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + if (store_tod_clock(&hostclk)) { + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } + val = (val - hostclk) & ~0x3fUL; + + mutex_lock(&vcpu->kvm->lock); + kvm_for_each_vcpu(i, cpup, vcpu->kvm) + cpup->arch.sie_block->epoch = val; + mutex_unlock(&vcpu->kvm->lock); + + kvm_s390_set_psw_cc(vcpu, 0); + return 0; +} static int handle_set_prefix(struct kvm_vcpu *vcpu) { - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); u64 operand2; - u32 address = 0; - u8 tmp; + u32 address; + int rc; vcpu->stat.instruction_spx++; - operand2 = disp2; - if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - if (get_guest_u32(vcpu, operand2, &address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + rc = read_guest(vcpu, operand2, &address, sizeof(address)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); - address = address & 0x7fffe000u; + address &= 0x7fffe000u; - /* make sure that the new value is valid memory */ - if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || - (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + /* + * Make sure the new value is valid memory. We only need to check the + * first page, since address is 8k aligned and memory pieces are always + * at least 1MB aligned and have at least a size of 1MB. + */ + if (kvm_is_error_gpa(vcpu->kvm, address)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - vcpu->arch.sie_block->prefix = address; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); -out: + trace_kvm_s390_handle_prefix(vcpu, 1, address); return 0; } static int handle_store_prefix(struct kvm_vcpu *vcpu) { - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); u64 operand2; u32 address; + int rc; vcpu->stat.instruction_stpx++; - operand2 = disp2; - if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - address = vcpu->arch.sie_block->prefix; - address = address & 0x7fffe000u; + address = kvm_s390_get_prefix(vcpu); /* get the value */ - if (put_guest_u32(vcpu, operand2, address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + rc = write_guest(vcpu, operand2, &address, sizeof(address)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); -out: + trace_kvm_s390_handle_prefix(vcpu, 0, address); return 0; } static int handle_store_cpu_address(struct kvm_vcpu *vcpu) { - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); - u64 useraddr; + u16 vcpu_id = vcpu->vcpu_id; + u64 ga; int rc; vcpu->stat.instruction_stap++; - useraddr = disp2; - if (base2) - useraddr += vcpu->arch.guest_gprs[base2]; - if (useraddr & 1) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + ga = kvm_s390_get_base_disp_s(vcpu); - VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); -out: + if (ga & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga); + trace_kvm_s390_handle_stap(vcpu, ga); return 0; } +static void __skey_check_enable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) + return; + + s390_enable_skey(); + trace_kvm_s390_skey_related_inst(vcpu); + vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + + static int handle_skey(struct kvm_vcpu *vcpu) { + __skey_check_enable(vcpu); + vcpu->stat.instruction_storage_key++; - vcpu->arch.sie_block->gpsw.addr -= 4; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } -static int handle_stsch(struct kvm_vcpu *vcpu) +static int handle_ipte_interlock(struct kvm_vcpu *vcpu) { - vcpu->stat.instruction_stsch++; - VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + + vcpu->stat.instruction_ipte_interlock++; + if (psw_bits(*psw).p) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); + psw->addr = __rewind_psw(*psw, 4); + VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } -static int handle_chsc(struct kvm_vcpu *vcpu) +static int handle_test_block(struct kvm_vcpu *vcpu) { - vcpu->stat.instruction_chsc++; - VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + gpa_t addr; + int reg2; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + kvm_s390_get_regs_rre(vcpu, NULL, ®2); + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + if (kvm_s390_check_low_addr_protection(vcpu, addr)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + addr = kvm_s390_real_to_abs(vcpu, addr); + + if (kvm_is_error_gpa(vcpu->kvm, addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + /* + * We don't expect errors on modern systems, and do not care + * about storage keys (yet), so let's just clear the page. + */ + if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE)) + return -EFAULT; + kvm_s390_set_psw_cc(vcpu, 0); + vcpu->run->s.regs.gprs[0] = 0; return 0; } +static int handle_tpi(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_interrupt_info *inti; + unsigned long len; + u32 tpi_data[3]; + int cc, rc; + u64 addr; + + rc = 0; + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + cc = 0; + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); + if (!inti) + goto no_interrupt; + cc = 1; + tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr; + tpi_data[1] = inti->io.io_int_parm; + tpi_data[2] = inti->io.io_int_word; + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + len = sizeof(tpi_data) - 4; + rc = write_guest(vcpu, addr, &tpi_data, len); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + len = sizeof(tpi_data); + if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) + rc = -EFAULT; + } + /* + * If we encounter a problem storing the interruption code, the + * instruction is suppressed from the guest's view: reinject the + * interrupt. + */ + if (!rc) + kfree(inti); + else + kvm_s390_reinject_io_int(vcpu->kvm, inti); +no_interrupt: + /* Set condition code and we're done. */ + if (!rc) + kvm_s390_set_psw_cc(vcpu, cc); + return rc ? -EFAULT : 0; +} + +static int handle_tsch(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_interrupt_info *inti; + + inti = kvm_s390_get_io_int(vcpu->kvm, 0, + vcpu->run->s.regs.gprs[1]); + + /* + * Prepare exit to userspace. + * We indicate whether we dequeued a pending I/O interrupt + * so that userspace can re-inject it if the instruction gets + * a program check. While this may re-order the pending I/O + * interrupts, this is no problem since the priority is kept + * intact. + */ + vcpu->run->exit_reason = KVM_EXIT_S390_TSCH; + vcpu->run->s390_tsch.dequeued = !!inti; + if (inti) { + vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id; + vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr; + vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm; + vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word; + } + vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb; + kfree(inti); + return -EREMOTE; +} + +static int handle_io_inst(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->kvm->arch.css_support) { + /* + * Most I/O instructions will be handled by userspace. + * Exceptions are tpi and the interrupt portion of tsch. + */ + if (vcpu->arch.sie_block->ipa == 0xb236) + return handle_tpi(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb235) + return handle_tsch(vcpu); + /* Handle in userspace. */ + return -EOPNOTSUPP; + } else { + /* + * Set condition code 3 to stop the guest from issuing channel + * I/O instructions. + */ + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } +} + static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; - /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), - &facility_list, sizeof(facility_list)); - if (rc == -EFAULT) - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - else - VCPU_EVENT(vcpu, 5, "store facility list value %x", - facility_list); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), + vfacilities, 4); + if (rc) + return rc; + VCPU_EVENT(vcpu, 5, "store facility list value %x", + *(unsigned int *) vfacilities); + trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); + return 0; +} + +static void handle_new_psw(struct kvm_vcpu *vcpu) +{ + /* Check whether the new psw is enabled for machine checks. */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) + kvm_s390_deliver_pending_machine_checks(vcpu); +} + +#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) +#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL +#define PSW_ADDR_24 0x0000000000ffffffUL +#define PSW_ADDR_31 0x000000007fffffffUL + +int is_valid_psw(psw_t *psw) +{ + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + if (psw->addr & 1) + return 0; + return 1; +} + +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) +{ + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; + psw_compat_t new_psw; + u64 addr; + int rc; + + if (gpsw->mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + handle_new_psw(vcpu); + return 0; +} + +static int handle_lpswe(struct kvm_vcpu *vcpu) +{ + psw_t new_psw; + u64 addr; + int rc; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + handle_new_psw(vcpu); return 0; } static int handle_stidp(struct kvm_vcpu *vcpu) { - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 stidp_data = vcpu->arch.stidp_data; u64 operand2; int rc; vcpu->stat.instruction_stidp++; - operand2 = disp2; - if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; - if (operand2 & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); -out: return 0; } static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int cpus = 0; int n; - spin_lock(&fi->lock); - for (n = 0; n < KVM_MAX_VCPUS; n++) - if (fi->local_int[n]) - cpus++; - spin_unlock(&fi->lock); + cpus = atomic_read(&vcpu->kvm->online_vcpus); /* deal with other level 3 hypervisors */ - if (stsi(mem, 3, 2, 2) == -ENOSYS) + if (stsi(mem, 3, 2, 2)) mem->count = 0; if (mem->count < 8) mem->count++; @@ -232,77 +483,106 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) static int handle_stsi(struct kvm_vcpu *vcpu) { - int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; - int sel1 = vcpu->arch.guest_gprs[0] & 0xff; - int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; + int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; u64 operand2; - unsigned long mem; + int rc = 0; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); - operand2 = disp2; - if (base2) - operand2 += vcpu->arch.guest_gprs[base2]; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - if (operand2 & 0xfff && fc > 0) + if (fc > 3) { + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } + + if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 + || vcpu->run->s.regs.gprs[1] & 0xffff0000) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - switch (fc) { - case 0: - vcpu->arch.guest_gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + if (fc == 0) { + vcpu->run->s.regs.gprs[0] = 3 << 28; + kvm_s390_set_psw_cc(vcpu, 0); return 0; + } + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 0xfff) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; - if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) - goto out_mem; + goto out_no_data; + if (stsi((void *) mem, fc, sel1, sel2)) + goto out_no_data; break; case 3: if (sel1 != 2 || sel2 != 2) - goto out_fail; + goto out_no_data; mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; - default: - goto out_fail; } - if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out_mem; + rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE); + if (rc) { + rc = kvm_s390_inject_prog_cond(vcpu, rc); + goto out; } + trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.guest_gprs[0] = 0; + kvm_s390_set_psw_cc(vcpu, 0); + vcpu->run->s.regs.gprs[0] = 0; return 0; -out_mem: +out_no_data: + kvm_s390_set_psw_cc(vcpu, 3); +out: free_page(mem); -out_fail: - /* condition code 3 */ - vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; - return 0; + return rc; } -static intercept_handler_t priv_handlers[256] = { +static const intercept_handler_t b2_handlers[256] = { [0x02] = handle_stidp, + [0x04] = handle_set_clock, [0x10] = handle_set_prefix, [0x11] = handle_store_prefix, [0x12] = handle_store_cpu_address, + [0x21] = handle_ipte_interlock, [0x29] = handle_skey, [0x2a] = handle_skey, [0x2b] = handle_skey, - [0x34] = handle_stsch, - [0x5f] = handle_chsc, + [0x2c] = handle_test_block, + [0x30] = handle_io_inst, + [0x31] = handle_io_inst, + [0x32] = handle_io_inst, + [0x33] = handle_io_inst, + [0x34] = handle_io_inst, + [0x35] = handle_io_inst, + [0x36] = handle_io_inst, + [0x37] = handle_io_inst, + [0x38] = handle_io_inst, + [0x39] = handle_io_inst, + [0x3a] = handle_io_inst, + [0x3b] = handle_io_inst, + [0x3c] = handle_io_inst, + [0x50] = handle_ipte_interlock, + [0x5f] = handle_io_inst, + [0x74] = handle_io_inst, + [0x76] = handle_io_inst, [0x7d] = handle_stsi, [0xb1] = handle_stfl, + [0xb2] = handle_lpswe, }; int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) @@ -310,19 +590,431 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) intercept_handler_t handler; /* - * a lot of B2 instructions are priviledged. We first check for - * the priviledges ones, that we can handle in the kernel. If the - * kernel can handle this instruction, we check for the problem - * state bit and (a) handle the instruction or (b) send a code 2 - * program check. - * Anything else goes to userspace.*/ - handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + * A lot of B2 instructions are priviledged. Here we check for + * the privileged ones, that we can handle in the kernel. + * Anything else goes to userspace. + */ + handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + + return -EOPNOTSUPP; +} + +static int handle_epsw(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + /* This basically extracts the mask half of the psw. */ + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; + vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; + if (reg2) { + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; + vcpu->run->s.regs.gprs[reg2] |= + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; + } + return 0; +} + +#define PFMF_RESERVED 0xfffc0101UL +#define PFMF_SK 0x00020000UL +#define PFMF_CF 0x00010000UL +#define PFMF_UI 0x00008000UL +#define PFMF_FSC 0x00007000UL +#define PFMF_NQ 0x00000800UL +#define PFMF_MR 0x00000400UL +#define PFMF_MC 0x00000200UL +#define PFMF_KEY 0x000000feUL + +static int handle_pfmf(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + unsigned long start, end; + + vcpu->stat.instruction_pfmf++; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + if (!MACHINE_HAS_PFMF) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Only provide non-quiescing support if the host supports it */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* No support for conditional-SSKE */ + if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_protection(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + case 0x00000000: + end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + break; + case 0x00001000: + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + break; + /* We dont support EDAT2 + case 0x00002000: + end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + break;*/ + default: + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + } + while (start < end) { + unsigned long useraddr, abs_addr; + + /* Translate guest address to host address */ + if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0) + abs_addr = kvm_s390_real_to_abs(vcpu, start); else - return handler(vcpu); + abs_addr = start; + useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr)); + if (kvm_is_error_hva(useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (clear_user((void __user *)useraddr, PAGE_SIZE)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { + __skey_check_enable(vcpu); + if (set_guest_storage_key(current->mm, useraddr, + vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + start += PAGE_SIZE; } + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) + vcpu->run->s.regs.gprs[reg2] = end; + return 0; +} + +static int handle_essa(struct kvm_vcpu *vcpu) +{ + /* entries expected to be 1FF */ + int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; + unsigned long *cbrlo, cbrle; + struct gmap *gmap; + int i; + + VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); + gmap = vcpu->arch.gmap; + vcpu->stat.instruction_essa++; + if (!kvm_s390_cmma_enabled(vcpu->kvm)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Rewind PSW to repeat the ESSA instruction */ + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ + cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); + down_read(&gmap->mm->mmap_sem); + for (i = 0; i < entries; ++i) { + cbrle = cbrlo[i]; + if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE)) + /* invalid entry */ + break; + /* try to free backing */ + __gmap_zap(cbrle, gmap); + } + up_read(&gmap->mm->mmap_sem); + if (i < entries) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + return 0; +} + +static const intercept_handler_t b9_handlers[256] = { + [0x8a] = handle_ipte_interlock, + [0x8d] = handle_epsw, + [0x8e] = handle_ipte_interlock, + [0x8f] = handle_ipte_interlock, + [0xab] = handle_essa, + [0xaf] = handle_pfmf, +}; + +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + /* This is handled just as for the B2 instructions. */ + handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + + return -EOPNOTSUPP; +} + +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u32 val = 0; + int reg, rc; + u64 ga; + + vcpu->stat.instruction_lctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rs(vcpu); + + if (ga & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); + + reg = reg1; + do { + rc = read_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + ga += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 ga; + u32 val; + int reg, rc; + + vcpu->stat.instruction_stctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rs(vcpu); + + if (ga & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); + + reg = reg1; + do { + val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful; + rc = write_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + ga += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 ga, val; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rsy(vcpu); + + if (ga & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); + + do { + rc = read_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + vcpu->arch.sie_block->gcr[reg] = val; + ga += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_stctg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 ga, val; + int reg, rc; + + vcpu->stat.instruction_stctg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rsy(vcpu); + + if (ga & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); + + do { + val = vcpu->arch.sie_block->gcr[reg]; + rc = write_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + ga += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static const intercept_handler_t eb_handlers[256] = { + [0x2f] = handle_lctlg, + [0x25] = handle_stctg, +}; + +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} + +static int handle_tprot(struct kvm_vcpu *vcpu) +{ + u64 address1, address2; + unsigned long hva, gpa; + int ret = 0, cc = 0; + bool writable; + + vcpu->stat.instruction_tprot++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); + + /* we only handle the Linux memory detection case: + * access key == 0 + * everything else goes to userspace. */ + if (address2 & 0xf0) + return -EOPNOTSUPP; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_lock(vcpu); + ret = guest_translate_address(vcpu, address1, &gpa, 1); + if (ret == PGM_PROTECTION) { + /* Write protected? Try again with read-only... */ + cc = 1; + ret = guest_translate_address(vcpu, address1, &gpa, 0); + } + if (ret) { + if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { + ret = kvm_s390_inject_program_int(vcpu, ret); + } else if (ret > 0) { + /* Translation not available */ + kvm_s390_set_psw_cc(vcpu, 3); + ret = 0; + } + goto out_unlock; + } + + hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); + if (kvm_is_error_hva(hva)) { + ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } else { + if (!writable) + cc = 1; /* Write not permitted ==> read-only */ + kvm_s390_set_psw_cc(vcpu, cc); + /* Note: CC2 only occurs for storage keys (not supported yet) */ + } +out_unlock: + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_unlock(vcpu); + return ret; +} + +int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) +{ + /* For e5xx... instructions we only handle TPROT */ + if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) + return handle_tprot(vcpu); + return -EOPNOTSUPP; +} + +static int handle_sckpf(struct kvm_vcpu *vcpu) +{ + u32 value; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) + return kvm_s390_inject_program_int(vcpu, + PGM_SPECIFICATION); + + value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff; + vcpu->arch.sie_block->todpr = value; + + return 0; +} + +static const intercept_handler_t x01_handlers[256] = { + [0x07] = handle_sckpf, +}; + +int kvm_s390_handle_01(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S deleted file mode 100644 index 7e9d30d567b..00000000000 --- a/arch/s390/kvm/sie64a.S +++ /dev/null @@ -1,96 +0,0 @@ -/* - * sie64a.S - low level sie call - * - * Copyright IBM Corp. 2008,2010 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> - */ - -#include <linux/errno.h> -#include <asm/asm-offsets.h> -#include <asm/setup.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> -#include <asm/thread_info.h> - -_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) - -/* - * offsets into stackframe - * SP_ = offsets into stack sie64 is called with - * SPI_ = offsets into irq stack - */ -SP_GREGS = __SF_EMPTY -SP_HOOK = __SF_EMPTY+8 -SP_GPP = __SF_EMPTY+16 -SPI_PSW = STACK_FRAME_OVERHEAD + __PT_PSW - - - .macro SPP newpp - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP - jz 0f - .insn s,0xb2800000,\newpp -0: - .endm - -sie_irq_handler: - SPP __LC_CMF_HPP # set host id - larl %r2,sie_inst - clg %r2,SPI_PSW+8(0,%r15) # intercepted sie - jne 1f - xc __LC_SIE_HOOK(8),__LC_SIE_HOOK - lg %r2,__LC_THREAD_INFO # pointer thread_info struct - tm __TI_flags+7(%r2),_TIF_EXIT_SIE - jz 0f - larl %r2,sie_exit # work pending, leave sie - stg %r2,__LC_RETURN_PSW+8 - br %r14 -0: larl %r2,sie_reenter # re-enter with guest id - stg %r2,__LC_RETURN_PSW+8 -1: br %r14 - -/* - * sie64a calling convention: - * %r2 pointer to sie control block - * %r3 guest register save area - */ - .globl sie64a -sie64a: - stg %r3,SP_GREGS(%r15) # save guest register save area - stmg %r6,%r14,__SF_GPRS(%r15) # save registers on entry - lgr %r14,%r2 # pointer to sie control block - larl %r5,sie_irq_handler - stg %r2,SP_GPP(%r15) - stg %r5,SP_HOOK(%r15) # save hook target - lmg %r0,%r13,0(%r3) # load guest gprs 0-13 -sie_reenter: - mvc __LC_SIE_HOOK(8),SP_HOOK(%r15) - SPP SP_GPP(%r15) # set guest id -sie_inst: - sie 0(%r14) - xc __LC_SIE_HOOK(8),__LC_SIE_HOOK - SPP __LC_CMF_HPP # set host id -sie_exit: - lg %r14,SP_GREGS(%r15) - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lghi %r2,0 - lmg %r6,%r14,__SF_GPRS(%r15) - br %r14 - -sie_err: - xc __LC_SIE_HOOK(8),__LC_SIE_HOOK - SPP __LC_CMF_HPP # set host id - lg %r14,SP_GREGS(%r15) - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lghi %r2,-EFAULT - lmg %r6,%r14,__SF_GPRS(%r15) - br %r14 - - .section __ex_table,"a" - .quad sie_inst,sie_err - .previous diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 702276f5e2f..43079a48cc9 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -1,7 +1,7 @@ /* - * sigp.c - handlinge interprocessor communication + * handling interprocessor communication * - * Copyright IBM Corp. 2008,2009 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,58 +15,38 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/slab.h> +#include <asm/sigp.h> #include "gaccess.h" #include "kvm-s390.h" - -/* sigp order codes */ -#define SIGP_SENSE 0x01 -#define SIGP_EXTERNAL_CALL 0x02 -#define SIGP_EMERGENCY 0x03 -#define SIGP_START 0x04 -#define SIGP_STOP 0x05 -#define SIGP_RESTART 0x06 -#define SIGP_STOP_STORE_STATUS 0x09 -#define SIGP_INITIAL_CPU_RESET 0x0b -#define SIGP_CPU_RESET 0x0c -#define SIGP_SET_PREFIX 0x0d -#define SIGP_STORE_STATUS_ADDR 0x0e -#define SIGP_SET_ARCH 0x12 - -/* cpu status bits */ -#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL -#define SIGP_STAT_INCORRECT_STATE 0x00000200UL -#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL -#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL -#define SIGP_STAT_STOPPED 0x00000040UL -#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL -#define SIGP_STAT_CHECK_STOP 0x00000010UL -#define SIGP_STAT_INOPERATIVE 0x00000004UL -#define SIGP_STAT_INVALID_ORDER 0x00000002UL -#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL - +#include "trace.h" static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, - unsigned long *reg) + u64 *reg) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int cpuflags; int rc; if (cpu_addr >= KVM_MAX_VCPUS) - return 3; /* not operational */ + return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - if (fi->local_int[cpu_addr] == NULL) - rc = 3; /* not operational */ - else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { - *reg &= 0xffffffff00000000UL; - rc = 1; /* status stored */ - } else { + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + cpuflags = atomic_read(li->cpuflags); + if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + else { *reg &= 0xffffffff00000000UL; - *reg |= SIGP_STAT_STOPPED; - rc = 1; /* status stored */ + if (cpuflags & CPUSTAT_ECALL_PEND) + *reg |= SIGP_STATUS_EXT_CALL_PENDING; + if (cpuflags & CPUSTAT_STOPPED) + *reg |= SIGP_STATUS_STOPPED; + rc = SIGP_CC_STATUS_STORED; } - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); return rc; @@ -74,44 +54,81 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct kvm_s390_local_interrupt *li; - struct kvm_s390_interrupt_info *inti; - int rc; + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_EMERGENCY, + .parm = vcpu->vcpu_id, + }; + struct kvm_vcpu *dst_vcpu = NULL; + int rc = 0; - if (cpu_addr >= KVM_MAX_VCPUS) - return 3; /* not operational */ + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); - if (!inti) - return -ENOMEM; + rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + if (!rc) + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); - inti->type = KVM_S390_INT_EMERGENCY; + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; +} - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = 3; /* not operational */ - kfree(inti); - goto unlock; +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, + u16 asn, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; + u16 p_asn, s_asn; + psw_t *psw; + u32 flags; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); + psw = &dst_vcpu->arch.sie_block->gpsw; + p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ + s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ + + /* Deliver the emergency signal? */ + if (!(flags & CPUSTAT_STOPPED) + || (psw->mask & psw_int_mask) != psw_int_mask + || ((flags & CPUSTAT_WAIT) && psw->addr != 0) + || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { + return __sigp_emergency(vcpu, cpu_addr); + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; } - spin_lock_bh(&li->lock); - list_add_tail(&inti->list, &li->list); - atomic_set(&li->active, 1); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); - spin_unlock_bh(&li->lock); - rc = 0; /* order accepted */ -unlock: - spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); - return rc; +} + +static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_EXTERNAL_CALL, + .parm = vcpu->vcpu_id, + }; + struct kvm_vcpu *dst_vcpu = NULL; + int rc; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + if (!rc) + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); + + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) { struct kvm_s390_interrupt_info *inti; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; inti = kzalloc(sizeof(*inti), GFP_ATOMIC); if (!inti) @@ -119,58 +136,71 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) inti->type = KVM_S390_SIGP_STOP; spin_lock_bh(&li->lock); + if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { + kfree(inti); + if ((action & ACTION_STORE_ON_STOP) != 0) + rc = -ESHUTDOWN; + goto out; + } list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); +out: spin_unlock_bh(&li->lock); - return 0; /* order accepted */ + return rc; } static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; int rc; if (cpu_addr >= KVM_MAX_VCPUS) - return 3; /* not operational */ + return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = 3; /* not operational */ - goto unlock; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; rc = __inject_sigp_stop(li, action); -unlock: - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); - return rc; -} -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - return __inject_sigp_stop(li, action); + if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + /* If the CPU has already been stopped, we still have + * to save the status when doing stop-and-store. This + * has to be done after unlocking all spinlocks. */ + rc = kvm_s390_store_status_unloaded(dst_vcpu, + KVM_S390_STORE_STATUS_NOADDR); + } + + return rc; } static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) { int rc; + unsigned int i; + struct kvm_vcpu *v; switch (parameter & 0xff) { case 0: - rc = 3; /* not operational */ + rc = SIGP_CC_NOT_OPERATIONAL; break; case 1: case 2: - rc = 0; /* order accepted */ + kvm_for_each_vcpu(i, v, vcpu->kvm) { + v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(v); + } + + rc = SIGP_CC_ORDER_CODE_ACCEPTED; break; default: rc = -EOPNOTSUPP; @@ -179,44 +209,41 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) } static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, - unsigned long *reg) + u64 *reg) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct kvm_s390_local_interrupt *li = NULL; + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; struct kvm_s390_interrupt_info *inti; int rc; - u8 tmp; - - /* make sure that the new value is valid memory */ - address = address & 0x7fffe000u; - if ((copy_from_user(&tmp, (void __user *) - (address + vcpu->arch.sie_block->gmsor) , 1)) || - (copy_from_user(&tmp, (void __user *)(address + - vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) { - *reg |= SIGP_STAT_INVALID_PARAMETER; - return 1; /* invalid parameter */ + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + /* + * Make sure the new value is valid memory. We only need to check the + * first page, since address is 8k aligned and memory pieces are always + * at least 1MB aligned and have at least a size of 1MB. + */ + address &= 0x7fffe000u; + if (kvm_is_error_gpa(vcpu->kvm, address)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + return SIGP_CC_STATUS_STORED; } inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) - return 2; /* busy */ - - spin_lock(&fi->lock); - if (cpu_addr < KVM_MAX_VCPUS) - li = fi->local_int[cpu_addr]; - - if (li == NULL) { - rc = 1; /* incorrect state */ - *reg &= SIGP_STAT_INCORRECT_STATE; - kfree(inti); - goto out_fi; - } + return SIGP_CC_BUSY; spin_lock_bh(&li->lock); /* cpu must be in stopped state */ - if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { - rc = 1; /* incorrect state */ - *reg &= SIGP_STAT_INCORRECT_STATE; + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + rc = SIGP_CC_STATUS_STORED; kfree(inti); goto out_li; } @@ -226,15 +253,96 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); - rc = 0; /* order accepted */ + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); out_li: spin_unlock_bh(&li->lock); -out_fi: - spin_unlock(&fi->lock); + return rc; +} + +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, + u32 addr, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + int flags; + int rc; + + if (cpu_id < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + spin_lock_bh(&dst_vcpu->arch.local_int.lock); + flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); + spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + if (!(flags & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } + + addr &= 0x7ffffe00; + rc = kvm_s390_store_status_unloaded(dst_vcpu, addr); + if (rc == -EFAULT) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + rc = SIGP_CC_STATUS_STORED; + } + return rc; +} + +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + u64 *reg) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + /* running */ + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_NOT_RUNNING; + rc = SIGP_CC_STATUS_STORED; + } + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + +/* Test whether the destination CPU is available and not busy */ +static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_local_interrupt *li; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; + struct kvm_vcpu *dst_vcpu = NULL; + + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) + rc = SIGP_CC_BUSY; + spin_unlock_bh(&li->lock); + return rc; } @@ -242,34 +350,34 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int r3 = vcpu->arch.sie_block->ipa & 0x000f; - int base2 = vcpu->arch.sie_block->ipb >> 28; - int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); u32 parameter; - u16 cpu_addr = vcpu->arch.guest_gprs[r3]; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; u8 order_code; int rc; /* sigp in userspace can exit */ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - order_code = disp2; - if (base2) - order_code += vcpu->arch.guest_gprs[base2]; + order_code = kvm_s390_get_base_disp_rs(vcpu); if (r1 % 2) - parameter = vcpu->arch.guest_gprs[r1]; + parameter = vcpu->run->s.regs.gprs[r1]; else - parameter = vcpu->arch.guest_gprs[r1 + 1]; + parameter = vcpu->run->s.regs.gprs[r1 + 1]; + trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; rc = __sigp_sense(vcpu, cpu_addr, - &vcpu->arch.guest_gprs[r1]); + &vcpu->run->s.regs.gprs[r1]); break; - case SIGP_EMERGENCY: + case SIGP_EXTERNAL_CALL: + vcpu->stat.instruction_sigp_external_call++; + rc = __sigp_external_call(vcpu, cpu_addr); + break; + case SIGP_EMERGENCY_SIGNAL: vcpu->stat.instruction_sigp_emergency++; rc = __sigp_emergency(vcpu, cpu_addr); break; @@ -277,22 +385,49 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) vcpu->stat.instruction_sigp_stop++; rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); break; - case SIGP_STOP_STORE_STATUS: + case SIGP_STOP_AND_STORE_STATUS: vcpu->stat.instruction_sigp_stop++; - rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP); + rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | + ACTION_STOP_ON_STOP); + break; + case SIGP_STORE_STATUS_AT_ADDRESS: + rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); break; - case SIGP_SET_ARCH: + case SIGP_SET_ARCHITECTURE: vcpu->stat.instruction_sigp_arch++; rc = __sigp_set_arch(vcpu, parameter); break; case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, - &vcpu->arch.guest_gprs[r1]); + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_COND_EMERGENCY_SIGNAL: + rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_START: + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) + rc = -EOPNOTSUPP; /* Handle START in user space */ break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - /* user space must know about restart */ + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { + VCPU_EVENT(vcpu, 4, + "sigp restart %x to handle userspace", + cpu_addr); + /* user space must know about restart */ + rc = -EOPNOTSUPP; + } + break; default: return -EOPNOTSUPP; } @@ -300,7 +435,41 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) if (rc < 0) return rc; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, rc); return 0; } + +/* + * Handle SIGP partial execution interception. + * + * This interception will occur at the source cpu when a source cpu sends an + * external call to a target cpu and the target cpu has the WAIT bit set in + * its cpuflags. Interception will occurr after the interrupt indicator bits at + * the target cpu have been set. All error cases will lead to instruction + * interception, therefore nothing is to be checked or prepared. + */ +int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) +{ + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + struct kvm_vcpu *dest_vcpu; + u8 order_code = kvm_s390_get_base_disp_rs(vcpu); + + trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); + + if (order_code == SIGP_EXTERNAL_CALL) { + dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + BUG_ON(dest_vcpu == NULL); + + spin_lock_bh(&dest_vcpu->arch.local_int.lock); + if (waitqueue_active(&dest_vcpu->wq)) + wake_up_interruptible(&dest_vcpu->wq); + dest_vcpu->preempted = true; + spin_unlock_bh(&dest_vcpu->arch.local_int.lock); + + kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED); + return 0; + } + + return -EOPNOTSUPP; +} diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h new file mode 100644 index 00000000000..647e9d6a481 --- /dev/null +++ b/arch/s390/kvm/trace-s390.h @@ -0,0 +1,273 @@ +#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVMS390_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm-s390 +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace-s390 + +/* + * Trace point for the creation of the kvm instance. + */ +TRACE_EVENT(kvm_s390_create_vm, + TP_PROTO(unsigned long type), + TP_ARGS(type), + + TP_STRUCT__entry( + __field(unsigned long, type) + ), + + TP_fast_assign( + __entry->type = type; + ), + + TP_printk("create vm%s", + __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "") + ); + +/* + * Trace points for creation and destruction of vpcus. + */ +TRACE_EVENT(kvm_s390_create_vcpu, + TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu, + struct kvm_s390_sie_block *sie_block), + TP_ARGS(id, vcpu, sie_block), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(struct kvm_vcpu *, vcpu) + __field(struct kvm_s390_sie_block *, sie_block) + ), + + TP_fast_assign( + __entry->id = id; + __entry->vcpu = vcpu; + __entry->sie_block = sie_block; + ), + + TP_printk("create cpu %d at %p, sie block at %p", __entry->id, + __entry->vcpu, __entry->sie_block) + ); + +TRACE_EVENT(kvm_s390_destroy_vcpu, + TP_PROTO(unsigned int id), + TP_ARGS(id), + + TP_STRUCT__entry( + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->id = id; + ), + + TP_printk("destroy cpu %d", __entry->id) + ); + +/* + * Trace point for start and stop of vpcus. + */ +TRACE_EVENT(kvm_s390_vcpu_start_stop, + TP_PROTO(unsigned int id, int state), + TP_ARGS(id, state), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(int, state) + ), + + TP_fast_assign( + __entry->id = id; + __entry->state = state; + ), + + TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping", + __entry->id) + ); + +/* + * Trace points for injection of interrupts, either per machine or + * per vcpu. + */ + +#define kvm_s390_int_type \ + {KVM_S390_SIGP_STOP, "sigp stop"}, \ + {KVM_S390_PROGRAM_INT, "program interrupt"}, \ + {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \ + {KVM_S390_RESTART, "sigp restart"}, \ + {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \ + {KVM_S390_INT_SERVICE, "sclp interrupt"}, \ + {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \ + {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"} + +TRACE_EVENT(kvm_s390_inject_vm, + TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who), + TP_ARGS(type, parm, parm64, who), + + TP_STRUCT__entry( + __field(__u32, inttype) + __field(__u32, parm) + __field(__u64, parm64) + __field(int, who) + ), + + TP_fast_assign( + __entry->inttype = type & 0x00000000ffffffff; + __entry->parm = parm; + __entry->parm64 = parm64; + __entry->who = who; + ), + + TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx", + (__entry->who == 1) ? " (from kernel)" : + (__entry->who == 2) ? " (from user)" : "", + __entry->inttype, + __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->parm, __entry->parm64) + ); + +TRACE_EVENT(kvm_s390_inject_vcpu, + TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \ + int who), + TP_ARGS(id, type, parm, parm64, who), + + TP_STRUCT__entry( + __field(int, id) + __field(__u32, inttype) + __field(__u32, parm) + __field(__u64, parm64) + __field(int, who) + ), + + TP_fast_assign( + __entry->id = id; + __entry->inttype = type & 0x00000000ffffffff; + __entry->parm = parm; + __entry->parm64 = parm64; + __entry->who = who; + ), + + TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx", + (__entry->who == 1) ? " (from kernel)" : + (__entry->who == 2) ? " (from user)" : "", + __entry->id, __entry->inttype, + __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->parm, __entry->parm64) + ); + +/* + * Trace point for the actual delivery of interrupts. + */ +TRACE_EVENT(kvm_s390_deliver_interrupt, + TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1), + TP_ARGS(id, type, data0, data1), + + TP_STRUCT__entry( + __field(int, id) + __field(__u32, inttype) + __field(__u64, data0) + __field(__u64, data1) + ), + + TP_fast_assign( + __entry->id = id; + __entry->inttype = type & 0x00000000ffffffff; + __entry->data0 = data0; + __entry->data1 = data1; + ), + + TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ + "data:%08llx %016llx", + __entry->id, __entry->inttype, + __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->data0, __entry->data1) + ); + +/* + * Trace point for resets that may be requested from userspace. + */ +TRACE_EVENT(kvm_s390_request_resets, + TP_PROTO(__u64 resets), + TP_ARGS(resets), + + TP_STRUCT__entry( + __field(__u64, resets) + ), + + TP_fast_assign( + __entry->resets = resets; + ), + + TP_printk("requesting userspace resets %llx", + __entry->resets) + ); + +/* + * Trace point for a vcpu's stop requests. + */ +TRACE_EVENT(kvm_s390_stop_request, + TP_PROTO(unsigned int action_bits), + TP_ARGS(action_bits), + + TP_STRUCT__entry( + __field(unsigned int, action_bits) + ), + + TP_fast_assign( + __entry->action_bits = action_bits; + ), + + TP_printk("stop request, action_bits = %08x", + __entry->action_bits) + ); + + +/* + * Trace point for enabling channel I/O instruction support. + */ +TRACE_EVENT(kvm_s390_enable_css, + TP_PROTO(void *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(void *, kvm) + ), + + TP_fast_assign( + __entry->kvm = kvm; + ), + + TP_printk("enabling channel I/O support (kvm @ %p)\n", + __entry->kvm) + ); + +/* + * Trace point for enabling and disabling interlocking-and-broadcasting + * suppression. + */ +TRACE_EVENT(kvm_s390_enable_disable_ibs, + TP_PROTO(unsigned int id, int state), + TP_ARGS(id, state), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(int, state) + ), + + TP_fast_assign( + __entry->id = id; + __entry->state = state; + ), + + TP_printk("%s ibs on cpu %d", + __entry->state ? "enabling" : "disabling", __entry->id) + ); + + +#endif /* _TRACE_KVMS390_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h new file mode 100644 index 00000000000..916834d7a73 --- /dev/null +++ b/arch/s390/kvm/trace.h @@ -0,0 +1,418 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> +#include <asm/sie.h> +#include <asm/debug.h> +#include <asm/dis.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* + * Helpers for vcpu-specific tracepoints containing the same information + * as s390dbf VCPU_EVENTs. + */ +#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu +#define VCPU_ARGS_COMMON vcpu +#define VCPU_FIELD_COMMON __field(int, id) \ + __field(unsigned long, pswmask) \ + __field(unsigned long, pswaddr) +#define VCPU_ASSIGN_COMMON do { \ + __entry->id = vcpu->vcpu_id; \ + __entry->pswmask = vcpu->arch.sie_block->gpsw.mask; \ + __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr; \ + } while (0); +#define VCPU_TP_PRINTK(p_str, p_args...) \ + TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ + __entry->pswmask, __entry->pswaddr, p_args) + +TRACE_EVENT(kvm_s390_skey_related_inst, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu") + ); + +TRACE_EVENT(kvm_s390_major_guest_pfault, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault") + ); + +TRACE_EVENT(kvm_s390_pfault_init, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token) + ); + +TRACE_EVENT(kvm_s390_pfault_done, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token) + ); + +/* + * Tracepoints for SIE entry and exit. + */ +TRACE_EVENT(kvm_s390_sie_enter, + TP_PROTO(VCPU_PROTO_COMMON, int cpuflags), + TP_ARGS(VCPU_ARGS_COMMON, cpuflags), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, cpuflags) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->cpuflags = cpuflags; + ), + + VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags) + ); + +TRACE_EVENT(kvm_s390_sie_fault, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + + VCPU_TP_PRINTK("%s", "fault in sie instruction") + ); + +TRACE_EVENT(kvm_s390_sie_exit, + TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode), + TP_ARGS(VCPU_ARGS_COMMON, icptcode), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u8, icptcode) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->icptcode = icptcode; + ), + + VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode, + __print_symbolic(__entry->icptcode, + sie_intercept_code)) + ); + +/* + * Trace point for intercepted instructions. + */ +TRACE_EVENT(kvm_s390_intercept_instruction, + TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb), + TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u64, instruction) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->instruction = ((__u64)ipa << 48) | + ((__u64)ipb << 16); + ), + + VCPU_TP_PRINTK("intercepted instruction %016llx (%s)", + __entry->instruction, + __print_symbolic(icpt_insn_decoder(__entry->instruction), + icpt_insn_codes)) + ); + +/* + * Trace point for intercepted program interruptions. + */ +TRACE_EVENT(kvm_s390_intercept_prog, + TP_PROTO(VCPU_PROTO_COMMON, __u16 code), + TP_ARGS(VCPU_ARGS_COMMON, code), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u16, code) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + ), + + VCPU_TP_PRINTK("intercepted program interruption %04x", + __entry->code) + ); + +/* + * Trace point for validity intercepts. + */ +TRACE_EVENT(kvm_s390_intercept_validity, + TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy), + TP_ARGS(VCPU_ARGS_COMMON, viwhy), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u16, viwhy) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->viwhy = viwhy; + ), + + VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy) + ); + +/* + * Trace points for instructions that are of special interest. + */ + +TRACE_EVENT(kvm_s390_handle_sigp, + TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \ + __u32 parameter), + TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u8, order_code) + __field(__u16, cpu_addr) + __field(__u32, parameter) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->order_code = order_code; + __entry->cpu_addr = cpu_addr; + __entry->parameter = parameter; + ), + + VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \ + "parameter %08x", __entry->order_code, + __print_symbolic(__entry->order_code, + sigp_order_codes), + __entry->cpu_addr, __entry->parameter) + ); + +TRACE_EVENT(kvm_s390_handle_sigp_pei, + TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr), + TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u8, order_code) + __field(__u16, cpu_addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->order_code = order_code; + __entry->cpu_addr = cpu_addr; + ), + + VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x", + __entry->order_code, + __print_symbolic(__entry->order_code, + sigp_order_codes), + __entry->cpu_addr) + ); + +TRACE_EVENT(kvm_s390_handle_diag, + TP_PROTO(VCPU_PROTO_COMMON, __u16 code), + TP_ARGS(VCPU_ARGS_COMMON, code), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u16, code) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + ), + + VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code, + __print_symbolic(__entry->code, diagnose_codes)) + ); + +TRACE_EVENT(kvm_s390_handle_lctl, + TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, g) + __field(int, reg1) + __field(int, reg3) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->g = g; + __entry->reg1 = reg1; + __entry->reg3 = reg3; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx", + __entry->g ? "lctlg" : "lctl", + __entry->reg1, __entry->reg3, __entry->addr) + ); + +TRACE_EVENT(kvm_s390_handle_stctl, + TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, g) + __field(int, reg1) + __field(int, reg3) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->g = g; + __entry->reg1 = reg1; + __entry->reg3 = reg3; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx", + __entry->g ? "stctg" : "stctl", + __entry->reg1, __entry->reg3, __entry->addr) + ); + +TRACE_EVENT(kvm_s390_handle_prefix, + TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address), + TP_ARGS(VCPU_ARGS_COMMON, set, address), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, set) + __field(u32, address) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->set = set; + __entry->address = address; + ), + + VCPU_TP_PRINTK("%s prefix to %08x", + __entry->set ? "setting" : "storing", + __entry->address) + ); + +TRACE_EVENT(kvm_s390_handle_stap, + TP_PROTO(VCPU_PROTO_COMMON, u64 address), + TP_ARGS(VCPU_ARGS_COMMON, address), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u64, address) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->address = address; + ), + + VCPU_TP_PRINTK("storing cpu address to %016llx", + __entry->address) + ); + +TRACE_EVENT(kvm_s390_handle_stfl, + TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list), + TP_ARGS(VCPU_ARGS_COMMON, facility_list), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(unsigned int, facility_list) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->facility_list = facility_list; + ), + + VCPU_TP_PRINTK("store facility list value %08x", + __entry->facility_list) + ); + +TRACE_EVENT(kvm_s390_handle_stsi, + TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, fc) + __field(int, sel1) + __field(int, sel2) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->fc = fc; + __entry->sel1 = sel1; + __entry->sel2 = sel2; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx", + __entry->fc, __entry->sel1, __entry->sel2, + __entry->addr) + ); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 761ab8b56af..c6d752e8bf2 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -2,8 +2,7 @@ # Makefile for s390-specific library files.. # -lib-y += delay.o string.o uaccess_std.o uaccess_pt.o -obj-y += usercopy.o -obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o -lib-$(CONFIG_64BIT) += uaccess_mvcos.o +lib-y += delay.o string.o uaccess.o find.o +obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o +obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 0f53110e1d0..a9f3d0042d5 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -1,7 +1,7 @@ /* * Precise Delay Loops for S390 * - * Copyright IBM Corp. 1999,2008 + * Copyright IBM Corp. 1999, 2008 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com>, */ @@ -12,6 +12,8 @@ #include <linux/module.h> #include <linux/irqflags.h> #include <linux/interrupt.h> +#include <asm/vtimer.h> +#include <asm/div64.h> void __delay(unsigned long loops) { @@ -27,48 +29,43 @@ void __delay(unsigned long loops) static void __udelay_disabled(unsigned long long usecs) { - unsigned long mask, cr0, cr0_saved; - u64 clock_saved; - u64 end; + unsigned long cr0, cr6, new; + u64 clock_saved, end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; - end = get_clock() + (usecs << 12); + end = get_tod_clock() + (usecs << 12); clock_saved = local_tick_disable(); - __ctl_store(cr0_saved, 0, 0); - cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; - __ctl_load(cr0 , 0, 0); + __ctl_store(cr0, 0, 0); + __ctl_store(cr6, 6, 6); + new = (cr0 & 0xffff00e0) | 0x00000800; + __ctl_load(new , 0, 0); + new = 0; + __ctl_load(new, 6, 6); lockdep_off(); do { set_clock_comparator(end); - trace_hardirqs_on(); - __load_psw_mask(mask); - local_irq_disable(); - } while (get_clock() < end); + vtime_stop_cpu(); + } while (get_tod_clock_fast() < end); lockdep_on(); - __ctl_load(cr0_saved, 0, 0); + __ctl_load(cr0, 0, 0); + __ctl_load(cr6, 6, 6); local_tick_enable(clock_saved); } static void __udelay_enabled(unsigned long long usecs) { - unsigned long mask; - u64 clock_saved; - u64 end; + u64 clock_saved, end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; - end = get_clock() + (usecs << 12); + end = get_tod_clock_fast() + (usecs << 12); do { clock_saved = 0; if (end < S390_lowcore.clock_comparator) { clock_saved = local_tick_disable(); set_clock_comparator(end); } - trace_hardirqs_on(); - __load_psw_mask(mask); - local_irq_disable(); + vtime_stop_cpu(); if (clock_saved) local_tick_enable(clock_saved); - } while (get_clock() < end); + } while (get_tod_clock_fast() < end); } /* @@ -112,7 +109,21 @@ void udelay_simple(unsigned long long usecs) { u64 end; - end = get_clock() + (usecs << 12); - while (get_clock() < end) + end = get_tod_clock_fast() + (usecs << 12); + while (get_tod_clock_fast() < end) cpu_relax(); } + +void __ndelay(unsigned long long nsecs) +{ + u64 end; + + nsecs <<= 9; + do_div(nsecs, 125); + end = get_tod_clock_fast() + nsecs; + if (nsecs & ~0xfffUL) + __udelay(nsecs >> 12); + while (get_tod_clock_fast() < end) + barrier(); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c index d9e62c0b576..261152f8324 100644 --- a/arch/s390/lib/div64.c +++ b/arch/s390/lib/div64.c @@ -1,9 +1,7 @@ /* - * arch/s390/lib/div64.c - * * __div64_32 implementation for 31 bit. * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), */ diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c new file mode 100644 index 00000000000..922003c1b90 --- /dev/null +++ b/arch/s390/lib/find.c @@ -0,0 +1,77 @@ +/* + * MSB0 numbered special bitops handling. + * + * On s390x the bits are numbered: + * |0..............63|64............127|128...........191|192...........255| + * and on s390: + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| + * + * The reason for this bit numbering is the fact that the hardware sets bits + * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap + * from the 'wrong end'. + */ + +#include <linux/compiler.h> +#include <linux/bitops.h> +#include <linux/export.h> + +unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG - 1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = (*p) & (~0UL << (BITS_PER_LONG - size)); + if (!tmp) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + (__fls(tmp) ^ (BITS_PER_LONG - 1)); +} +EXPORT_SYMBOL(find_first_bit_inv); + +unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + (offset / BITS_PER_LONG); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL >> offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; +found_first: + tmp &= (~0UL << (BITS_PER_LONG - size)); + if (!tmp) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + (__fls(tmp) ^ (BITS_PER_LONG - 1)); +} +EXPORT_SYMBOL(find_next_bit_inv); diff --git a/arch/s390/lib/mem32.S b/arch/s390/lib/mem32.S new file mode 100644 index 00000000000..14ca9244b61 --- /dev/null +++ b/arch/s390/lib/mem32.S @@ -0,0 +1,92 @@ +/* + * String handling functions. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +/* + * memset implementation + * + * This code corresponds to the C construct below. We do distinguish + * between clearing (c == 0) and setting a memory array (c != 0) simply + * because nearly all memset invocations in the kernel clear memory and + * the xc instruction is preferred in such cases. + * + * void *memset(void *s, int c, size_t n) + * { + * if (likely(c == 0)) + * return __builtin_memset(s, 0, n); + * return __builtin_memset(s, c, n); + * } + */ +ENTRY(memset) + basr %r5,%r0 +.Lmemset_base: + ltr %r4,%r4 + bzr %r14 + ltr %r3,%r3 + jnz .Lmemset_fill + ahi %r4,-1 + lr %r3,%r4 + srl %r3,8 + ltr %r3,%r3 + lr %r1,%r2 + je .Lmemset_clear_rest +.Lmemset_clear_loop: + xc 0(256,%r1),0(%r1) + la %r1,256(%r1) + brct %r3,.Lmemset_clear_loop +.Lmemset_clear_rest: + ex %r4,.Lmemset_xc-.Lmemset_base(%r5) + br %r14 +.Lmemset_fill: + stc %r3,0(%r2) + chi %r4,1 + lr %r1,%r2 + ber %r14 + ahi %r4,-2 + lr %r3,%r4 + srl %r3,8 + ltr %r3,%r3 + je .Lmemset_fill_rest +.Lmemset_fill_loop: + mvc 1(256,%r1),0(%r1) + la %r1,256(%r1) + brct %r3,.Lmemset_fill_loop +.Lmemset_fill_rest: + ex %r4,.Lmemset_mvc-.Lmemset_base(%r5) + br %r14 +.Lmemset_xc: + xc 0(1,%r1),0(%r1) +.Lmemset_mvc: + mvc 1(1,%r1),0(%r1) + +/* + * memcpy implementation + * + * void *memcpy(void *dest, const void *src, size_t n) + */ +ENTRY(memcpy) + basr %r5,%r0 +.Lmemcpy_base: + ltr %r4,%r4 + bzr %r14 + ahi %r4,-1 + lr %r0,%r4 + srl %r0,8 + ltr %r0,%r0 + lr %r1,%r2 + jnz .Lmemcpy_loop +.Lmemcpy_rest: + ex %r4,.Lmemcpy_mvc-.Lmemcpy_base(%r5) + br %r14 +.Lmemcpy_loop: + mvc 0(256,%r1),0(%r3) + la %r1,256(%r1) + la %r3,256(%r3) + brct %r0,.Lmemcpy_loop + j .Lmemcpy_rest +.Lmemcpy_mvc: + mvc 0(1,%r1),0(%r3) diff --git a/arch/s390/lib/mem64.S b/arch/s390/lib/mem64.S new file mode 100644 index 00000000000..c6d553e85ab --- /dev/null +++ b/arch/s390/lib/mem64.S @@ -0,0 +1,88 @@ +/* + * String handling functions. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +/* + * memset implementation + * + * This code corresponds to the C construct below. We do distinguish + * between clearing (c == 0) and setting a memory array (c != 0) simply + * because nearly all memset invocations in the kernel clear memory and + * the xc instruction is preferred in such cases. + * + * void *memset(void *s, int c, size_t n) + * { + * if (likely(c == 0)) + * return __builtin_memset(s, 0, n); + * return __builtin_memset(s, c, n); + * } + */ +ENTRY(memset) + ltgr %r4,%r4 + bzr %r14 + ltgr %r3,%r3 + jnz .Lmemset_fill + aghi %r4,-1 + srlg %r3,%r4,8 + ltgr %r3,%r3 + lgr %r1,%r2 + jz .Lmemset_clear_rest +.Lmemset_clear_loop: + xc 0(256,%r1),0(%r1) + la %r1,256(%r1) + brctg %r3,.Lmemset_clear_loop +.Lmemset_clear_rest: + larl %r3,.Lmemset_xc + ex %r4,0(%r3) + br %r14 +.Lmemset_fill: + stc %r3,0(%r2) + cghi %r4,1 + lgr %r1,%r2 + ber %r14 + aghi %r4,-2 + srlg %r3,%r4,8 + ltgr %r3,%r3 + jz .Lmemset_fill_rest +.Lmemset_fill_loop: + mvc 1(256,%r1),0(%r1) + la %r1,256(%r1) + brctg %r3,.Lmemset_fill_loop +.Lmemset_fill_rest: + larl %r3,.Lmemset_mvc + ex %r4,0(%r3) + br %r14 +.Lmemset_xc: + xc 0(1,%r1),0(%r1) +.Lmemset_mvc: + mvc 1(1,%r1),0(%r1) + +/* + * memcpy implementation + * + * void *memcpy(void *dest, const void *src, size_t n) + */ +ENTRY(memcpy) + ltgr %r4,%r4 + bzr %r14 + aghi %r4,-1 + srlg %r5,%r4,8 + ltgr %r5,%r5 + lgr %r1,%r2 + jnz .Lmemcpy_loop +.Lmemcpy_rest: + larl %r5,.Lmemcpy_mvc + ex %r4,0(%r5) + br %r14 +.Lmemcpy_loop: + mvc 0(256,%r1),0(%r3) + la %r1,256(%r1) + la %r3,256(%r3) + brctg %r5,.Lmemcpy_loop + j .Lmemcpy_rest +.Lmemcpy_mvc: + mvc 0(1,%r1),0(%r3) diff --git a/arch/s390/lib/qrnnd.S b/arch/s390/lib/qrnnd.S index eb1df632e74..d321329130e 100644 --- a/arch/s390/lib/qrnnd.S +++ b/arch/s390/lib/qrnnd.S @@ -1,5 +1,7 @@ # S/390 __udiv_qrnnd +#include <linux/linkage.h> + # r2 : &__r # r3 : upper half of 64 bit word n # r4 : lower half of 64 bit word n @@ -8,8 +10,7 @@ # the quotient q is to be returned .text - .globl __udiv_qrnnd -__udiv_qrnnd: +ENTRY(__udiv_qrnnd) st %r2,24(%r15) # store pointer to reminder for later lr %r0,%r3 # reload n lr %r1,%r4 diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 91754ffb920..5b0e445bc3f 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -1,8 +1,7 @@ /* - * arch/s390/lib/spinlock.c * Out of line spinlock code. * - * Copyright (C) IBM Corp. 2004, 2006 + * Copyright IBM Corp. 2004, 2006 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -10,6 +9,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/smp.h> #include <asm/io.h> int spin_retry = 1000; @@ -24,108 +24,102 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); -static inline void _raw_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - -static inline void _raw_yield_cpu(int cpu) -{ - if (MACHINE_HAS_DIAG9C) - asm volatile("diag %0,0,0x9c" - : : "d" (cpu_logical_map(cpu))); - else - _raw_yield(); -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { - int count = spin_retry; - unsigned int cpu = ~smp_processor_id(); + unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; + int count; while (1) { - owner = lp->owner_cpu; - if (!owner || smp_vcpu_scheduled(~owner)) { - for (count = spin_retry; count > 0; count--) { - if (arch_spin_is_locked(lp)) - continue; - if (_raw_compare_and_swap(&lp->owner_cpu, 0, - cpu) == 0) - return; - } - if (MACHINE_IS_LPAR) - continue; + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return; + continue; } - owner = lp->owner_cpu; - if (owner) - _raw_yield_cpu(~owner); - if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) - return; + /* Check if the lock owner is running. */ + if (!smp_vcpu_scheduled(~owner)) { + smp_yield_cpu(~owner); + continue; + } + /* Loop for a while on the lock value. */ + count = spin_retry; + do { + owner = ACCESS_ONCE(lp->lock); + } while (owner && count-- > 0); + if (!owner) + continue; + /* + * For multiple layers of hypervisors, e.g. z/VM + LPAR + * yield the CPU if the lock is still unavailable. + */ + if (!MACHINE_IS_LPAR) + smp_yield_cpu(~owner); } } EXPORT_SYMBOL(arch_spin_lock_wait); void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { - int count = spin_retry; - unsigned int cpu = ~smp_processor_id(); + unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; + int count; local_irq_restore(flags); while (1) { - owner = lp->owner_cpu; - if (!owner || smp_vcpu_scheduled(~owner)) { - for (count = spin_retry; count > 0; count--) { - if (arch_spin_is_locked(lp)) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&lp->owner_cpu, 0, - cpu) == 0) - return; - local_irq_restore(flags); - } - if (MACHINE_IS_LPAR) - continue; + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + local_irq_disable(); + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return; + local_irq_restore(flags); } - owner = lp->owner_cpu; - if (owner) - _raw_yield_cpu(~owner); - local_irq_disable(); - if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) - return; - local_irq_restore(flags); + /* Check if the lock owner is running. */ + if (!smp_vcpu_scheduled(~owner)) { + smp_yield_cpu(~owner); + continue; + } + /* Loop for a while on the lock value. */ + count = spin_retry; + do { + owner = ACCESS_ONCE(lp->lock); + } while (owner && count-- > 0); + if (!owner) + continue; + /* + * For multiple layers of hypervisors, e.g. z/VM + LPAR + * yield the CPU if the lock is still unavailable. + */ + if (!MACHINE_IS_LPAR) + smp_yield_cpu(~owner); } } EXPORT_SYMBOL(arch_spin_lock_wait_flags); +void arch_spin_relax(arch_spinlock_t *lp) +{ + unsigned int cpu = lp->lock; + if (cpu != 0) { + if (MACHINE_IS_VM || MACHINE_IS_KVM || + !smp_vcpu_scheduled(~cpu)) + smp_yield_cpu(~cpu); + } +} +EXPORT_SYMBOL(arch_spin_relax); + int arch_spin_trylock_retry(arch_spinlock_t *lp) { - unsigned int cpu = ~smp_processor_id(); int count; - for (count = spin_retry; count > 0; count--) { - if (arch_spin_is_locked(lp)) - continue; - if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) + for (count = spin_retry; count > 0; count--) + if (arch_spin_trylock_once(lp)) return 1; - } return 0; } EXPORT_SYMBOL(arch_spin_trylock_retry); -void arch_spin_relax(arch_spinlock_t *lock) -{ - unsigned int cpu = lock->owner_cpu; - if (cpu != 0) { - if (MACHINE_IS_VM || MACHINE_IS_KVM || - !smp_vcpu_scheduled(~cpu)) - _raw_yield_cpu(~cpu); - } -} -EXPORT_SYMBOL(arch_spin_relax); - void _raw_read_lock_wait(arch_rwlock_t *rw) { unsigned int old; @@ -133,13 +127,13 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } - if (!arch_read_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) continue; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return; } } @@ -153,15 +147,16 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) local_irq_restore(flags); while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } - if (!arch_read_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) continue; - old = rw->lock & 0x7fffffffU; local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return; + local_irq_restore(flags); } } EXPORT_SYMBOL(_raw_read_lock_wait_flags); @@ -172,10 +167,10 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) int count = spin_retry; while (count-- > 0) { - if (!arch_read_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) continue; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return 1; } return 0; @@ -184,16 +179,18 @@ EXPORT_SYMBOL(_raw_read_trylock_retry); void _raw_write_lock_wait(arch_rwlock_t *rw) { + unsigned int old; int count = spin_retry; while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } - if (!arch_write_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if (old) continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return; } } @@ -201,31 +198,36 @@ EXPORT_SYMBOL(_raw_write_lock_wait); void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { + unsigned int old; int count = spin_retry; local_irq_restore(flags); while (1) { if (count-- <= 0) { - _raw_yield(); + smp_yield(); count = spin_retry; } - if (!arch_write_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if (old) continue; local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return; + local_irq_restore(flags); } } EXPORT_SYMBOL(_raw_write_lock_wait_flags); int _raw_write_trylock_retry(arch_rwlock_t *rw) { + unsigned int old; int count = spin_retry; while (count-- > 0) { - if (!arch_write_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if (old) continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return 1; } return 0; diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 4143b7c1909..b647d5ff0ad 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -1,9 +1,8 @@ /* - * arch/s390/lib/string.c * Optimized string functions * * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2004 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -44,11 +43,7 @@ static inline char *__strnend(const char *s, size_t n) */ size_t strlen(const char *s) { -#if __GNUC__ < 4 return __strend(s) - s; -#else - return __builtin_strlen(s); -#endif } EXPORT_SYMBOL(strlen); @@ -74,7 +69,6 @@ EXPORT_SYMBOL(strnlen); */ char *strcpy(char *dest, const char *src) { -#if __GNUC__ < 4 register int r0 asm("0") = 0; char *ret = dest; @@ -83,9 +77,6 @@ char *strcpy(char *dest, const char *src) : "+&a" (dest), "+&a" (src) : "d" (r0) : "cc", "memory" ); return ret; -#else - return __builtin_strcpy(dest, src); -#endif } EXPORT_SYMBOL(strcpy); @@ -107,7 +98,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) if (size) { size_t len = (ret >= size) ? size-1 : ret; dest[len] = '\0'; - __builtin_memcpy(dest, src, len); + memcpy(dest, src, len); } return ret; } @@ -125,8 +116,8 @@ EXPORT_SYMBOL(strlcpy); char *strncpy(char *dest, const char *src, size_t n) { size_t len = __strnend(src, n) - src; - __builtin_memset(dest + len, 0, n - len); - __builtin_memcpy(dest, src, len); + memset(dest + len, 0, n - len); + memcpy(dest, src, len); return dest; } EXPORT_SYMBOL(strncpy); @@ -172,7 +163,7 @@ size_t strlcat(char *dest, const char *src, size_t n) if (len >= n) len = n - 1; dest[len] = '\0'; - __builtin_memcpy(dest, src, len); + memcpy(dest, src, len); } return res; } @@ -195,7 +186,7 @@ char *strncat(char *dest, const char *src, size_t n) char *p = __strend(dest); p[len] = '\0'; - __builtin_memcpy(p, src, len); + memcpy(p, src, len); return dest; } EXPORT_SYMBOL(strncat); @@ -349,41 +340,3 @@ void *memscan(void *s, int c, size_t n) return (void *) ret; } EXPORT_SYMBOL(memscan); - -/** - * memcpy - Copy one area of memory to another - * @dest: Where to copy to - * @src: Where to copy from - * @n: The size of the area. - * - * returns a pointer to @dest - */ -void *memcpy(void *dest, const void *src, size_t n) -{ - return __builtin_memcpy(dest, src, n); -} -EXPORT_SYMBOL(memcpy); - -/** - * memset - Fill a region of memory with the given value - * @s: Pointer to the start of the area. - * @c: The byte to fill the area with - * @n: The size of the area. - * - * returns a pointer to @s - */ -void *memset(void *s, int c, size_t n) -{ - char *xs; - - if (c == 0) - return __builtin_memset(s, 0, n); - - xs = (char *) s; - if (n > 0) - do { - *xs++ = c; - } while (--n > 0); - return s; -} -EXPORT_SYMBOL(memset); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c new file mode 100644 index 00000000000..53dd5d7a0c9 --- /dev/null +++ b/arch/s390/lib/uaccess.c @@ -0,0 +1,406 @@ +/* + * Standard user space access functions based on mvcp/mvcs and doing + * interesting things in the secondary space mode. + * + * Copyright IBM Corp. 2006,2014 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include <linux/jump_label.h> +#include <linux/uaccess.h> +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <asm/mmu_context.h> +#include <asm/facility.h> + +#ifndef CONFIG_64BIT +#define AHI "ahi" +#define ALR "alr" +#define CLR "clr" +#define LHI "lhi" +#define SLR "slr" +#else +#define AHI "aghi" +#define ALR "algr" +#define CLR "clgr" +#define LHI "lghi" +#define SLR "slgr" +#endif + +static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE; + +static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x81UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "9: jz 7f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 4f\n" + "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + "10:"SLR" %0,%4\n" + " "ALR" %2,%4\n" + "4:"LHI" %4,-1\n" + " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ + " bras %3,6f\n" /* memset loop */ + " xc 0(1,%2),0(%2)\n" + "5: xc 0(256,%2),0(%2)\n" + " la %2,256(%2)\n" + "6:"AHI" %4,-256\n" + " jnm 5b\n" + " ex %4,0(%3)\n" + " j 8f\n" + "7:"SLR" %0,%0\n" + "8:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, + unsigned long size) +{ + unsigned long tmp1, tmp2; + + load_kernel_asce(); + tmp1 = -256UL; + asm volatile( + " sacf 0\n" + "0: mvcp 0(%0,%2),0(%1),%3\n" + "10:jz 8f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcp 0(%0,%2),0(%1),%3\n" + "11:jnz 1b\n" + " j 8f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "4: mvcp 0(%4,%2),0(%1),%3\n" + "12:"SLR" %0,%4\n" + " "ALR" %2,%4\n" + "5:"LHI" %4,-1\n" + " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ + " bras %3,7f\n" /* memset loop */ + " xc 0(1,%2),0(%2)\n" + "6: xc 0(256,%2),0(%2)\n" + " la %2,256(%2)\n" + "7:"AHI" %4,-256\n" + " jnm 6b\n" + " ex %4,0(%3)\n" + " j 9f\n" + "8:"SLR" %0,%0\n" + "9: sacf 768\n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) + EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (static_key_false(&have_mvcos)) + return copy_from_user_mvcos(to, from, n); + return copy_from_user_mvcp(to, from, n); +} +EXPORT_SYMBOL(__copy_from_user); + +static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + "6: jz 4f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + "7:"SLR" %0,%4\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, + unsigned long size) +{ + unsigned long tmp1, tmp2; + + load_kernel_asce(); + tmp1 = -256UL; + asm volatile( + " sacf 0\n" + "0: mvcs 0(%0,%1),0(%2),%3\n" + "7: jz 5f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcs 0(%0,%1),0(%2),%3\n" + "8: jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "4: mvcs 0(%4,%1),0(%2),%3\n" + "9:"SLR" %0,%4\n" + " j 6f\n" + "5:"SLR" %0,%0\n" + "6: sacf 768\n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (static_key_false(&have_mvcos)) + return copy_to_user_mvcos(to, from, n); + return copy_to_user_mvcs(to, from, n); +} +EXPORT_SYMBOL(__copy_to_user); + +static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x810081UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + /* FIXME: copy with reduced length. */ + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + " jz 2f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2:"SLR" %0,%0\n" + "3: \n" + EX_TABLE(0b,3b) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, + unsigned long size) +{ + unsigned long tmp1; + + load_kernel_asce(); + asm volatile( + " sacf 256\n" + " "AHI" %0,-1\n" + " jo 5f\n" + " bras %3,3f\n" + "0:"AHI" %0,257\n" + "1: mvc 0(1,%1),0(%2)\n" + " la %1,1(%1)\n" + " la %2,1(%2)\n" + " "AHI" %0,-1\n" + " jnz 1b\n" + " j 5f\n" + "2: mvc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,1b-0b(%3)\n" + "5: "SLR" %0,%0\n" + "6: sacf 768\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) + : : "cc", "memory"); + return size; +} + +unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + if (static_key_false(&have_mvcos)) + return copy_in_user_mvcos(to, from, n); + return copy_in_user_mvc(to, from, n); +} +EXPORT_SYMBOL(__copy_in_user); + +static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + " jz 4f\n" + "1:"ALR" %0,%2\n" + " "SLR" %1,%2\n" + " j 0b\n" + "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ + " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ + " "SLR" %3,%1\n" + " "CLR" %0,%3\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + " "SLR" %0,%3\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) + : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long clear_user_xc(void __user *to, unsigned long size) +{ + unsigned long tmp1, tmp2; + + load_kernel_asce(); + asm volatile( + " sacf 256\n" + " "AHI" %0,-1\n" + " jo 5f\n" + " bras %3,3f\n" + " xc 0(1,%1),0(%1)\n" + "0:"AHI" %0,257\n" + " la %2,255(%1)\n" /* %2 = ptr + 255 */ + " srl %2,12\n" + " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ + " "SLR" %2,%1\n" + " "CLR" %0,%2\n" /* clear crosses next page boundary? */ + " jnh 5f\n" + " "AHI" %2,-1\n" + "1: ex %2,0(%3)\n" + " "AHI" %2,1\n" + " "SLR" %0,%2\n" + " j 5f\n" + "2: xc 0(256,%1),0(%1)\n" + " la %1,256(%1)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,0(%3)\n" + "5: "SLR" %0,%0\n" + "6: sacf 768\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +unsigned long __clear_user(void __user *to, unsigned long size) +{ + if (static_key_false(&have_mvcos)) + return clear_user_mvcos(to, size); + return clear_user_xc(to, size); +} +EXPORT_SYMBOL(__clear_user); + +static inline unsigned long strnlen_user_srst(const char __user *src, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0; + unsigned long tmp1, tmp2; + + asm volatile( + " la %2,0(%1)\n" + " la %3,0(%0,%1)\n" + " "SLR" %0,%0\n" + " sacf 256\n" + "0: srst %3,%2\n" + " jo 0b\n" + " la %0,1(%3)\n" /* strnlen_user results includes \0 */ + " "SLR" %0,%1\n" + "1: sacf 768\n" + EX_TABLE(0b,1b) + : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +unsigned long __strnlen_user(const char __user *src, unsigned long size) +{ + if (unlikely(!size)) + return 0; + load_kernel_asce(); + return strnlen_user_srst(src, size); +} +EXPORT_SYMBOL(__strnlen_user); + +long __strncpy_from_user(char *dst, const char __user *src, long size) +{ + size_t done, len, offset, len_str; + + if (unlikely(size <= 0)) + return 0; + done = 0; + do { + offset = (size_t)src & ~PAGE_MASK; + len = min(size - done, PAGE_SIZE - offset); + if (copy_from_user(dst, src, len)) + return -EFAULT; + len_str = strnlen(dst, len); + done += len_str; + src += len_str; + dst += len_str; + } while ((len_str == len) && (done < size)); + return done; +} +EXPORT_SYMBOL(__strncpy_from_user); + +/* + * The "old" uaccess variant without mvcos can be enforced with the + * uaccess_primary kernel parameter. This is mainly for debugging purposes. + */ +static int uaccess_primary __initdata; + +static int __init parse_uaccess_pt(char *__unused) +{ + uaccess_primary = 1; + return 0; +} +early_param("uaccess_primary", parse_uaccess_pt); + +static int __init uaccess_init(void) +{ + if (IS_ENABLED(CONFIG_64BIT) && !uaccess_primary && test_facility(27)) + static_key_slow_inc(&have_mvcos); + return 0; +} +early_initcall(uaccess_init); diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h deleted file mode 100644 index 126011df14f..00000000000 --- a/arch/s390/lib/uaccess.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * arch/s390/uaccess.h - * - * Copyright IBM Corp. 2007 - * - */ - -#ifndef __ARCH_S390_LIB_UACCESS_H -#define __ARCH_S390_LIB_UACCESS_H - -extern size_t copy_from_user_std(size_t, const void __user *, void *); -extern size_t copy_to_user_std(size_t, void __user *, const void *); -extern size_t strnlen_user_std(size_t, const char __user *); -extern size_t strncpy_from_user_std(size_t, const char __user *, char *); -extern int futex_atomic_cmpxchg_std(int __user *, int, int); -extern int futex_atomic_op_std(int, int __user *, int, int *); - -extern size_t copy_from_user_pt(size_t, const void __user *, void *); -extern size_t copy_to_user_pt(size_t, void __user *, const void *); -extern int futex_atomic_op_pt(int, int __user *, int, int *); -extern int futex_atomic_cmpxchg_pt(int __user *, int, int); - -#endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c deleted file mode 100644 index 60455f104ea..00000000000 --- a/arch/s390/lib/uaccess_mvcos.c +++ /dev/null @@ -1,227 +0,0 @@ -/* - * arch/s390/lib/uaccess_mvcos.c - * - * Optimized user space space access functions based on mvcos. - * - * Copyright (C) IBM Corp. 2006 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Gerald Schaefer (gerald.schaefer@de.ibm.com) - */ - -#include <linux/errno.h> -#include <linux/mm.h> -#include <asm/uaccess.h> -#include <asm/futex.h> -#include "uaccess.h" - -#ifndef __s390x__ -#define AHI "ahi" -#define ALR "alr" -#define CLR "clr" -#define LHI "lhi" -#define SLR "slr" -#else -#define AHI "aghi" -#define ALR "algr" -#define CLR "clgr" -#define LHI "lghi" -#define SLR "slgr" -#endif - -static size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) -{ - register unsigned long reg0 asm("0") = 0x81UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" - "9: jz 7f\n" - "1:"ALR" %0,%3\n" - " "SLR" %1,%3\n" - " "SLR" %2,%3\n" - " j 0b\n" - "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ - " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 4f\n" - "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" - "10:"SLR" %0,%4\n" - " "ALR" %2,%4\n" - "4:"LHI" %4,-1\n" - " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ - " bras %3,6f\n" /* memset loop */ - " xc 0(1,%2),0(%2)\n" - "5: xc 0(256,%2),0(%2)\n" - " la %2,256(%2)\n" - "6:"AHI" %4,-256\n" - " jnm 5b\n" - " ex %4,0(%3)\n" - " j 8f\n" - "7:"SLR" %0,%0\n" - "8: \n" - EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -static size_t copy_from_user_mvcos_check(size_t size, const void __user *ptr, void *x) -{ - if (size <= 256) - return copy_from_user_std(size, ptr, x); - return copy_from_user_mvcos(size, ptr, x); -} - -static size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) -{ - register unsigned long reg0 asm("0") = 0x810000UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" - "6: jz 4f\n" - "1:"ALR" %0,%3\n" - " "SLR" %1,%3\n" - " "SLR" %2,%3\n" - " j 0b\n" - "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ - " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" - "7:"SLR" %0,%4\n" - " j 5f\n" - "4:"SLR" %0,%0\n" - "5: \n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -static size_t copy_to_user_mvcos_check(size_t size, void __user *ptr, - const void *x) -{ - if (size <= 256) - return copy_to_user_std(size, ptr, x); - return copy_to_user_mvcos(size, ptr, x); -} - -static size_t copy_in_user_mvcos(size_t size, void __user *to, - const void __user *from) -{ - register unsigned long reg0 asm("0") = 0x810081UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - /* FIXME: copy with reduced length. */ - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" - " jz 2f\n" - "1:"ALR" %0,%3\n" - " "SLR" %1,%3\n" - " "SLR" %2,%3\n" - " j 0b\n" - "2:"SLR" %0,%0\n" - "3: \n" - EX_TABLE(0b,3b) - : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -static size_t clear_user_mvcos(size_t size, void __user *to) -{ - register unsigned long reg0 asm("0") = 0x810000UL; - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - asm volatile( - "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" - " jz 4f\n" - "1:"ALR" %0,%2\n" - " "SLR" %1,%2\n" - " j 0b\n" - "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ - " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ - " "SLR" %3,%1\n" - " "CLR" %0,%3\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" - " "SLR" %0,%3\n" - " j 5f\n" - "4:"SLR" %0,%0\n" - "5: \n" - EX_TABLE(0b,2b) EX_TABLE(3b,5b) - : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) - : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); - return size; -} - -static size_t strnlen_user_mvcos(size_t count, const char __user *src) -{ - char buf[256]; - int rc; - size_t done, len, len_str; - - done = 0; - do { - len = min(count - done, (size_t) 256); - rc = uaccess.copy_from_user(len, src + done, buf); - if (unlikely(rc == len)) - return 0; - len -= rc; - len_str = strnlen(buf, len); - done += len_str; - } while ((len_str == len) && (done < count)); - return done + 1; -} - -static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, - char *dst) -{ - int rc; - size_t done, len, len_str; - - done = 0; - do { - len = min(count - done, (size_t) 4096); - rc = uaccess.copy_from_user(len, src + done, dst); - if (unlikely(rc == len)) - return -EFAULT; - len -= rc; - len_str = strnlen(dst, len); - done += len_str; - } while ((len_str == len) && (done < count)); - return done; -} - -struct uaccess_ops uaccess_mvcos = { - .copy_from_user = copy_from_user_mvcos_check, - .copy_from_user_small = copy_from_user_std, - .copy_to_user = copy_to_user_mvcos_check, - .copy_to_user_small = copy_to_user_std, - .copy_in_user = copy_in_user_mvcos, - .clear_user = clear_user_mvcos, - .strnlen_user = strnlen_user_std, - .strncpy_from_user = strncpy_from_user_std, - .futex_atomic_op = futex_atomic_op_std, - .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, -}; - -struct uaccess_ops uaccess_mvcos_switch = { - .copy_from_user = copy_from_user_mvcos, - .copy_from_user_small = copy_from_user_mvcos, - .copy_to_user = copy_to_user_mvcos, - .copy_to_user_small = copy_to_user_mvcos, - .copy_in_user = copy_in_user_mvcos, - .clear_user = clear_user_mvcos, - .strnlen_user = strnlen_user_mvcos, - .strncpy_from_user = strncpy_from_user_mvcos, - .futex_atomic_op = futex_atomic_op_pt, - .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, -}; diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c deleted file mode 100644 index 404f2de296d..00000000000 --- a/arch/s390/lib/uaccess_pt.c +++ /dev/null @@ -1,401 +0,0 @@ -/* - * arch/s390/lib/uaccess_pt.c - * - * User access functions based on page table walks for enhanced - * system layout without hardware support. - * - * Copyright IBM Corp. 2006 - * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) - */ - -#include <linux/errno.h> -#include <linux/hardirq.h> -#include <linux/mm.h> -#include <asm/uaccess.h> -#include <asm/futex.h> -#include "uaccess.h" - -static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return (pte_t *) 0x3a; - - pud = pud_offset(pgd, addr); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return (pte_t *) 0x3b; - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return (pte_t *) 0x10; - - return pte_offset_map(pmd, addr); -} - -static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, - size_t n, int write_user) -{ - struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, size; - pte_t *pte; - void *from, *to; - - done = 0; -retry: - spin_lock(&mm->page_table_lock); - do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } else if (write_user && !pte_write(*pte)) { - pte = (pte_t *) 0x04; - goto fault; - } - - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); - size = min(n - done, PAGE_SIZE - offset); - if (write_user) { - to = (void *)((pfn << PAGE_SHIFT) + offset); - from = kptr + done; - } else { - from = (void *)((pfn << PAGE_SHIFT) + offset); - to = kptr + done; - } - memcpy(to, from, size); - done += size; - uaddr += size; - } while (done < n); - spin_unlock(&mm->page_table_lock); - return n - done; -fault: - spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, write_user)) - return n - done; - goto retry; -} - -/* - * Do DAT for user address by page table walk, return kernel address. - * This function needs to be called with current->mm->page_table_lock held. - */ -static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) -{ - struct mm_struct *mm = current->mm; - unsigned long pfn; - pte_t *pte; - int rc; - -retry: - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } - - pfn = pte_pfn(*pte); - return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); -fault: - spin_unlock(&mm->page_table_lock); - rc = __handle_fault(uaddr, (unsigned long) pte, 0); - spin_lock(&mm->page_table_lock); - if (!rc) - goto retry; - return 0; -} - -size_t copy_from_user_pt(size_t n, const void __user *from, void *to) -{ - size_t rc; - - if (segment_eq(get_fs(), KERNEL_DS)) { - memcpy(to, (void __kernel __force *) from, n); - return 0; - } - rc = __user_copy_pt((unsigned long) from, to, n, 0); - if (unlikely(rc)) - memset(to + n - rc, 0, rc); - return rc; -} - -size_t copy_to_user_pt(size_t n, void __user *to, const void *from) -{ - if (segment_eq(get_fs(), KERNEL_DS)) { - memcpy((void __kernel __force *) to, from, n); - return 0; - } - return __user_copy_pt((unsigned long) to, (void *) from, n, 1); -} - -static size_t clear_user_pt(size_t n, void __user *to) -{ - long done, size, ret; - - if (segment_eq(get_fs(), KERNEL_DS)) { - memset((void __kernel __force *) to, 0, n); - return 0; - } - done = 0; - do { - if (n - done > PAGE_SIZE) - size = PAGE_SIZE; - else - size = n - done; - ret = __user_copy_pt((unsigned long) to + done, - &empty_zero_page, size, 1); - done += size; - if (ret) - return ret + n - done; - } while (done < n); - return 0; -} - -static size_t strnlen_user_pt(size_t count, const char __user *src) -{ - char *addr; - unsigned long uaddr = (unsigned long) src; - struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, len; - pte_t *pte; - size_t len_str; - - if (segment_eq(get_fs(), KERNEL_DS)) - return strnlen((const char __kernel __force *) src, count) + 1; - done = 0; -retry: - spin_lock(&mm->page_table_lock); - do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } - - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE-1); - addr = (char *)(pfn << PAGE_SHIFT) + offset; - len = min(count - done, PAGE_SIZE - offset); - len_str = strnlen(addr, len); - done += len_str; - uaddr += len_str; - } while ((len_str == len) && (done < count)); - spin_unlock(&mm->page_table_lock); - return done + 1; -fault: - spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, 0)) - return 0; - goto retry; -} - -static size_t strncpy_from_user_pt(size_t count, const char __user *src, - char *dst) -{ - size_t n = strnlen_user_pt(count, src); - - if (!n) - return -EFAULT; - if (n > count) - n = count; - if (segment_eq(get_fs(), KERNEL_DS)) { - memcpy(dst, (const char __kernel __force *) src, n); - if (dst[n-1] == '\0') - return n-1; - else - return n; - } - if (__user_copy_pt((unsigned long) src, dst, n, 0)) - return -EFAULT; - if (dst[n-1] == '\0') - return n-1; - else - return n; -} - -static size_t copy_in_user_pt(size_t n, void __user *to, - const void __user *from) -{ - struct mm_struct *mm = current->mm; - unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size, error_code; - unsigned long uaddr_from = (unsigned long) from; - unsigned long uaddr_to = (unsigned long) to; - pte_t *pte_from, *pte_to; - int write_user; - - if (segment_eq(get_fs(), KERNEL_DS)) { - memcpy((void __force *) to, (void __force *) from, n); - return 0; - } - done = 0; -retry: - spin_lock(&mm->page_table_lock); - do { - write_user = 0; - uaddr = uaddr_from; - pte_from = follow_table(mm, uaddr_from); - error_code = (unsigned long) pte_from; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_from)) { - error_code = 0x11; - goto fault; - } - - write_user = 1; - uaddr = uaddr_to; - pte_to = follow_table(mm, uaddr_to); - error_code = (unsigned long) pte_to; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_to)) { - error_code = 0x11; - goto fault; - } else if (!pte_write(*pte_to)) { - error_code = 0x04; - goto fault; - } - - pfn_from = pte_pfn(*pte_from); - pfn_to = pte_pfn(*pte_to); - offset_from = uaddr_from & (PAGE_SIZE-1); - offset_to = uaddr_from & (PAGE_SIZE-1); - offset_max = max(offset_from, offset_to); - size = min(n - done, PAGE_SIZE - offset_max); - - memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, - (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); - done += size; - uaddr_from += size; - uaddr_to += size; - } while (done < n); - spin_unlock(&mm->page_table_lock); - return n - done; -fault: - spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, error_code, write_user)) - return n - done; - goto retry; -} - -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile("0: l %1,0(%6)\n" \ - "1: " insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4:\n" \ - EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc" ); - -static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) -{ - int oldval = 0, newval, ret; - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - default: - ret = -ENOSYS; - } - if (ret == 0) - *old = oldval; - return ret; -} - -int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) -{ - int ret; - - if (segment_eq(get_fs(), KERNEL_DS)) - return __futex_atomic_op_pt(op, uaddr, oparg, old); - spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); - if (!uaddr) { - spin_unlock(¤t->mm->page_table_lock); - return -EFAULT; - } - get_page(virt_to_page(uaddr)); - spin_unlock(¤t->mm->page_table_lock); - ret = __futex_atomic_op_pt(op, uaddr, oparg, old); - put_page(virt_to_page(uaddr)); - return ret; -} - -static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) -{ - int ret; - - asm volatile("0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" - "2:\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) - : "cc", "memory" ); - return ret; -} - -int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) -{ - int ret; - - if (segment_eq(get_fs(), KERNEL_DS)) - return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); - spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); - if (!uaddr) { - spin_unlock(¤t->mm->page_table_lock); - return -EFAULT; - } - get_page(virt_to_page(uaddr)); - spin_unlock(¤t->mm->page_table_lock); - ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); - put_page(virt_to_page(uaddr)); - return ret; -} - -struct uaccess_ops uaccess_pt = { - .copy_from_user = copy_from_user_pt, - .copy_from_user_small = copy_from_user_pt, - .copy_to_user = copy_to_user_pt, - .copy_to_user_small = copy_to_user_pt, - .copy_in_user = copy_in_user_pt, - .clear_user = clear_user_pt, - .strnlen_user = strnlen_user_pt, - .strncpy_from_user = strncpy_from_user_pt, - .futex_atomic_op = futex_atomic_op_pt, - .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, -}; diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c deleted file mode 100644 index a6c4f7ed24a..00000000000 --- a/arch/s390/lib/uaccess_std.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * arch/s390/lib/uaccess_std.c - * - * Standard user space access functions based on mvcp/mvcs and doing - * interesting things in the secondary space mode. - * - * Copyright (C) IBM Corp. 2006 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Gerald Schaefer (gerald.schaefer@de.ibm.com) - */ - -#include <linux/errno.h> -#include <linux/mm.h> -#include <linux/uaccess.h> -#include <asm/futex.h> -#include "uaccess.h" - -#ifndef __s390x__ -#define AHI "ahi" -#define ALR "alr" -#define CLR "clr" -#define LHI "lhi" -#define SLR "slr" -#else -#define AHI "aghi" -#define ALR "algr" -#define CLR "clgr" -#define LHI "lghi" -#define SLR "slgr" -#endif - -size_t copy_from_user_std(size_t size, const void __user *ptr, void *x) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - "0: mvcp 0(%0,%2),0(%1),%3\n" - "10:jz 8f\n" - "1:"ALR" %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%3\n" - "11:jnz 1b\n" - " j 8f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " "LHI" %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 5f\n" - "4: mvcp 0(%4,%2),0(%1),%3\n" - "12:"SLR" %0,%4\n" - " "ALR" %2,%4\n" - "5:"LHI" %4,-1\n" - " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ - " bras %3,7f\n" /* memset loop */ - " xc 0(1,%2),0(%2)\n" - "6: xc 0(256,%2),0(%2)\n" - " la %2,256(%2)\n" - "7:"AHI" %4,-256\n" - " jnm 6b\n" - " ex %4,0(%3)\n" - " j 9f\n" - "8:"SLR" %0,%0\n" - "9: \n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) - EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; -} - -static size_t copy_from_user_std_check(size_t size, const void __user *ptr, - void *x) -{ - if (size <= 1024) - return copy_from_user_std(size, ptr, x); - return copy_from_user_pt(size, ptr, x); -} - -size_t copy_to_user_std(size_t size, void __user *ptr, const void *x) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - "0: mvcs 0(%0,%1),0(%2),%3\n" - "7: jz 5f\n" - "1:"ALR" %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%3\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " "LHI" %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " "SLR" %4,%1\n" - " "CLR" %0,%4\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%3\n" - "9:"SLR" %0,%4\n" - " j 6f\n" - "5:"SLR" %0,%0\n" - "6: \n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; -} - -static size_t copy_to_user_std_check(size_t size, void __user *ptr, - const void *x) -{ - if (size <= 1024) - return copy_to_user_std(size, ptr, x); - return copy_to_user_pt(size, ptr, x); -} - -static size_t copy_in_user_std(size_t size, void __user *to, - const void __user *from) -{ - unsigned long tmp1; - - asm volatile( - " sacf 256\n" - " "AHI" %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - "0:"AHI" %0,257\n" - "1: mvc 0(1,%1),0(%2)\n" - " la %1,1(%1)\n" - " la %2,1(%2)\n" - " "AHI" %0,-1\n" - " jnz 1b\n" - " j 5f\n" - "2: mvc 0(256,%1),0(%2)\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "3:"AHI" %0,-256\n" - " jnm 2b\n" - "4: ex %0,1b-0b(%3)\n" - "5: "SLR" %0,%0\n" - "6: sacf 0\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) - : : "cc", "memory"); - return size; -} - -static size_t clear_user_std(size_t size, void __user *to) -{ - unsigned long tmp1, tmp2; - - asm volatile( - " sacf 256\n" - " "AHI" %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - " xc 0(1,%1),0(%1)\n" - "0:"AHI" %0,257\n" - " la %2,255(%1)\n" /* %2 = ptr + 255 */ - " srl %2,12\n" - " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ - " "SLR" %2,%1\n" - " "CLR" %0,%2\n" /* clear crosses next page boundary? */ - " jnh 5f\n" - " "AHI" %2,-1\n" - "1: ex %2,0(%3)\n" - " "AHI" %2,1\n" - " "SLR" %0,%2\n" - " j 5f\n" - "2: xc 0(256,%1),0(%1)\n" - " la %1,256(%1)\n" - "3:"AHI" %0,-256\n" - " jnm 2b\n" - "4: ex %0,0(%3)\n" - "5: "SLR" %0,%0\n" - "6: sacf 0\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; -} - -size_t strnlen_user_std(size_t size, const char __user *src) -{ - register unsigned long reg0 asm("0") = 0UL; - unsigned long tmp1, tmp2; - - asm volatile( - " la %2,0(%1)\n" - " la %3,0(%0,%1)\n" - " "SLR" %0,%0\n" - " sacf 256\n" - "0: srst %3,%2\n" - " jo 0b\n" - " la %0,1(%3)\n" /* strnlen_user results includes \0 */ - " "SLR" %0,%1\n" - "1: sacf 0\n" - EX_TABLE(0b,1b) - : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) - : "d" (reg0) : "cc", "memory"); - return size; -} - -size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst) -{ - register unsigned long reg0 asm("0") = 0UL; - unsigned long tmp1, tmp2; - - asm volatile( - " la %3,0(%1)\n" - " la %4,0(%0,%1)\n" - " sacf 256\n" - "0: srst %4,%3\n" - " jo 0b\n" - " sacf 0\n" - " la %0,0(%4)\n" - " jh 1f\n" /* found \0 in string ? */ - " "AHI" %4,1\n" /* include \0 in copy */ - "1:"SLR" %0,%1\n" /* %0 = return length (without \0) */ - " "SLR" %4,%1\n" /* %4 = copy length (including \0) */ - "2: mvcp 0(%4,%2),0(%1),%5\n" - " jz 9f\n" - "3:"AHI" %4,-256\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "4: mvcp 0(%4,%2),0(%1),%5\n" - " jnz 3b\n" - " j 9f\n" - "7: sacf 0\n" - "8:"LHI" %0,%6\n" - "9:\n" - EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b) - : "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2) - : "d" (reg0), "K" (-EFAULT) : "cc", "memory"); - return size; -} - -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile( \ - " sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1:"insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 0\n" \ - EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc"); - -int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) -{ - int oldval = 0, newval, ret; - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); - break; - default: - ret = -ENOSYS; - } - *old = oldval; - return ret; -} - -int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) -{ - int ret; - - asm volatile( - " sacf 256\n" - "0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" - "2: sacf 0\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) - : "cc", "memory" ); - return ret; -} - -struct uaccess_ops uaccess_std = { - .copy_from_user = copy_from_user_std_check, - .copy_from_user_small = copy_from_user_std, - .copy_to_user = copy_to_user_std_check, - .copy_to_user_small = copy_to_user_std, - .copy_in_user = copy_in_user_std, - .clear_user = clear_user_std, - .strnlen_user = strnlen_user_std, - .strncpy_from_user = strncpy_from_user_std, - .futex_atomic_op = futex_atomic_op_std, - .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, -}; diff --git a/arch/s390/lib/usercopy.c b/arch/s390/lib/usercopy.c deleted file mode 100644 index 14b363fec8a..00000000000 --- a/arch/s390/lib/usercopy.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <linux/module.h> -#include <linux/bug.h> - -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile index c8489034105..51d399549f6 100644 --- a/arch/s390/math-emu/Makefile +++ b/arch/s390/math-emu/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_MATHEMU) := math.o -EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w +ccflags-y := -I$(src) -Iinclude/math-emu -w diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c index cd4e9c168dd..a6ba0d72433 100644 --- a/arch/s390/math-emu/math.c +++ b/arch/s390/math-emu/math.c @@ -1,8 +1,6 @@ /* - * arch/s390/math-emu/math.c - * * S390 version - * Copyright (C) 1999-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2001 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * * 'math.c' emulates IEEE instructions on a S390 processor @@ -21,6 +19,8 @@ #include <math-emu/double.h> #include <math-emu/quad.h> +#define FPC_VALID_MASK 0xF8F8FF03 + /* * I miss a macro to round a floating point number to the * nearest integer in the same floating point format. diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 6fbc6f3fbdf..839592ca265 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,7 +2,9 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o gup.o -obj-$(CONFIG_CMM) += cmm.o -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o + +obj-$(CONFIG_CMM) += cmm.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index c66ffd8dbbb..79ddd580d60 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -1,7 +1,7 @@ /* * Collaborative memory management interface. * - * Copyright IBM Corp 2003,2010 + * Copyright IBM Corp 2003, 2010 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, * */ @@ -91,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter, } else free_page((unsigned long) npa); } - diag10(addr); + diag10_range(addr >> PAGE_SHIFT, 1); pa->pages[pa->index++] = addr; (*counter)++; spin_unlock(&cmm_lock); @@ -253,12 +253,12 @@ static int cmm_skip_blanks(char *cp, char **endp) static struct ctl_table cmm_table[]; -static int cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int cmm_pages_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; + unsigned int len; long nr; - int len; if (!*lenp || (*ppos && !write)) { *lenp = 0; @@ -293,12 +293,12 @@ static int cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer, return 0; } -static int cmm_timeout_handler(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int cmm_timeout_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; long nr, seconds; - int len; + unsigned int len; if (!*lenp || (*ppos && !write)) { *lenp = 0; @@ -458,12 +458,10 @@ static int __init cmm_init(void) if (rc) goto out_pm; cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); - rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; - if (rc) - goto out_kthread; - return 0; + if (!IS_ERR(cmm_thread_ptr)) + return 0; -out_kthread: + rc = PTR_ERR(cmm_thread_ptr); unregister_pm_notifier(&cmm_power_notifier); out_pm: unregister_oom_notifier(&cmm_oom_nb); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c new file mode 100644 index 00000000000..46d517c3c76 --- /dev/null +++ b/arch/s390/mm/dump_pagetables.c @@ -0,0 +1,246 @@ +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <asm/sections.h> +#include <asm/pgtable.h> + +static unsigned long max_addr; + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +enum address_markers_idx { + IDENTITY_NR = 0, + KERNEL_START_NR, + KERNEL_END_NR, + VMEMMAP_NR, + VMALLOC_NR, +#ifdef CONFIG_64BIT + MODULES_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + [IDENTITY_NR] = {0, "Identity Mapping"}, + [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"}, + [VMEMMAP_NR] = {0, "vmemmap Area"}, + [VMALLOC_NR] = {0, "vmalloc Area"}, +#ifdef CONFIG_64BIT + [MODULES_NR] = {0, "Modules Area"}, +#endif + { -1, NULL } +}; + +struct pg_state { + int level; + unsigned int current_prot; + unsigned long start_address; + unsigned long current_address; + const struct addr_marker *marker; +}; + +static void print_prot(struct seq_file *m, unsigned int pr, int level) +{ + static const char * const level_name[] = + { "ASCE", "PGD", "PUD", "PMD", "PTE" }; + + seq_printf(m, "%s ", level_name[level]); + if (pr & _PAGE_INVALID) { + seq_printf(m, "I\n"); + return; + } + seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); + seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); + seq_putc(m, '\n'); +} + +static void note_page(struct seq_file *m, struct pg_state *st, + unsigned int new_prot, int level) +{ + static const char units[] = "KMGTPE"; + int width = sizeof(unsigned long) * 2; + const char *unit = units; + unsigned int prot, cur; + unsigned long delta; + + /* + * If we have a "break" in the series, we need to flush the state + * that we have now. "break" is either changing perms, levels or + * address space marker. + */ + prot = new_prot; + cur = st->current_prot; + + if (!st->level) { + /* First entry */ + st->current_prot = new_prot; + st->level = level; + st->marker = address_markers; + seq_printf(m, "---[ %s ]---\n", st->marker->name); + } else if (prot != cur || level != st->level || + st->current_address >= st->marker[1].start_address) { + /* Print the actual finished series */ + seq_printf(m, "0x%0*lx-0x%0*lx", + width, st->start_address, + width, st->current_address); + delta = (st->current_address - st->start_address) >> 10; + while (!(delta & 0x3ff) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(m, "%9lu%c ", delta, *unit); + print_prot(m, st->current_prot, st->level); + if (st->current_address >= st->marker[1].start_address) { + st->marker++; + seq_printf(m, "---[ %s ]---\n", st->marker->name); + } + st->start_address = st->current_address; + st->current_prot = new_prot; + st->level = level; + } +} + +/* + * The actual page table walker functions. In order to keep the + * implementation of print_prot() short, we only check and pass + * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, + * segment or page table entry is invalid or read-only. + * After all it's just a hint that the current level being walked + * contains an invalid or read-only entry. + */ +static void walk_pte_level(struct seq_file *m, struct pg_state *st, + pmd_t *pmd, unsigned long addr) +{ + unsigned int prot; + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { + st->current_address = addr; + pte = pte_offset_kernel(pmd, addr); + prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); + note_page(m, st, prot, 4); + addr += PAGE_SIZE; + } +} + +#ifdef CONFIG_64BIT +#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) +#else +#define _PMD_PROT_MASK 0 +#endif + +static void walk_pmd_level(struct seq_file *m, struct pg_state *st, + pud_t *pud, unsigned long addr) +{ + unsigned int prot; + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { + st->current_address = addr; + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + if (pmd_large(*pmd)) { + prot = pmd_val(*pmd) & _PMD_PROT_MASK; + note_page(m, st, prot, 3); + } else + walk_pte_level(m, st, pmd, addr); + } else + note_page(m, st, _PAGE_INVALID, 3); + addr += PMD_SIZE; + } +} + +#ifdef CONFIG_64BIT +#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO) +#else +#define _PUD_PROT_MASK 0 +#endif + +static void walk_pud_level(struct seq_file *m, struct pg_state *st, + pgd_t *pgd, unsigned long addr) +{ + unsigned int prot; + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { + st->current_address = addr; + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) + if (pud_large(*pud)) { + prot = pud_val(*pud) & _PUD_PROT_MASK; + note_page(m, st, prot, 2); + } else + walk_pmd_level(m, st, pud, addr); + else + note_page(m, st, _PAGE_INVALID, 2); + addr += PUD_SIZE; + } +} + +static void walk_pgd_level(struct seq_file *m) +{ + unsigned long addr = 0; + struct pg_state st; + pgd_t *pgd; + int i; + + memset(&st, 0, sizeof(st)); + for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { + st.current_address = addr; + pgd = pgd_offset_k(addr); + if (!pgd_none(*pgd)) + walk_pud_level(m, &st, pgd, addr); + else + note_page(m, &st, _PAGE_INVALID, 1); + addr += PGDIR_SIZE; + } + /* Flush out the last page */ + st.current_address = max_addr; + note_page(m, &st, 0, 0); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + walk_pgd_level(m); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pt_dump_init(void) +{ + /* + * Figure out the maximum virtual address being accessible with the + * kernel ASCE. We need this to keep the page table walker functions + * from accessing non-existent entries. + */ +#ifdef CONFIG_32BIT + max_addr = 1UL << 31; +#else + max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; + max_addr = 1UL << (max_addr * 11 + 31); + address_markers[MODULES_NR].start_address = MODULES_VADDR; +#endif + address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; + address_markers[VMALLOC_NR].start_address = VMALLOC_START; + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); + return 0; +} +device_initcall(pt_dump_init); diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 00000000000..4d1ee88864e --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,81 @@ +#include <linux/module.h> +#include <linux/sort.h> +#include <asm/uaccess.h> + +/* + * Search one exception table for an entry corresponding to the + * given instruction address, and return the address of the entry, + * or NULL if none is found. + * We use a binary search, and thus we assume that the table is + * already sorted. + */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + const struct exception_table_entry *mid; + unsigned long addr; + + while (first <= last) { + mid = ((last - first) >> 1) + first; + addr = extable_insn(mid); + if (addr < value) + first = mid + 1; + else if (addr > value) + last = mid - 1; + else + return mid; + } + return NULL; +} + +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + * + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* This compare is only valid after normalization. */ + return x->insn - y->insn; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + struct exception_table_entry *p; + int i; + + /* Normalize entries to being relative to the start of the section */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn += i; + sort(start, finish - start, sizeof(*start), cmp_ex, NULL); + /* Denormalize all entries */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn -= i; +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /* Trim the beginning */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /* Trim the end */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 3cc95dd0a3a..519bba716cc 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -1,10 +1,9 @@ /* - * File...........: arch/s390/mm/extmem.c * Author(s)......: Carsten Otte <cotte@de.ibm.com> * Rob M van der Heij <rvdheij@nl.ibm.com> * Steven Shultz <shultzss@us.ibm.com> * Bugreports.to..: <Linux390@de.ibm.com> - * (C) IBM Corporation 2002-2004 + * Copyright IBM Corp. 2002, 2004 */ #define KMSG_COMPONENT "extmem" @@ -412,6 +411,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long struct dcss_segment *seg; int rc, diag_cc; + start_addr = end_addr = 0; seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); if (seg == NULL) { rc = -ENOMEM; @@ -573,6 +573,7 @@ segment_modify_shared (char *name, int do_nonshared) unsigned long start_addr, end_addr, dummy; int rc, diag_cc; + start_addr = end_addr = 0; mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { @@ -681,8 +682,6 @@ void segment_save(char *name) { struct dcss_segment *seg; - int startpfn = 0; - int endpfn = 0; char cmd1[160]; char cmd2[80]; int i, response; @@ -698,8 +697,6 @@ segment_save(char *name) goto out; } - startpfn = seg->start_addr >> PAGE_SHIFT; - endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; i<seg->segcnt; i++) { sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2c57806c085..3f3b35403d0 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -1,8 +1,6 @@ /* - * arch/s390/mm/fault.c - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Hartmut Penner (hp@de.ibm.com) * Ulrich Weigand (uweigand@de.ibm.com) * @@ -32,11 +30,10 @@ #include <linux/uaccess.h> #include <linux/hugetlb.h> #include <asm/asm-offsets.h> -#include <asm/system.h> #include <asm/pgtable.h> -#include <asm/s390_ext.h> +#include <asm/irq.h> #include <asm/mmu_context.h> -#include <asm/compat.h> +#include <asm/facility.h> #include "../kernel/entry.h" #ifndef CONFIG_64BIT @@ -52,14 +49,20 @@ #define VM_FAULT_BADCONTEXT 0x010000 #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 +#define VM_FAULT_SIGNAL 0x080000 +#define VM_FAULT_PFAULT 0x100000 -static unsigned long store_indication; +static unsigned long store_indication __read_mostly; -void fault_init(void) +#ifdef CONFIG_64BIT +static int __init fault_init(void) { - if (test_facility(2) && test_facility(75)) + if (test_facility(75)) store_indication = 0xc00; + return 0; } +early_initcall(fault_init); +#endif static inline int notify_page_fault(struct pt_regs *regs) { @@ -103,30 +106,154 @@ void bust_spinlocks(int yes) * Returns the address space associated with the fault. * Returns 0 for kernel space and 1 for user space. */ -static inline int user_space_fault(unsigned long trans_exc_code) +static inline int user_space_fault(struct pt_regs *regs) { + unsigned long trans_exc_code; + /* * The lowest two bits of the translation exception * identification indicate which paging table was used. */ - trans_exc_code &= 3; - if (trans_exc_code == 2) - /* Access via secondary space, set_fs setting decides */ + trans_exc_code = regs->int_parm_long & 3; + if (trans_exc_code == 3) /* home space -> kernel */ + return 0; + if (user_mode(regs)) + return 1; + if (trans_exc_code == 2) /* secondary space -> set_fs */ return current->thread.mm_segment.ar4; - if (user_mode == HOME_SPACE_MODE) - /* User space if the access has been done via home space. */ - return trans_exc_code == 3; - /* - * If the user space is not the home space the kernel runs in home - * space. Access via secondary space has already been covered, - * access via primary space or access register is from user space - * and access via home space is from the kernel. - */ - return trans_exc_code != 3; + if (current->flags & PF_VCPU) + return 1; + return 0; +} + +static int bad_address(void *p) +{ + unsigned long dummy; + + return probe_kernel_address((unsigned long *)p, dummy); +} + +#ifdef CONFIG_64BIT +static void dump_pagetable(unsigned long asce, unsigned long address) +{ + unsigned long *table = __va(asce & PAGE_MASK); + + pr_alert("AS:%016lx ", asce); + switch (asce & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + table = table + ((address >> 53) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R1:%016lx ", *table); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION2: + table = table + ((address >> 42) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R2:%016lx ", *table); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION3: + table = table + ((address >> 31) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R3:%016lx ", *table); + if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_SEGMENT: + table = table + ((address >> 20) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont(KERN_CONT "S:%016lx ", *table); + if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) + goto out; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + } + table = table + ((address >> 12) & 0xff); + if (bad_address(table)) + goto bad; + pr_cont("P:%016lx ", *table); +out: + pr_cont("\n"); + return; +bad: + pr_cont("BAD\n"); } -static inline void report_user_fault(struct pt_regs *regs, long int_code, - int signr, unsigned long address) +#else /* CONFIG_64BIT */ + +static void dump_pagetable(unsigned long asce, unsigned long address) +{ + unsigned long *table = __va(asce & PAGE_MASK); + + pr_alert("AS:%08lx ", asce); + table = table + ((address >> 20) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("S:%08lx ", *table); + if (*table & _SEGMENT_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + if (bad_address(table)) + goto bad; + pr_cont("P:%08lx ", *table); +out: + pr_cont("\n"); + return; +bad: + pr_cont("BAD\n"); +} + +#endif /* CONFIG_64BIT */ + +static void dump_fault_info(struct pt_regs *regs) +{ + unsigned long asce; + + pr_alert("Fault in "); + switch (regs->int_parm_long & 3) { + case 3: + pr_cont("home space "); + break; + case 2: + pr_cont("secondary space "); + break; + case 1: + pr_cont("access register "); + break; + case 0: + pr_cont("primary space "); + break; + } + pr_cont("mode while using "); + if (!user_space_fault(regs)) { + asce = S390_lowcore.kernel_asce; + pr_cont("kernel "); + } +#ifdef CONFIG_PGSTE + else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { + struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + asce = gmap->asce; + pr_cont("gmap "); + } +#endif + else { + asce = S390_lowcore.user_asce; + pr_cont("user "); + } + pr_cont("ASCE.\n"); + dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); +} + +static inline void report_user_fault(struct pt_regs *regs, long signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -134,10 +261,13 @@ static inline void report_user_fault(struct pt_regs *regs, long int_code, return; if (!printk_ratelimit()) return; - printk("User process fault: interruption code 0x%lX ", int_code); + printk(KERN_ALERT "User process fault: interruption code 0x%X ", + regs->int_code); print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); - printk("\n"); - printk("failing address: %lX\n", address); + printk(KERN_CONT "\n"); + printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + dump_fault_info(regs); show_regs(regs); } @@ -145,24 +275,18 @@ static inline void report_user_fault(struct pt_regs *regs, long int_code, * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. */ -static noinline void do_sigsegv(struct pt_regs *regs, long int_code, - int si_code, unsigned long trans_exc_code) +static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - unsigned long address; - address = trans_exc_code & __FAIL_ADDR_MASK; - current->thread.prot_addr = address; - current->thread.trap_no = int_code; - report_user_fault(regs, int_code, SIGSEGV, address); + report_user_fault(regs, SIGSEGV); si.si_signo = SIGSEGV; si.si_code = si_code; - si.si_addr = (void __user *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGSEGV, &si, current); } -static noinline void do_no_context(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; unsigned long address; @@ -170,7 +294,7 @@ static noinline void do_no_context(struct pt_regs *regs, long int_code, /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) { - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; return; } @@ -178,115 +302,84 @@ static noinline void do_no_context(struct pt_regs *regs, long int_code, * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - address = trans_exc_code & __FAIL_ADDR_MASK; - if (!user_space_fault(trans_exc_code)) + address = regs->int_parm_long & __FAIL_ADDR_MASK; + if (!user_space_fault(regs)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" - " at virtual kernel address %p\n", (void *)address); + " in virtual kernel address space\n"); else printk(KERN_ALERT "Unable to handle kernel paging request" - " at virtual user address %p\n", (void *)address); - - die("Oops", regs, int_code); + " in virtual user address space\n"); + printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + dump_fault_info(regs); + die(regs, "Oops"); do_exit(SIGKILL); } -static noinline void do_low_address(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_low_address(struct pt_regs *regs) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. */ - die ("Low-address protection", regs, int_code); + die (regs, "Low-address protection"); do_exit(SIGKILL); } - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); } -static noinline void do_sigbus(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_sigbus(struct pt_regs *regs) { struct task_struct *tsk = current; - unsigned long address; struct siginfo si; /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - address = trans_exc_code & __FAIL_ADDR_MASK; - tsk->thread.prot_addr = address; - tsk->thread.trap_no = int_code; si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; - si.si_addr = (void __user *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGBUS, &si, tsk); } -#ifdef CONFIG_S390_EXEC_PROTECT -static noinline int signal_return(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) -{ - u16 instruction; - int rc; - - rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - - if (!rc && instruction == 0x0a77) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_sigreturn(); - else - sys_sigreturn(); - } else if (!rc && instruction == 0x0aad) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_rt_sigreturn(); - else - sys_rt_sigreturn(); - } else - do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); - return 0; -} -#endif /* CONFIG_S390_EXEC_PROTECT */ - -static noinline void do_fault_error(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code, int fault) +static noinline void do_fault_error(struct pt_regs *regs, int fault) { int si_code; switch (fault) { case VM_FAULT_BADACCESS: -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) { - signal_return(regs, int_code, trans_exc_code); - break; - } -#endif /* CONFIG_S390_EXEC_PROTECT */ case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ - if (regs->psw.mask & PSW_MASK_PSTATE) { + if (user_mode(regs)) { /* User mode accesses just cause a SIGSEGV */ si_code = (fault == VM_FAULT_BADMAP) ? SEGV_MAPERR : SEGV_ACCERR; - do_sigsegv(regs, int_code, si_code, trans_exc_code); + do_sigsegv(regs, si_code); return; } case VM_FAULT_BADCONTEXT: - do_no_context(regs, int_code, trans_exc_code); + case VM_FAULT_PFAULT: + do_no_context(regs); + break; + case VM_FAULT_SIGNAL: + if (!user_mode(regs)) + do_no_context(regs); break; default: /* fault & VM_FAULT_ERROR */ - if (fault & VM_FAULT_OOM) - pagefault_out_of_memory(); - else if (fault & VM_FAULT_SIGBUS) { + if (fault & VM_FAULT_OOM) { + if (!user_mode(regs)) + do_no_context(regs); + else + pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? Handle exceptions or die */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, int_code, trans_exc_code); + if (!user_mode(regs)) + do_no_context(regs); else - do_sigbus(regs, int_code, trans_exc_code); + do_sigbus(regs); } else BUG(); break; @@ -304,20 +397,31 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline int do_exception(struct pt_regs *regs, int access, - unsigned long trans_exc_code) +static inline int do_exception(struct pt_regs *regs, int access) { +#ifdef CONFIG_PGSTE + struct gmap *gmap; +#endif struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; + unsigned long trans_exc_code; unsigned long address; - int fault, write; + unsigned int flags; + int fault; + + tsk = current; + /* + * The instruction that caused the program check has + * been nullified. Don't signal single step via SIGTRAP. + */ + clear_pt_regs_flag(regs, PIF_PER_TRAP); if (notify_page_fault(regs)) return 0; - tsk = current; mm = tsk->mm; + trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that @@ -325,13 +429,37 @@ static inline int do_exception(struct pt_regs *regs, int access, * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); +#ifdef CONFIG_PGSTE + gmap = (struct gmap *) + ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + if (gmap) { + address = __gmap_fault(address, gmap); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + if (gmap->pfault_enabled) + flags |= FAULT_FLAG_RETRY_NOWAIT; + } +#endif + +retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) @@ -359,27 +487,49 @@ static inline int do_exception(struct pt_regs *regs, int access, * make sure we exit gracefully rather than endlessly redo * the fault. */ - write = (access == VM_WRITE || - (trans_exc_code & store_indication) == 0x400) ? - FAULT_FLAG_WRITE : 0; - fault = handle_mm_fault(mm, vma, address, write); + fault = handle_mm_fault(mm, vma, address, flags); + /* No reason to continue if interrupted by SIGKILL. */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + fault = VM_FAULT_SIGNAL; + goto out; + } if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); - } /* - * The instruction that caused the program check will - * be repeated. Don't signal single step via SIGTRAP. + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. */ - clear_tsk_thread_flag(tsk, TIF_PER_TRAP); + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { +#ifdef CONFIG_PGSTE + if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { + /* FAULT_FLAG_RETRY_NOWAIT has been set, + * mmap_sem has not been released */ + current->thread.gmap_pfault = 1; + fault = VM_FAULT_PFAULT; + goto out_up; + } +#endif + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~(FAULT_FLAG_ALLOW_RETRY | + FAULT_FLAG_RETRY_NOWAIT); + flags |= FAULT_FLAG_TRIED; + down_read(&mm->mmap_sem); + goto retry; + } + } fault = 0; out_up: up_read(&mm->mmap_sem); @@ -387,94 +537,41 @@ out: return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_protection_exception(struct pt_regs *regs) { + unsigned long trans_exc_code; int fault; - /* Protection exception is supressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + trans_exc_code = regs->int_parm_long; + /* + * Protection exceptions are suppressing, decrement psw address. + * The exception to this rule are aborted transactions, for these + * the PSW already points to the correct location. + */ + if (!(regs->int_code & 0x200)) + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs, pgm_int_code, trans_exc_code); + do_low_address(regs); return; } - fault = do_exception(regs, VM_WRITE, trans_exc_code); + fault = do_exception(regs, VM_WRITE); if (unlikely(fault)) - do_fault_error(regs, 4, trans_exc_code, fault); + do_fault_error(regs, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_dat_exception(struct pt_regs *regs) { int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) - access = VM_EXEC; -#endif - fault = do_exception(regs, access, trans_exc_code); + fault = do_exception(regs, access); if (unlikely(fault)) - do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); -} - -#ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) - goto no_context; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); - up_read(&mm->mmap_sem); - - if (vma) { - update_mm(mm, current); - return; - } - - /* User mode accesses just cause a SIGSEGV */ - if (regs->psw.mask & PSW_MASK_PSTATE) { - do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); - return; - } - -no_context: - do_no_context(regs, pgm_int_code, trans_exc_code); -} -#endif - -int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) -{ - struct pt_regs regs; - int access, fault; - - regs.psw.mask = psw_kernel_bits; - if (!irqs_disabled()) - regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; - regs.psw.addr = (unsigned long) __builtin_return_address(0); - regs.psw.addr |= PSW_ADDR_AMODE; - uaddr &= PAGE_MASK; - access = write ? VM_WRITE : VM_READ; - fault = do_exception(®s, access, uaddr | 2); - if (unlikely(fault)) { - if (fault & VM_FAULT_OOM) { - pagefault_out_of_memory(); - fault = 0; - } else if (fault & VM_FAULT_SIGBUS) - do_sigbus(®s, pgm_int_code, uaddr); - } - return fault ? -EFAULT : 0; + do_fault_error(regs, fault); } #ifdef CONFIG_PFAULT @@ -491,25 +588,31 @@ static int __init nopfault(char *str) __setup("nopfault", nopfault); -typedef struct { - __u16 refdiagc; - __u16 reffcode; - __u16 refdwlen; - __u16 refversn; - __u64 refgaddr; - __u64 refselmk; - __u64 refcmpmk; - __u64 reserved; -} __attribute__ ((packed, aligned(8))) pfault_refbk_t; +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); int pfault_init(void) { - pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, - __PF_RES_FIELD }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_CURRENT_PID, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD }; int rc; - if (!MACHINE_IS_VM || pfault_disable) + if (pfault_disable) return -1; asm volatile( " diag %1,%0,0x258\n" @@ -518,18 +621,20 @@ int pfault_init(void) "2:\n" EX_TABLE(0b,1b) : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); - __ctl_set_bit(0, 9); return rc; } void pfault_fini(void) { - pfault_refbk_t refbk = - { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; - - if (!MACHINE_IS_VM || pfault_disable) + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, + }; + + if (pfault_disable) return; - __ctl_clear_bit(0,9); asm volatile( " diag %0,0,0x258\n" "0:\n" @@ -537,84 +642,134 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } -static void pfault_interrupt(unsigned int ext_int_code, +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + +static void pfault_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; + pid_t pid; - kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; /* * Get the external interruption subcode & pfault * initial/completion signal bit. VM stores this * in the 'cpu address' field associated with the * external interrupt. */ - subcode = ext_int_code >> 16; + subcode = ext_code.subcode; if ((subcode & 0xff00) != __SUBCODE_MASK) return; - - /* - * Get the token (= address of the task structure of the affected task). - */ -#ifdef CONFIG_64BIT - tsk = *(struct task_struct **) param64; -#else - tsk = *(struct task_struct **) param32; -#endif - + inc_irq_stat(IRQEXT_PFL); + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ - if (xchg(&tsk->thread.pfault_wait, -1) != 0) { + if (tsk->thread.pfault_wait == 1) { /* Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping * and can't produce new pfaults. */ tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); wake_up_process(tsk); put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } } else { /* signal bit not set -> a real page is missing. */ - get_task_struct(tsk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (xchg(&tsk->thread.pfault_wait, 1) != 0) { + if (WARN_ON_ONCE(tsk != current)) + goto out; + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial - * interrupt (swapped in a -1 for pfault_wait). Set - * pfault_wait back to zero and exit. This can be - * done safely because tsk is running in kernel - * mode and can't produce new pfaults. */ + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ tsk->thread.pfault_wait = 0; - set_task_state(tsk, TASK_RUNNING); - put_task_struct(tsk); - } else + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); + } } +out: + spin_unlock(&pfault_lock); + put_task_struct(tsk); +} + +static int pfault_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DEAD: + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + put_task_struct(tsk); + } + spin_unlock_irq(&pfault_lock); + break; + default: + break; + } + return NOTIFY_OK; } static int __init pfault_irq_init(void) { int rc; - if (!MACHINE_IS_VM) - return 0; - /* - * Try to get pfault pseudo page faults going. - */ - rc = register_external_interrupt(0x2603, pfault_interrupt); - if (rc) { - pfault_disable = 1; - return rc; - } - if (pfault_init() == 0) - return 0; + rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); + hotcpu_notifier(pfault_cpu_notify, 0); + return 0; - /* Tough luck, no pfault. */ +out_pfault: + unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); +out_extint: pfault_disable = 1; - unregister_external_interrupt(0x2603, pfault_interrupt); - return 0; + return rc; } early_initcall(pfault_irq_init); -#endif +#endif /* CONFIG_PFAULT */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 45b405ca256..639fce46400 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, pte_t *ptep, pte; struct page *page; - mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { @@ -52,11 +52,11 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page; + struct page *head, *page, *tail; int refs; - result = write ? 0 : _SEGMENT_ENTRY_RO; - mask = result | _SEGMENT_ENTRY_INV; + result = write ? 0 : _SEGMENT_ENTRY_PROTECT; + mask = result | _SEGMENT_ENTRY_INVALID; if ((pmd_val(pmd) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); @@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; @@ -103,9 +115,18 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, pmd = *pmdp; barrier(); next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + /* + * The pmd_trans_splitting() check below explains why + * pmdp_splitting_flush() has to serialize with + * smp_call_function() against our disabled IRQs, to stop + * this gup-fast code from running while we set the + * splitting bit in the pmd. Returning zero will take + * the slow path that will call wait_split_huge_page() + * if the pmd is still in splitting state. + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (unlikely(pmd_huge(pmd))) { + if (unlikely(pmd_large(pmd))) { if (!gup_huge_pmd(pmdp, pmd, addr, next, write, pages, nr)) return 0; @@ -142,28 +163,16 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, return 1; } -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; - unsigned long next; + unsigned long next, flags; pgd_t *pgdp, pgd; int nr = 0; @@ -171,54 +180,67 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (end < start) - goto slow_irqon; - + if ((end <= start) || (end > TASK_SIZE)) + return 0; /* - * local_irq_disable() doesn't prevent pagetable teardown, but does + * local_irq_save() doesn't prevent pagetable teardown, but does * prevent the pagetables from being freed on s390. * * So long as we atomically load page table pointers versus teardown, * we can follow the address down to the the page and take a ref on it. */ - local_irq_disable(); + local_irq_save(flags); pgdp = pgd_offset(mm, addr); do { pgd = *pgdp; barrier(); next = pgd_addr_end(addr, end); if (pgd_none(pgd)) - goto slow; + break; if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) - goto slow; + break; } while (pgdp++, addr = next, addr != end); - local_irq_enable(); + local_irq_restore(flags); - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); return nr; +} - { - int ret; -slow: - local_irq_enable(); -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + int nr, ret; - return ret; - } + start &= PAGE_MASK; + nr = __get_user_pages_fast(start, nr_pages, write, pages); + if (nr == nr_pages) + return nr; + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + nr_pages - nr, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + /* Have to be a bit careful with return values */ + if (nr > 0) + ret = (ret < 0) ? nr : ret + nr; + return ret; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 639cd21f221..0ff66a7e29b 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -1,38 +1,131 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright 2007 IBM Corp. + * Copyright IBM Corp. 2007 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ #include <linux/mm.h> #include <linux/hugetlb.h> +static inline pmd_t __pte_to_pmd(pte_t pte) +{ + int none, young, prot; + pmd_t pmd; + + /* + * Convert encoding pte bits pmd bits + * .IR...wrdytp ..R...I...y. + * empty .10...000000 -> ..0...1...0. + * prot-none, clean, old .11...000001 -> ..0...1...1. + * prot-none, clean, young .11...000101 -> ..1...1...1. + * prot-none, dirty, old .10...001001 -> ..0...1...1. + * prot-none, dirty, young .10...001101 -> ..1...1...1. + * read-only, clean, old .11...010001 -> ..1...1...0. + * read-only, clean, young .01...010101 -> ..1...0...1. + * read-only, dirty, old .11...011001 -> ..1...1...0. + * read-only, dirty, young .01...011101 -> ..1...0...1. + * read-write, clean, old .11...110001 -> ..0...1...0. + * read-write, clean, young .01...110101 -> ..0...0...1. + * read-write, dirty, old .10...111001 -> ..0...1...0. + * read-write, dirty, young .00...111101 -> ..0...0...1. + * Huge ptes are dirty by definition, a clean pte is made dirty + * by the conversion. + */ + if (pte_present(pte)) { + pmd_val(pmd) = pte_val(pte) & PAGE_MASK; + if (pte_val(pte) & _PAGE_INVALID) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + none = (pte_val(pte) & _PAGE_PRESENT) && + !(pte_val(pte) & _PAGE_READ) && + !(pte_val(pte) & _PAGE_WRITE); + prot = (pte_val(pte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_WRITE); + young = pte_val(pte) & _PAGE_YOUNG; + if (none || young) + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (prot || (none && young)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else + pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; + return pmd; +} + +static inline pte_t __pmd_to_pte(pmd_t pmd) +{ + pte_t pte; + + /* + * Convert encoding pmd bits pte bits + * ..R...I...y. .IR...wrdytp + * empty ..0...1...0. -> .10...000000 + * prot-none, old ..0...1...1. -> .10...001001 + * prot-none, young ..1...1...1. -> .10...001101 + * read-only, old ..1...1...0. -> .11...011001 + * read-only, young ..1...0...1. -> .01...011101 + * read-write, old ..0...1...0. -> .10...111001 + * read-write, young ..0...0...1. -> .00...111101 + * Huge ptes are dirty by definition + */ + if (pmd_present(pmd)) { + pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | + (pmd_val(pmd) & PAGE_MASK); + if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) + pte_val(pte) |= _PAGE_INVALID; + if (pmd_prot_none(pmd)) { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_YOUNG; + } else { + pte_val(pte) |= _PAGE_READ; + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_WRITE; + if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) + pte_val(pte) |= _PAGE_YOUNG; + } + } else + pte_val(pte) = _PAGE_INVALID; + return pte; +} void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *pteptr, pte_t pteval) + pte_t *ptep, pte_t pte) { - pmd_t *pmdp = (pmd_t *) pteptr; - pte_t shadow_pteval = pteval; - unsigned long mask; + pmd_t pmd; + pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { - pteptr = (pte_t *) pte_page(pteval)[1].index; - mask = pte_val(pteval) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - if (mm->context.noexec) { - pteptr += PTRS_PER_PTE; - pte_val(shadow_pteval) = - (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - } - } + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= pte_page(pte)[1].index; + } else + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + *(pmd_t *) ptep = pmd; +} - pmd_val(*pmdp) = pte_val(pteval); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - pmd_val(*pmdp) = pte_val(shadow_pteval); +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long origin; + pmd_t pmd; + + pmd = *(pmd_t *) ptep; + if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { + origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= *(unsigned long *) origin; } + return __pmd_to_pte(pmd); +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + pte_t pte = huge_ptep_get(ptep); + + pmdp_flush_direct(mm, addr, pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + return pte; } int arch_prepare_hugepage(struct page *page) @@ -45,11 +138,11 @@ int arch_prepare_hugepage(struct page *page) if (MACHINE_HAS_HPAGE) return 0; - ptep = (pte_t *) pte_alloc_one(&init_mm, address); + ptep = (pte_t *) pte_alloc_one(&init_mm, addr); if (!ptep) return -ENOMEM; - pte = mk_pte(page, PAGE_RW); + pte_val(pte) = addr; for (i = 0; i < PTRS_PER_PTE; i++) { set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); pte_val(pte) += PAGE_SIZE; @@ -68,6 +161,8 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; + clear_table((unsigned long *) ptep, _PAGE_INVALID, + PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index bb409332a48..0c1073ed1e8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -1,8 +1,6 @@ /* - * arch/s390/mm/init.c - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Hartmut Penner (hp@de.ibm.com) * * Derived from "arch/i386/mm/init.c" @@ -23,12 +21,13 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/bootmem.h> +#include <linux/memory.h> #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> +#include <linux/export.h> #include <linux/gfp.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -37,17 +36,18 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/ctl_reg.h> +#include <asm/sclp.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); -static unsigned long setup_zero_pages(void) +static void __init setup_zero_pages(void) { struct cpuid cpu_id; unsigned int order; - unsigned long size; struct page *page; int i; @@ -64,10 +64,19 @@ static unsigned long setup_zero_pages(void) break; case 0x2097: /* z10 */ case 0x2098: /* z10 */ - default: + case 0x2817: /* z196 */ + case 0x2818: /* z196 */ order = 2; break; + case 0x2827: /* zEC12 */ + case 0x2828: /* zEC12 */ + default: + order = 5; + break; } + /* Limit number of empty zero pages for small memory sizes */ + if (order > 2 && totalram_pages <= 16384) + order = 2; empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!empty_zero_page) @@ -76,14 +85,11 @@ static unsigned long setup_zero_pages(void) page = virt_to_page((void *) empty_zero_page); split_page(page, order); for (i = 1 << order; i > 0; i--) { - SetPageReserved(page); + mark_page_reserved(page); page++; } - size = PAGE_SIZE << order; - zero_page_mask = (size - 1) & PAGE_MASK; - - return 1UL << order; + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } /* @@ -92,18 +98,22 @@ static unsigned long setup_zero_pages(void) void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; - unsigned long pgd_type; + unsigned long pgd_type, asce_bits; init_mm.pgd = swapper_pg_dir; - S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; #ifdef CONFIG_64BIT - /* A three level page table (4TB) is enough for the kernel space. */ - S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; - pgd_type = _REGION3_ENTRY_EMPTY; + if (VMALLOC_END > (1UL << 42)) { + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION2_ENTRY_EMPTY; + } else { + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION3_ENTRY_EMPTY; + } #else - S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; + asce_bits = _ASCE_TABLE_LENGTH; pgd_type = _SEGMENT_ENTRY_EMPTY; #endif + S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); @@ -114,123 +124,108 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 13, 13); arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); - atomic_set(&init_mm.context.attach_count, 1); - sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); -#ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); -#endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); - fault_init(); } void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, mm_cpumask(&init_mm)); + atomic_set(&init_mm.context.attach_count, 1); - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ - - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + free_all_bootmem(); + setup_zero_pages(); /* Setup zeroed pages. */ + + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); } -#ifdef CONFIG_DEBUG_PAGEALLOC -void kernel_map_pages(struct page *page, int numpages, int enable) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long address; - int i; - - for (i = 0; i < numpages; i++) { - address = page_to_phys(page + i); - pgd = pgd_offset_k(address); - pud = pud_offset(pgd, address); - pmd = pmd_offset(pud, address); - pte = pte_offset_kernel(pmd, address); - if (!enable) { - ptep_invalidate(&init_mm, address, pte); - continue; - } - *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); - /* Flush cpu write queue. */ - mb(); - } -} -#endif - -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr = begin; - - if (begin >= end) - return; - for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)(addr & PAGE_MASK), POISON_FREE_INITMEM, - PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); -} - void free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)&__init_begin, - (unsigned long)&__init_end); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size) { - struct pglist_data *pgdat; + unsigned long zone_start_pfn, zone_end_pfn, nr_pages; + unsigned long start_pfn = PFN_DOWN(start); + unsigned long size_pages = PFN_DOWN(size); struct zone *zone; int rc; - pgdat = NODE_DATA(nid); - zone = pgdat->node_zones + ZONE_MOVABLE; rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size)); + for_each_zone(zone) { + if (zone_idx(zone) != ZONE_MOVABLE) { + /* Add range within existing zone limits */ + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; + } else { + /* Add remaining range to ZONE_MOVABLE */ + zone_start_pfn = start_pfn; + zone_end_pfn = start_pfn + size_pages; + } + if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) + continue; + nr_pages = (start_pfn + size_pages > zone_end_pfn) ? + zone_end_pfn - start_pfn : size_pages; + rc = __add_pages(nid, zone, start_pfn, nr_pages); + if (rc) + break; + start_pfn += nr_pages; + size_pages -= nr_pages; + if (!size_pages) + break; + } if (rc) vmem_remove_mapping(start, size); return rc; } + +unsigned long memory_block_size_bytes(void) +{ + /* + * Make sure the memory block size is always greater + * or equal than the memory increment size. + */ + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* + * There is no hardware or firmware interface which could trigger a + * hot memory remove on s390. So there is nothing that needs to be + * implemented. + */ + return -EBUSY; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 71a4b0d34be..2a2e35416d2 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,7 +11,10 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/errno.h> -#include <asm/system.h> +#include <linux/gfp.h> +#include <linux/cpu.h> +#include <asm/ctl_reg.h> +#include <asm/io.h> /* * This function writes to kernel memory bypassing DAT and possible @@ -19,7 +22,7 @@ * using the stura instruction. * Returns the number of bytes copied or -EFAULT. */ -static long probe_kernel_write_odd(void *dst, void *src, size_t size) +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long count, aligned; int offset, mask; @@ -45,7 +48,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) return rc ? rc : count; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { long copied = 0; @@ -60,18 +63,14 @@ long probe_kernel_write(void *dst, void *src, size_t size) return copied < 0 ? -EFAULT : 0; } -int memcpy_real(void *dest, void *src, size_t count) +static int __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; register unsigned long _src asm("4") = (unsigned long) src; register unsigned long _len2 asm("5") = (unsigned long) count; - unsigned long flags; int rc = -EFAULT; - if (!count) - return 0; - flags = __arch_local_irq_stnsm(0xf8UL); asm volatile ( "0: mvcle %1,%2,0x0\n" "1: jo 0b\n" @@ -82,6 +81,124 @@ int memcpy_real(void *dest, void *src, size_t count) "+d" (_len2), "=m" (*((long *) dest)) : "m" (*((long *) src)) : "cc", "memory"); + return rc; +} + +/* + * Copy memory in real mode (kernel to kernel) + */ +int memcpy_real(void *dest, void *src, size_t count) +{ + unsigned long flags; + int rc; + + if (!count) + return 0; + local_irq_save(flags); + __arch_local_irq_stnsm(0xfbUL); + rc = __memcpy_real(dest, src, count); + local_irq_restore(flags); + return rc; +} + +/* + * Copy memory in absolute mode (kernel to kernel) + */ +void memcpy_absolute(void *dest, void *src, size_t count) +{ + unsigned long cr0, flags, prefix; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + prefix = store_prefix(); + if (prefix) { + local_mcck_disable(); + set_prefix(0); + memcpy(dest, src, count); + set_prefix(prefix); + local_mcck_enable(); + } else { + memcpy(dest, src, count); + } + __ctl_load(cr0, 0, 0); arch_local_irq_restore(flags); +} + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, unsigned long count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); return rc; } + +/* + * Check if physical address is within prefix or zero page + */ +static int is_swapped(unsigned long addr) +{ + unsigned long lc; + int cpu; + + if (addr < sizeof(struct _lowcore)) + return 1; + for_each_online_cpu(cpu) { + lc = (unsigned long) lowcore_ptr[cpu]; + if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) + continue; + return 1; + } + return 0; +} + +/* + * Convert a physical pointer for /dev/mem access + * + * For swapped prefix pages a new buffer is returned that contains a copy of + * the absolute memory. The buffer size is maximum one page large. + */ +void *xlate_dev_mem_ptr(unsigned long addr) +{ + void *bounce = (void *) addr; + unsigned long size; + + get_online_cpus(); + preempt_disable(); + if (is_swapped(addr)) { + size = PAGE_SIZE - (addr & ~PAGE_MASK); + bounce = (void *) __get_free_page(GFP_ATOMIC); + if (bounce) + memcpy_absolute(bounce, (void *) addr, size); + } + preempt_enable(); + put_online_cpus(); + return bounce; +} + +/* + * Free converted buffer for /dev/mem access (if necessary) + */ +void unxlate_dev_mem_ptr(unsigned long addr, void *buf) +{ + if ((void *) addr != buf) + free_page((unsigned long) buf); +} diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c new file mode 100644 index 00000000000..5535cfe0ee1 --- /dev/null +++ b/arch/s390/mm/mem_detect.c @@ -0,0 +1,65 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/memblock.h> +#include <linux/init.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 + +static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size) +{ + memblock_add_range(&memblock.memory, start, size, 0, 0); + memblock_add_range(&memblock.physmem, start, size, 0, 0); +} + +void __init detect_memory_memblock(void) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr, size; + int type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (IS_ENABLED(CONFIG_32BIT)) { + rzm = min(ADDR2G, rzm); + memsize = min(ADDR2G, memsize); + } + max_physmem_end = memsize; + addr = 0; + /* keep memblock lists close to the kernel */ + memblock_set_bottom_up(true); + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (max_physmem_end && addr + size >= max_physmem_end) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (max_physmem_end && (addr + size > max_physmem_end)) + size = max_physmem_end - addr; + memblock_physmem_add(addr, size); + } + addr += size; + } while (addr < max_physmem_end); + memblock_set_bottom_up(false); + if (!max_physmem_end) + max_physmem_end = memblock_end_of_DRAM(); +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f1818..9b436c21195 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -1,6 +1,4 @@ /* - * linux/arch/s390/mm/mmap.c - * * flexible mmap layout support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. @@ -26,10 +24,11 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> +#include <linux/compat.h> #include <asm/pgalloc.h> -#include <asm/compat.h> static unsigned long stack_maxrandom_size(void) { @@ -65,6 +64,11 @@ static unsigned long mmap_rnd(void) return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; } +static unsigned long mmap_base_legacy(void) +{ + return TASK_UNMAPPED_BASE + mmap_rnd(); +} + static inline unsigned long mmap_base(void) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -90,23 +94,23 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; + mm->mmap_base = mmap_base_legacy(); mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } -EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); #else -int s390_mmap_check(unsigned long addr, unsigned long len) +int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) { - if (!is_compat_task() && - len >= TASK_SIZE && TASK_SIZE < (1UL << 53)) + if (is_compat_task() || (TASK_SIZE >= (1UL << 53))) + return 0; + if (!(flags & MAP_FIXED)) + addr = 0; + if ((addr + len) >= TASK_SIZE) return crst_table_upgrade(current->mm, 1UL << 53); return 0; } @@ -165,15 +169,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; + mm->mmap_base = mmap_base_legacy(); mm->get_unmapped_area = s390_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = s390_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } -EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); #endif diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c new file mode 100644 index 00000000000..8400f494623 --- /dev/null +++ b/arch/s390/mm/pageattr.c @@ -0,0 +1,146 @@ +/* + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/hugetlb.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <asm/page.h> + +#if PAGE_DEFAULT_KEY +static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) +{ + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + : [addr] "+a" (addr) : [skey] "d" (skey)); + return addr; +} + +void __storage_key_init_range(unsigned long start, unsigned long end) +{ + unsigned long boundary, size; + + while (start < end) { + if (MACHINE_HAS_EDAT1) { + /* set storage keys for a 1MB frame */ + size = 1UL << 20; + boundary = (start + size) & ~(size - 1); + if (boundary <= end) { + do { + start = sske_frame(start, PAGE_DEFAULT_KEY); + } while (start < boundary); + continue; + } + } + page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); + start += PAGE_SIZE; + } +} +#endif + +static pte_t *walk_page_table(unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(addr); + if (pgd_none(*pgdp)) + return NULL; + pudp = pud_offset(pgdp, addr); + if (pud_none(*pudp) || pud_large(*pudp)) + return NULL; + pmdp = pmd_offset(pudp, addr); + if (pmd_none(*pmdp) || pmd_large(*pmdp)) + return NULL; + ptep = pte_offset_kernel(pmdp, addr); + if (pte_none(*ptep)) + return NULL; + return ptep; +} + +static void change_page_attr(unsigned long addr, int numpages, + pte_t (*set) (pte_t)) +{ + pte_t *ptep, pte; + int i; + + for (i = 0; i < numpages; i++) { + ptep = walk_page_table(addr); + if (WARN_ON_ONCE(!ptep)) + break; + pte = *ptep; + pte = set(pte); + __ptep_ipte(addr, ptep); + *ptep = pte; + addr += PAGE_SIZE; + } +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_wrprotect); + return 0; +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_mkwrite); + return 0; +} + +/* not possible */ +int set_memory_nx(unsigned long addr, int numpages) +{ + return 0; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return 0; +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long address; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i; + + for (i = 0; i < numpages; i++) { + address = page_to_phys(page + i); + pgd = pgd_offset_k(address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); + if (!enable) { + __ptep_ipte(address, pte); + pte_val(*pte) = _PAGE_INVALID; + continue; + } + pte_val(*pte) = __pa(address); + } +} + +#ifdef CONFIG_HIBERNATION +bool kernel_page_present(struct page *page) +{ + unsigned long addr; + int cc; + + addr = page_to_phys(page); + asm volatile( + " lra %1,0(%1)\n" + " ipm %0\n" + " srl %0,28" + : "=d" (cc), "+a" (addr) : : "cc"); + return cc == 0; +} +#endif /* CONFIG_HIBERNATION */ + +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e1850c28cd6..37b8241ec78 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007, 2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -16,188 +16,60 @@ #include <linux/module.h> #include <linux/quicklist.h> #include <linux/rcupdate.h> +#include <linux/slab.h> +#include <linux/swapops.h> -#include <asm/system.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> -struct rcu_table_freelist { - struct rcu_head rcu; - struct mm_struct *mm; - unsigned int pgt_index; - unsigned int crst_index; - unsigned long *table[0]; -}; - -#define RCU_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ - / sizeof(unsigned long)) - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); - -static void __page_table_free(struct mm_struct *mm, unsigned long *table); -static void __crst_table_free(struct mm_struct *mm, unsigned long *table); - -static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) -{ - struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); - struct rcu_table_freelist *batch = *batchp; - - if (batch) - return batch; - batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); - if (batch) { - batch->mm = mm; - batch->pgt_index = 0; - batch->crst_index = RCU_FREELIST_SIZE; - *batchp = batch; - } - return batch; -} - -static void rcu_table_freelist_callback(struct rcu_head *head) -{ - struct rcu_table_freelist *batch = - container_of(head, struct rcu_table_freelist, rcu); - - while (batch->pgt_index > 0) - __page_table_free(batch->mm, batch->table[--batch->pgt_index]); - while (batch->crst_index < RCU_FREELIST_SIZE) - __crst_table_free(batch->mm, batch->table[batch->crst_index++]); - free_page((unsigned long) batch); -} - -void rcu_table_freelist_finish(void) -{ - struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); - - if (!batch) - return; - call_rcu(&batch->rcu, rcu_table_freelist_callback); - __get_cpu_var(rcu_table_freelist) = NULL; -} - -static void smp_sync(void *arg) -{ -} - #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 -#define TABLES_PER_PAGE 4 -#define FRAG_MASK 15UL -#define SECOND_HALVES 10UL - -void clear_table_pgstes(unsigned long *table) -{ - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); - memset(table + 256, 0, PAGE_SIZE/4); - clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); - memset(table + 768, 0, PAGE_SIZE/4); -} - +#define FRAG_MASK 0x0f #else #define ALLOC_ORDER 2 -#define TABLES_PER_PAGE 2 -#define FRAG_MASK 3UL -#define SECOND_HALVES 2UL - -void clear_table_pgstes(unsigned long *table) -{ - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); - memset(table + 256, 0, PAGE_SIZE/2); -} - +#define FRAG_MASK 0x03 #endif -unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; -EXPORT_SYMBOL(VMALLOC_START); -static int __init parse_vmalloc(char *arg) -{ - if (!arg) - return -EINVAL; - VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK; - return 0; -} -early_param("vmalloc", parse_vmalloc); - -unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) +unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) return NULL; - page->index = 0; - if (noexec) { - struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - if (!shadow) { - __free_pages(page, ALLOC_ORDER); - return NULL; - } - page->index = page_to_phys(shadow); - } - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.crst_list); - spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -static void __crst_table_free(struct mm_struct *mm, unsigned long *table) -{ - unsigned long *shadow = get_shadow_table(table); - - if (shadow) - free_pages((unsigned long) shadow, ALLOC_ORDER); - free_pages((unsigned long) table, ALLOC_ORDER); -} - void crst_table_free(struct mm_struct *mm, unsigned long *table) { - struct page *page = virt_to_page(table); - - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - __crst_table_free(mm, table); + free_pages((unsigned long) table, ALLOC_ORDER); } -void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) +#ifdef CONFIG_64BIT +static void __crst_table_upgrade(void *arg) { - struct rcu_table_freelist *batch; - struct page *page = virt_to_page(table); + struct mm_struct *mm = arg; - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (atomic_read(&mm->mm_users) < 2 && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - __crst_table_free(mm, table); - return; + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); } - batch = rcu_table_freelist_get(mm); - if (!batch) { - smp_call_function(smp_sync, NULL, 1); - __crst_table_free(mm, table); - return; - } - batch->table[--batch->crst_index] = table; - if (batch->pgt_index >= batch->crst_index) - rcu_table_freelist_finish(); + __tlb_flush_local(); } -#ifdef CONFIG_64BIT int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) { unsigned long *table, *pgd; unsigned long entry; + int flush; BUG_ON(limit > (1UL << 53)); + flush = 0; repeat: - table = crst_table_alloc(mm, mm->context.noexec); + table = crst_table_alloc(mm); if (!table) return -ENOMEM; spin_lock_bh(&mm->page_table_lock); @@ -221,13 +93,15 @@ repeat: mm->pgd = (pgd_t *) table; mm->task_size = mm->context.asce_limit; table = NULL; + flush = 1; } spin_unlock_bh(&mm->page_table_lock); if (table) crst_table_free(mm, table); if (mm->context.asce_limit < limit) goto repeat; - update_mm(mm, current); + if (flush) + on_each_cpu(__crst_table_upgrade, mm, 0); return 0; } @@ -235,9 +109,10 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) { pgd_t *pgd; - if (mm->context.asce_limit <= limit) - return; - __tlb_flush_mm(mm); + if (current->active_mm == mm) { + clear_user_asce(); + __tlb_flush_mm(mm); + } while (mm->context.asce_limit > limit) { pgd = mm->pgd; switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { @@ -260,142 +135,1228 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) mm->task_size = mm->context.asce_limit; crst_table_free(mm, (unsigned long *) pgd); } - update_mm(mm, current); + if (current->active_mm == mm) + set_user_asce(mm); } #endif +#ifdef CONFIG_PGSTE + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * + * Returns a guest address space structure. + */ +struct gmap *gmap_alloc(struct mm_struct *mm) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + if (!page) + goto out_free; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, _REGION1_ENTRY_EMPTY); + gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + list_add(&gmap->list, &mm->context.gmap_list); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) +{ + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + if (*table & _SEGMENT_ENTRY_INVALID) + return 0; + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry(rmap, &mp->mapper, list) { + if (rmap->entry != table) + continue; + list_del(&rmap->list); + kfree(rmap); + break; + } + *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; + return 1; +} + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + unsigned long *table; + int i; + + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); + + /* Free all segment & region tables. */ + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { + table = (unsigned long *) page_to_phys(page); + if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) + /* Remove gmap rmap structures for segment table. */ + for (i = 0; i < PTRS_PER_PMD; i++, table++) + gmap_unlink_segment(gmap, table); + __free_pages(page, ALLOC_ORDER); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + list_del(&gmap->list); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + /* - * page table entry allocation/free routines. + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, + unsigned long *table, unsigned long init) + __releases(&gmap->mm->page_table_lock) + __acquires(&gmap->mm->page_table_lock) +{ + struct page *page; + unsigned long *new; + + /* since we dont free the gmap table until gmap_free we can unlock */ + spin_unlock(&gmap->mm->page_table_lock); + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + spin_lock(&gmap->mm->page_table_lock); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + if (*table & _REGION_ENTRY_INVALID) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + } else + __free_pages(page, ALLOC_ORDER); + return 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @addr: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the guest addr space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Clear segment table entry in guest address space. */ + flush |= gmap_unlink_segment(gmap, table); + *table = _SEGMENT_ENTRY_INVALID; + } +out: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_mmap_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len > TASK_MAX_SIZE || + from + len < from || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the gmap address space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Store 'from' address in an invalid segment table entry. */ + flush |= gmap_unlink_segment(gmap, table); + *table = (from + off) | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; + +out_unmap: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) +{ + unsigned long *table; + + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + return table; +} + +/** + * __gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, vmaddr, segment; + struct gmap_pgtable *mp; + struct page *page; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return PTR_ERR(segment_ptr); + /* Convert the gmap address to an mm address. */ + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INVALID)) { + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } else if (segment & _SEGMENT_ENTRY_PROTECT) { + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) +{ + unsigned long vmaddr; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct mm_struct *mm; + struct page *page; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + mm = gmap->mm; + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); + if (!vma || vma->vm_start > vmaddr) + return -EFAULT; + /* Walk the parent mm page table */ + pgd = pgd_offset(mm, vmaddr); + pud = pud_alloc(mm, pgd, vmaddr); + if (!pud) + return -ENOMEM; + pmd = pmd_alloc(mm, pud, vmaddr); + if (!pmd) + return -ENOMEM; + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, vmaddr)) + return -ENOMEM; + /* large pmds cannot yet be handled */ + if (pmd_large(*pmd)) + return -EFAULT; + /* pmd now points to a valid segment table entry. */ + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); + if (!rmap) + return -ENOMEM; + /* Link gmap segment table entry location to page table. */ + page = pmd_page(*pmd); + mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; + rmap->entry = segment_ptr; + rmap->vmaddr = address & PMD_MASK; + spin_lock(&mm->page_table_lock); + if (*segment_ptr == segment) { + list_add(&rmap->list, &mp->mapper); + /* Set gmap segment table entry to page table. */ + *segment_ptr = pmd_val(*pmd) & PAGE_MASK; + rmap = NULL; + } + spin_unlock(&mm->page_table_lock); + kfree(rmap); + return 0; +} + +static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) +{ + struct gmap_rmap *rmap, *next; + struct gmap_pgtable *mp; + struct page *page; + int flush; + + flush = 0; + spin_lock(&mm->page_table_lock); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { + *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); + list_del(&rmap->list); + kfree(rmap); + flush = 1; + } + spin_unlock(&mm->page_table_lock); + if (flush) + __tlb_flush_global(); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, segment; + struct gmap_pgtable *mp; + struct page *page; + int rc; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return -EFAULT; + /* Convert the gmap address to an mm address. */ + while (1) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INVALID)) { + /* Page table is present */ + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } + if (!(segment & _SEGMENT_ENTRY_PROTECT)) + /* Nothing mapped in the gmap address space. */ + break; + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); + if (rc) + return rc; + } + return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) { + struct page *page = migration_entry_to_page(entry); + + if (PageAnon(page)) + dec_mm_counter(mm, MM_ANONPAGES); + else + dec_mm_counter(mm, MM_FILEPAGES); + } + free_swap_and_cache(entry); +} + +/** + * The mm->mmap_sem lock must be held + */ +static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) +{ + unsigned long ptev, pgstev; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep, pte; + + ptep = get_locked_pte(mm, address, &ptl); + if (unlikely(!ptep)) + return; + pte = *ptep; + if (!pte_swap(pte)) + goto out_pte; + /* Zap unused and logically-zero pages */ + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + ptev = pte_val(pte); + if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || + ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { + gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); + pte_clear(mm, address, ptep); + } + pgste_set_unlock(ptep, pgste); +out_pte: + pte_unmap_unlock(*ptep, ptl); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(unsigned long address, struct gmap *gmap) +{ + unsigned long *table, *segment_ptr; + unsigned long segment, pgstev, ptev; + struct gmap_pgtable *mp; + struct page *page; + + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return; + segment = *segment_ptr; + if (segment & _SEGMENT_ENTRY_INVALID) + return; + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + address = mp->vmaddr | (address & ~PMD_MASK); + /* Page table is present */ + table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + pgstev = table[PTRS_PER_PTE]; + ptev = table[0]; + /* quick check, checked again with locks held */ + if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || + ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) + gmap_zap_unused(gmap->mm, address); +} +EXPORT_SYMBOL_GPL(__gmap_zap); + +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @start: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= PGSTE_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. */ -unsigned long *page_table_alloc(struct mm_struct *mm) +void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); + } + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); + +static inline int page_table_with_pgste(struct page *page) +{ + return atomic_read(&page->_mapcount) == 0; +} + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) { struct page *page; unsigned long *table; - unsigned long bits; + struct gmap_pgtable *mp; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); + if (!mp) { + __free_page(page); + return NULL; + } + if (!pgtable_page_ctor(page)) { + kfree(mp); + __free_page(page); + return NULL; + } + mp->vmaddr = vmaddr & PMD_MASK; + INIT_LIST_HEAD(&mp->mapper); + page->index = (unsigned long) mp; + atomic_set(&page->_mapcount, 0); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + return table; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ + struct page *page; + struct gmap_pgtable *mp; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + BUG_ON(!list_empty(&mp->mapper)); + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + kfree(mp); + __free_page(page); +} + +static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, bool init_skey) +{ + pte_t *start_pte, *pte; + spinlock_t *ptl; + pgste_t pgste; + + start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte = start_pte; + do { + pgste = pgste_get_lock(pte); + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + if (init_skey) { + unsigned long address; + + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + + /* skip invalid and not writable pages */ + if (pte_val(*pte) & _PAGE_INVALID || + !(pte_val(*pte) & _PAGE_WRITE)) { + pgste_set_unlock(pte, pgste); + continue; + } + + address = pte_val(*pte) & PAGE_MASK; + page_set_storage_key(address, PAGE_DEFAULT_KEY, 1); + } + pgste_set_unlock(pte, pgste); + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(start_pte, ptl); + + return addr; +} + +static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, bool init_skey) +{ + unsigned long next; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + next = page_table_reset_pte(mm, pmd, addr, next, init_skey); + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, bool init_skey) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_reset_pmd(mm, pud, addr, next, init_skey); + } while (pud++, addr = next, addr != end); + + return addr; +} + +void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, + unsigned long end, bool init_skey) +{ + unsigned long addr, next; + pgd_t *pgd; + + down_write(&mm->mmap_sem); + if (init_skey && mm_use_skey(mm)) + goto out_up; + addr = start; + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_reset_pud(mm, pgd, addr, next, init_skey); + } while (pgd++, addr = next, addr != end); + if (init_skey) + current->mm->context.use_skey = 1; +out_up: + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL(page_table_reset_pgste); + +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits, skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set storage key ACC and FP */ + page_set_storage_key(address, skey, !nq); + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(*ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + +#else /* CONFIG_PGSTE */ + +static inline int page_table_with_pgste(struct page *page) +{ + return 0; +} + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + return NULL; +} + +void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, + unsigned long end, bool init_skey) +{ +} + +static inline void page_table_free_pgste(unsigned long *table) +{ +} + +static inline void gmap_disconnect_pgtable(struct mm_struct *mm, + unsigned long *table) +{ +} + +#endif /* CONFIG_PGSTE */ + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *uninitialized_var(table); + struct page *uninitialized_var(page); + unsigned int mask, bit; + + if (mm_has_pgste(mm)) + return page_table_alloc_pgste(mm, vmaddr); + /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); - page = NULL; + mask = FRAG_MASK; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) - page = NULL; + table = (unsigned long *) page_to_phys(page); + mask = atomic_read(&page->_mapcount); + mask = mask | (mask >> 4); } - if (!page) { + if ((mask & FRAG_MASK) == FRAG_MASK) { spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; - pgtable_page_ctor(page); - page->flags &= ~FRAG_MASK; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - if (mm->context.has_pgste) - clear_table_pgstes(table); - else - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); + } else { + for (bit = 1; mask & bit; bit <<= 1) + table += PTRS_PER_PTE; + mask = atomic_xor_bits(&page->_mapcount, bit); + if ((mask & FRAG_MASK) == FRAG_MASK) + list_del(&page->lru); } - table = (unsigned long *) page_to_phys(page); - while (page->flags & bits) { - table += 256; - bits <<= 1; - } - page->flags |= bits; - if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) - list_move_tail(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.list_lock); return table; } -static void __page_table_free(struct mm_struct *mm, unsigned long *table) +void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; - unsigned long bits; + unsigned int bit, mask; - bits = ((unsigned long) table) & 15; - table = (unsigned long *)(((unsigned long) table) ^ bits); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - page->flags ^= bits; - if (!(page->flags & FRAG_MASK)) { + if (page_table_with_pgste(page)) { + gmap_disconnect_pgtable(mm, table); + return page_table_free_pgste(table); + } + /* Free 1K/2K page table fragment of a 4K page */ + bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit); + if (mask & FRAG_MASK) + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + if (mask == 0) { pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); __free_page(page); } } -void page_table_free(struct mm_struct *mm, unsigned long *table) +static void __page_table_free_rcu(void *table, unsigned bit) { struct page *page; - unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); + if (bit == FRAG_MASK) + return page_table_free_pgste(table); + /* Free 1K/2K page table fragment of a 4K page */ page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock_bh(&mm->context.list_lock); - page->flags ^= bits; - if (page->flags & FRAG_MASK) { - /* Page now has some free pgtable fragments. */ - if (!list_empty(&page->lru)) - list_move(&page->lru, &mm->context.pgtable_list); - page = NULL; - } else - /* All fragments of the 4K page have been freed. */ - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (page) { + if (atomic_xor_bits(&page->_mapcount, bit) == 0) { pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); __free_page(page); } } -void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) { - struct rcu_table_freelist *batch; + struct mm_struct *mm; struct page *page; - unsigned long bits; + unsigned int bit, mask; - if (atomic_read(&mm->mm_users) < 2 && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - page_table_free(mm, table); - return; - } - batch = rcu_table_freelist_get(mm); - if (!batch) { - smp_call_function(smp_sync, NULL, 1); - page_table_free(mm, table); + mm = tlb->mm; + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { + gmap_disconnect_pgtable(mm, table); + table = (unsigned long *) (__pa(table) | FRAG_MASK); + tlb_remove_table(tlb, table); return; } - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); - /* Delayed freeing with rcu prevents reuse of pgtable fragments */ - list_del_init(&page->lru); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); + if (mask & FRAG_MASK) + list_add_tail(&page->lru, &mm->context.pgtable_list); spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *)(((unsigned long) table) | bits); - batch->table[batch->pgt_index++] = table; - if (batch->pgt_index >= batch->crst_index) - rcu_table_freelist_finish(); + table = (unsigned long *) (__pa(table) | (bit << 4)); + tlb_remove_table(tlb, table); +} + +static void __tlb_remove_table(void *_table) +{ + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; + + if (type) + __page_table_free_rcu(table, type); + else + free_pages((unsigned long) table, ALLOC_ORDER); +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); } -void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) +void tlb_table_flush(struct mmu_gather *tlb) { + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + tlb->mm->context.flush_mm = 1; + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm_lazy(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_flush_mmu(tlb); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void thp_split_vma(struct vm_area_struct *vma) +{ + unsigned long addr; + + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); +} + +static inline void thp_split_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + thp_split_vma(vma); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + } + mm->def_flags |= VM_NOHUGEPAGE; +} +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, + struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + unsigned long next, *table, *new; struct page *page; + pmd_t *pmd; - spin_lock_bh(&mm->context.list_lock); - /* Free shadow region and segment tables. */ - list_for_each_entry(page, &mm->context.crst_list, lru) - if (page->index) { - free_pages((unsigned long) page->index, ALLOC_ORDER); - page->index = 0; + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); +again: + if (pmd_none_or_clear_bad(pmd)) + continue; + table = (unsigned long *) pmd_deref(*pmd); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) + continue; + /* Allocate new page table with pgstes */ + new = page_table_alloc_pgste(mm, addr); + if (!new) + return -ENOMEM; + + spin_lock(&mm->page_table_lock); + if (likely((unsigned long *) pmd_deref(*pmd) == table)) { + /* Nuke pmd entry pointing to the "short" page table */ + pmdp_flush_lazy(mm, addr, pmd); + pmd_clear(pmd); + /* Copy ptes from old table to new table */ + memcpy(new, table, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + /* Establish new table */ + pmd_populate(mm, pmd, (pte_t *) new); + /* Free old table with rcu, there might be a walker! */ + page_table_free_rcu(tlb, table); + new = NULL; } - /* "Free" second halves of page tables. */ - list_for_each_entry(page, &mm->context.pgtable_list, lru) - page->flags &= ~SECOND_HALVES; - spin_unlock_bh(&mm->context.list_lock); - mm->context.noexec = 0; - update_mm(mm, tsk); + spin_unlock(&mm->page_table_lock); + if (new) { + page_table_free_pgste(new); + goto again; + } + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, + struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_realloc_pmd(tlb, mm, pud, addr, next); + if (unlikely(IS_ERR_VALUE(next))) + return next; + } while (pud++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_realloc_pud(tlb, mm, pgd, addr, next); + if (unlikely(IS_ERR_VALUE(next))) + return next; + } while (pgd++, addr = next, addr != end); + + return 0; } /* @@ -404,74 +1365,132 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm, *old_mm; - - /* Do we have switched amode? If no, we cannot do sie */ - if (user_mode == HOME_SPACE_MODE) - return -EINVAL; + struct mm_struct *mm = tsk->mm; + struct mmu_gather tlb; /* Do we have pgstes? if yes, we are done */ - if (tsk->mm->context.has_pgste) + if (mm_has_pgste(tsk->mm)) return 0; - /* lets check if we are allowed to replace the mm */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - task_unlock(tsk); - return -EINVAL; - } - task_unlock(tsk); + down_write(&mm->mmap_sem); + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + /* Reallocate the page tables with pgstes */ + tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); + if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE)) + mm->context.has_pgste = 1; + tlb_finish_mmu(&tlb, 0, TASK_SIZE); + up_write(&mm->mmap_sem); + return mm->context.has_pgste ? 0 : -ENOMEM; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); - /* we copy the mm and let dup_mm create the page tables with_pgstes */ - tsk->mm->context.alloc_pgste = 1; - mm = dup_mm(tsk); - tsk->mm->context.alloc_pgste = 0; - if (!mm) - return -ENOMEM; +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +void s390_enable_skey(void) +{ + page_table_reset_pgste(current->mm, 0, TASK_SIZE, true); +} +EXPORT_SYMBOL_GPL(s390_enable_skey); - /* Now lets check again if something happened */ - task_lock(tsk); - if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || -#ifdef CONFIG_AIO - !hlist_empty(&tsk->mm->ioctx_list) || -#endif - tsk->mm != tsk->active_mm) { - mmput(mm); - task_unlock(tsk); - return -EINVAL; +/* + * Test and reset if a guest page is dirty + */ +bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) +{ + pte_t *pte; + spinlock_t *ptl; + bool dirty = false; + + pte = get_locked_pte(gmap->mm, address, &ptl); + if (unlikely(!pte)) + return false; + + if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) + dirty = true; + + spin_unlock(ptl); + return dirty; +} +EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + /* No need to flush TLB + * On s390 reference bits are in storage key and never in TLB */ + return pmdp_test_and_clear_young(vma, address, pmdp); +} + +int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (pmd_same(*pmdp, entry)) + return 0; + pmdp_invalidate(vma, address, pmdp); + set_pmd_at(vma->vm_mm, address, pmdp, entry); + return 1; +} + +static void pmdp_splitting_flush_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, + (unsigned long *) pmdp)) { + /* need to serialize against gup-fast (IRQ disabled) */ + smp_call_function(pmdp_splitting_flush_sync, NULL, 1); } +} - /* ok, we are alone. No ptrace, no threads, etc. */ - old_mm = tsk->mm; - tsk->mm = tsk->active_mm = mm; - preempt_disable(); - update_mm(mm, tsk); - atomic_inc(&mm->context.attach_count); - atomic_dec(&old_mm->context.attach_count); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - preempt_enable(); - task_unlock(tsk); - mmput(old_mm); - return 0; +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } -EXPORT_SYMBOL_GPL(s390_enable_sie); -#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) -bool kernel_page_present(struct page *page) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { - unsigned long addr; - int cc; + struct list_head *lh; + pgtable_t pgtable; + pte_t *ptep; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); - addr = page_to_phys(page); - asm volatile( - " lra %1,0(%1)\n" - " ipm %0\n" - " srl %0,28" - : "=d" (cc), "+a" (addr) : : "cc"); - return cc == 0; + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + ptep = (pte_t *) pgtable; + pte_val(*ptep) = _PAGE_INVALID; + ptep++; + pte_val(*ptep) = _PAGE_INVALID; + return pgtable; } -#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 34c43f23b28..fe9012a49aa 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -1,6 +1,4 @@ /* - * arch/s390/mm/vmem.c - * * Copyright IBM Corp. 2006 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ @@ -12,6 +10,7 @@ #include <linux/list.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/memblock.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/setup.h> @@ -61,17 +60,18 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(void) +static pte_t __ref *vmem_pte_alloc(unsigned long address) { pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm); + pte = (pte_t *) page_table_alloc(&init_mm, address); else - pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); + pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), + PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) pte, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -81,57 +81,65 @@ static pte_t __ref *vmem_pte_alloc(void) */ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) { - unsigned long address; + unsigned long end = start + size; + unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; int ret = -ENOMEM; - for (address = start; address < start + size; address += PAGE_SIZE) { + while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } - pu_dir = pud_offset(pg_dir, address); +#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) + if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && + !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { + pud_val(*pu_dir) = __pa(address) | + _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | + (ro ? _REGION_ENTRY_PROTECT : 0); + address += PUD_SIZE; + continue; + } +#endif if (pud_none(*pu_dir)) { pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } - - pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); pm_dir = pmd_offset(pu_dir, address); - -#ifdef __s390x__ - if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && - (address + HPAGE_SIZE <= start + size) && - (address >= HPAGE_SIZE)) { - pte_val(pte) |= _SEGMENT_ENTRY_LARGE; - pmd_val(*pm_dir) = pte_val(pte); - address += HPAGE_SIZE - PAGE_SIZE; +#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) + if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && + !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { + pmd_val(*pm_dir) = __pa(address) | + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | + _SEGMENT_ENTRY_YOUNG | + (ro ? _SEGMENT_ENTRY_PROTECT : 0); + address += PMD_SIZE; continue; } #endif if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); + pt_dir = vmem_pte_alloc(address); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); - *pt_dir = pte; + pte_val(*pt_dir) = __pa(address) | + pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + address += PAGE_SIZE; } ret = 0; out: - flush_tlb_kernel_range(start, start + size); return ret; } @@ -141,58 +149,67 @@ out: */ static void vmem_remove_range(unsigned long start, unsigned long size) { - unsigned long address; + unsigned long end = start + size; + unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; pte_t pte; - pte_val(pte) = _PAGE_TYPE_EMPTY; - for (address = start; address < start + size; address += PAGE_SIZE) { + pte_val(pte) = _PAGE_INVALID; + while (address < end) { pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + address += PGDIR_SIZE; + continue; + } pu_dir = pud_offset(pg_dir, address); - if (pud_none(*pu_dir)) + if (pud_none(*pu_dir)) { + address += PUD_SIZE; + continue; + } + if (pud_large(*pu_dir)) { + pud_clear(pu_dir); + address += PUD_SIZE; continue; + } pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) + if (pmd_none(*pm_dir)) { + address += PMD_SIZE; continue; - - if (pmd_huge(*pm_dir)) { - pmd_clear_kernel(pm_dir); - address += HPAGE_SIZE - PAGE_SIZE; + } + if (pmd_large(*pm_dir)) { + pmd_clear(pm_dir); + address += PMD_SIZE; continue; } - pt_dir = pte_offset_kernel(pm_dir, address); *pt_dir = pte; + address += PAGE_SIZE; } - flush_tlb_kernel_range(start, start + size); + flush_tlb_kernel_range(start, end); } /* * Add a backed mem_map array to the virtual mem_map array. */ -int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { - unsigned long address, start_addr, end_addr; + unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; int ret = -ENOMEM; - start_addr = (unsigned long) start; - end_addr = (unsigned long) (start + nr); - - for (address = start_addr; address < end_addr; address += PAGE_SIZE) { + for (address = start; address < end;) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -200,15 +217,38 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); +#ifdef CONFIG_64BIT + /* Use 1MB frames for vmemmap if available. We always + * use large frames even if they are only partially + * used. + * Otherwise we would have also page tables since + * vmemmap_populate gets called for each section + * separately. */ + if (MACHINE_HAS_EDAT1) { + void *new_page; + + new_page = vmemmap_alloc_block(PMD_SIZE, node); + if (!new_page) + goto out; + pmd_val(*pm_dir) = __pa(new_page) | + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | + _SEGMENT_ENTRY_CO; + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } +#endif + pt_dir = vmem_pte_alloc(address); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); + } else if (pmd_large(*pm_dir)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; } pt_dir = pte_offset_kernel(pm_dir, address); @@ -218,17 +258,21 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) new_page =__pa(vmem_alloc_pages(0)); if (!new_page) goto out; - pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); - *pt_dir = pte; + pte_val(*pt_dir) = + __pa(new_page) | pgprot_val(PAGE_KERNEL); } + address += PAGE_SIZE; } - memset(start, 0, nr * sizeof(struct page)); + memset((void *)start, 0, end - start); ret = 0; out: - flush_tlb_kernel_range(start_addr, end_addr); return ret; } +void vmemmap_free(unsigned long start, unsigned long end) +{ +} + /* * Add memory segment to the segment list if it doesn't overlap with * an already present segment. @@ -329,14 +373,14 @@ out: void __init vmem_map_init(void) { unsigned long ro_start, ro_end; - unsigned long start, end; - int i; - - ro_start = ((unsigned long)&_stext) & PAGE_MASK; - ro_end = PFN_ALIGN((unsigned long)&_eshared); - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - start = memory_chunk[i].addr; - end = memory_chunk[i].addr + memory_chunk[i].size; + struct memblock_region *reg; + phys_addr_t start, end; + + ro_start = PFN_ALIGN((unsigned long)&_stext); + ro_end = (unsigned long)&_eshared & PAGE_MASK; + for_each_memblock(memory, reg) { + start = reg->base; + end = reg->base + reg->size - 1; if (start >= ro_end || end <= ro_start) vmem_add_mem(start, end - start, 0); else if (start >= ro_start && end <= ro_end) @@ -356,23 +400,21 @@ void __init vmem_map_init(void) } /* - * Convert memory chunk array to a memory segment list so there is a single - * list that contains both r/w memory and shared memory segments. + * Convert memblock.memory to a memory segment list so there is a single + * list that contains all memory segments. */ static int __init vmem_convert_memory_chunk(void) { + struct memblock_region *reg; struct memory_segment *seg; - int i; mutex_lock(&vmem_mutex); - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; + for_each_memblock(memory, reg) { seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); - seg->start = memory_chunk[i].addr; - seg->size = memory_chunk[i].size; + seg->start = reg->base; + seg->size = reg->size; insert_memory_segment(seg); } mutex_unlock(&vmem_mutex); diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile new file mode 100644 index 00000000000..90568c33ddb --- /dev/null +++ b/arch/s390/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S new file mode 100644 index 00000000000..7e45d13816c --- /dev/null +++ b/arch/s390/net/bpf_jit.S @@ -0,0 +1,130 @@ +/* + * BPF Jit compiler for s390, help functions. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/linkage.h> + +/* + * Calling convention: + * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved + * %r2: skb pointer + * %r3: offset parameter + * %r5: BPF A accumulator + * %r8: return address + * %r9: save register for skb pointer + * %r10: skb->data + * %r11: skb->len - skb->data_len (headlen) + * %r12: BPF X accumulator + * + * skb_copy_bits takes 4 parameters: + * %r2 = skb pointer + * %r3 = offset into skb data + * %r4 = length to copy + * %r5 = pointer to temp buffer + */ +#define SKBDATA %r8 + + /* A = *(u32 *) (skb->data+K+X) */ +ENTRY(sk_load_word_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u32 *) (skb->data+K) */ +ENTRY(sk_load_word) + llgfr %r1,%r3 # extend offset + ahi %r3,4 # offset + 4 + clr %r11,%r3 # hlen <= offset + 4 ? + jl sk_load_word_slow + l %r5,0(%r1,%r10) # get word from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_word_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,4 # 4 bytes + la %r5,160(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u16 *) (skb->data+K+X) */ +ENTRY(sk_load_half_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u16 *) (skb->data+K) */ +ENTRY(sk_load_half) + llgfr %r1,%r3 # extend offset + ahi %r3,2 # offset + 2 + clr %r11,%r3 # hlen <= offset + 2 ? + jl sk_load_half_slow + llgh %r5,0(%r1,%r10) # get half from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_half_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(2,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u8 *) (skb->data+K+X) */ +ENTRY(sk_load_byte_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u8 *) (skb->data+K) */ +ENTRY(sk_load_byte) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r5,0 + ic %r5,0(%r1,%r10) # get byte from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,1 # 1 bytes + la %r5,163(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = (*(u8 *)(skb->data+K) & 0xf) << 2 */ +ENTRY(sk_load_byte_msh) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r12,0 + ic %r12,0(%r1,%r10) # get byte from skb + nill %r12,0x0f + sll %r12,2 + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_msh_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r12,160(%r15) # load result from temp buffer + nill %r12,0x0f + sll %r12,2 + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c new file mode 100644 index 00000000000..a2cbd875543 --- /dev/null +++ b/arch/s390/net/bpf_jit_comp.c @@ -0,0 +1,891 @@ +/* + * BPF Jit compiler for s390. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/moduleloader.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/filter.h> +#include <linux/random.h> +#include <linux/init.h> +#include <asm/cacheflush.h> +#include <asm/facility.h> +#include <asm/dis.h> + +/* + * Conventions: + * %r2 = skb pointer + * %r3 = offset parameter + * %r4 = scratch register / length parameter + * %r5 = BPF A accumulator + * %r8 = return address + * %r9 = save register for skb pointer + * %r10 = skb->data + * %r11 = skb->len - skb->data_len (headlen) + * %r12 = BPF X accumulator + * %r13 = literal pool pointer + * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +struct bpf_jit { + unsigned int seen; + u8 *start; + u8 *prg; + u8 *mid; + u8 *lit; + u8 *end; + u8 *base_ip; + u8 *ret0_ip; + u8 *exit_ip; + unsigned int off_load_word; + unsigned int off_load_half; + unsigned int off_load_byte; + unsigned int off_load_bmsh; + unsigned int off_load_iword; + unsigned int off_load_ihalf; + unsigned int off_load_ibyte; +}; + +#define BPF_SIZE_MAX 4096 /* Max size for program */ + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */ +#define SEEN_LITERAL 16 /* code uses literals */ +#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */ +#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */ +#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */ +#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */ +#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */ +#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */ +#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */ + +#define EMIT2(op) \ +({ \ + if (jit->prg + 2 <= jit->mid) \ + *(u16 *) jit->prg = op; \ + jit->prg += 2; \ +}) + +#define EMIT4(op) \ +({ \ + if (jit->prg + 4 <= jit->mid) \ + *(u32 *) jit->prg = op; \ + jit->prg += 4; \ +}) + +#define EMIT4_DISP(op, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT4(op | __disp); \ +}) + +#define EMIT4_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm) & 0xffff; \ + EMIT4(op | __imm); \ +}) + +#define EMIT4_PCREL(op, pcrel) \ +({ \ + long __pcrel = ((pcrel) >> 1) & 0xffff; \ + EMIT4(op | __pcrel); \ +}) + +#define EMIT6(op1, op2) \ +({ \ + if (jit->prg + 6 <= jit->mid) { \ + *(u32 *) jit->prg = op1; \ + *(u16 *) (jit->prg + 4) = op2; \ + } \ + jit->prg += 6; \ +}) + +#define EMIT6_DISP(op1, op2, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT6(op1 | __disp, op2); \ +}) + +#define EMIT6_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm); \ + EMIT6(op | (__imm >> 16), __imm & 0xffff); \ +}) + +#define EMIT_CONST(val) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 4 <= jit->end) \ + *(u32 *) jit->lit = val; \ + jit->lit += 4; \ + ret; \ +}) + +#define EMIT_FN_CONST(bit, fn) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + if (jit->seen & bit) { \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 8 <= jit->end) \ + *(void **) jit->lit = fn; \ + jit->lit += 8; \ + } \ + ret; \ +}) + +static void bpf_jit_prologue(struct bpf_jit *jit) +{ + /* Save registers and create stack frame if necessary */ + if (jit->seen & SEEN_DATAREF) { + /* stmg %r8,%r15,88(%r15) */ + EMIT6(0xeb8ff058, 0x0024); + /* lgr %r14,%r15 */ + EMIT4(0xb90400ef); + /* aghi %r15,<offset> */ + EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80); + /* stg %r14,152(%r15) */ + EMIT6(0xe3e0f098, 0x0024); + } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* stmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0024); + else if (jit->seen & SEEN_XREG) + /* stg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0024); + else if (jit->seen & SEEN_LITERAL) + /* stg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0024); + + /* Setup literal pool */ + if (jit->seen & SEEN_LITERAL) { + /* basr %r13,0 */ + EMIT2(0x0dd0); + jit->base_ip = jit->prg; + } + jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word); + jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half); + jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte); + jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh); + jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind); + jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind); + jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind); + + /* Filter needs to access skb data */ + if (jit->seen & SEEN_DATAREF) { + /* l %r11,<len>(%r2) */ + EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len)); + /* s %r11,<data_len>(%r2) */ + EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len)); + /* lg %r10,<data>(%r2) */ + EMIT6_DISP(0xe3a02000, 0x0004, + offsetof(struct sk_buff, data)); + } +} + +static void bpf_jit_epilogue(struct bpf_jit *jit) +{ + /* Return 0 */ + if (jit->seen & SEEN_RET0) { + jit->ret0_ip = jit->prg; + /* lghi %r2,0 */ + EMIT4(0xa7290000); + } + jit->exit_ip = jit->prg; + /* Restore registers */ + if (jit->seen & SEEN_DATAREF) + /* lmg %r8,%r15,<offset>(%r15) */ + EMIT6_DISP(0xeb8ff000, 0x0004, + (jit->seen & SEEN_MEM) ? 200 : 168); + else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* lmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0004); + else if (jit->seen & SEEN_XREG) + /* lg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0004); + else if (jit->seen & SEEN_LITERAL) + /* lg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0004); + /* br %r14 */ + EMIT2(0x07fe); +} + +/* Helper to find the offset of pkt_type in sk_buff + * Make sure its still a 3bit field starting at the MSBs within a byte. + */ +#define PKT_TYPE_MAX 0xe0 +static int pkt_type_offset; + +static int __init bpf_pkt_type_offset_init(void) +{ + struct sk_buff skb_probe = { + .pkt_type = ~0, + }; + char *ct = (char *)&skb_probe; + int off; + + pkt_type_offset = -1; + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (!ct[off]) + continue; + if (ct[off] == PKT_TYPE_MAX) + pkt_type_offset = off; + else { + /* Found non matching bit pattern, fix needed. */ + WARN_ON_ONCE(1); + pkt_type_offset = -1; + return -1; + } + } + return 0; +} +device_initcall(bpf_pkt_type_offset_init); + +/* + * make sure we dont leak kernel information to user + */ +static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) +{ + /* Clear temporary memory if (seen & SEEN_MEM) */ + if (jit->seen & SEEN_MEM) + /* xc 0(64,%r15),0(%r15) */ + EMIT6(0xd73ff000, 0xf000); + /* Clear X if (seen & SEEN_XREG) */ + if (jit->seen & SEEN_XREG) + /* lhi %r12,0 */ + EMIT4(0xa7c80000); + /* Clear A if the first register does not set it. */ + switch (filter[0].code) { + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_IMM: + case BPF_LD | BPF_MEM: + case BPF_MISC | BPF_TXA: + case BPF_RET | BPF_K: + /* first instruction sets A register */ + break; + default: /* A = 0 */ + /* lhi %r5,0 */ + EMIT4(0xa7580000); + } +} + +static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, + unsigned int *addrs, int i, int last) +{ + unsigned int K; + int offset; + unsigned int mask; + u16 code; + + K = filter->k; + code = bpf_anc_helper(filter); + + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ + jit->seen |= SEEN_XREG; + /* ar %r5,%r12 */ + EMIT2(0x1a5c); + break; + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ + if (!K) + break; + if (K <= 16383) + /* ahi %r5,<K> */ + EMIT4_IMM(0xa75a0000, K); + else if (test_facility(21)) + /* alfi %r5,<K> */ + EMIT6_IMM(0xc25b0000, K); + else + /* a %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ + jit->seen |= SEEN_XREG; + /* sr %r5,%r12 */ + EMIT2(0x1b5c); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ + if (!K) + break; + if (K <= 16384) + /* ahi %r5,-K */ + EMIT4_IMM(0xa75a0000, -K); + else if (test_facility(21)) + /* alfi %r5,-K */ + EMIT6_IMM(0xc25b0000, -K); + else + /* s %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ + jit->seen |= SEEN_XREG; + /* msr %r5,%r12 */ + EMIT4(0xb252005c); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ + if (K <= 16383) + /* mhi %r5,K */ + EMIT4_IMM(0xa75c0000, K); + else if (test_facility(34)) + /* msfi %r5,<K> */ + EMIT6_IMM(0xc2510000, K); + else + /* ms %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x7150d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; + /* ltr %r12,%r12 */ + EMIT2(0x12cc); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dlr %r4,%r12 */ + EMIT4(0xb997004c); + break; + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ + if (K == 1) + break; + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dl %r4,<d(K)>(%r13) */ + EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; + /* ltr %r12,%r12 */ + EMIT2(0x12cc); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dlr %r4,%r12 */ + EMIT4(0xb997004c); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */ + if (K == 1) { + /* lhi %r5,0 */ + EMIT4(0xa7580000); + break; + } + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dl %r4,<d(K)>(%r13) */ + EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ + jit->seen |= SEEN_XREG; + /* nr %r5,%r12 */ + EMIT2(0x145c); + break; + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ + if (test_facility(21)) + /* nilf %r5,<K> */ + EMIT6_IMM(0xc05b0000, K); + else + /* n %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5450d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ + jit->seen |= SEEN_XREG; + /* or %r5,%r12 */ + EMIT2(0x165c); + break; + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ + if (test_facility(21)) + /* oilf %r5,<K> */ + EMIT6_IMM(0xc05d0000, K); + else + /* o %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5650d000, EMIT_CONST(K)); + break; + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: + jit->seen |= SEEN_XREG; + /* xr %r5,%r12 */ + EMIT2(0x175c); + break; + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ + if (!K) + break; + /* x %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5750d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ + jit->seen |= SEEN_XREG; + /* sll %r5,0(%r12) */ + EMIT4(0x8950c000); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ + if (K == 0) + break; + /* sll %r5,K */ + EMIT4_DISP(0x89500000, K); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ + jit->seen |= SEEN_XREG; + /* srl %r5,0(%r12) */ + EMIT4(0x8850c000); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ + if (K == 0) + break; + /* srl %r5,K */ + EMIT4_DISP(0x88500000, K); + break; + case BPF_ALU | BPF_NEG: /* A = -A */ + /* lnr %r5,%r5 */ + EMIT2(0x1155); + break; + case BPF_JMP | BPF_JA: /* ip += K */ + offset = addrs[i + K] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */ + mask = 0x200000; /* jh */ + goto kbranch; + case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto kbranch; + case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */ + mask = 0x800000; /* je */ +kbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K <= 16383) + /* chi %r5,<K> */ + EMIT4_IMM(0xa75e0000, K); + else if (test_facility(21)) + /* clfi %r5,<K> */ + EMIT6_IMM(0xc25f0000, K); + else + /* c %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5950d000, EMIT_CONST(K)); + } +branch: if (filter->jt == filter->jf) { + if (filter->jt == 0) + break; + /* j <jt> */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + } + if (filter->jt != 0) { + /* brc <mask>,<jt> */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | mask, offset); + } + if (filter->jf != 0) { + /* brc <mask^15>,<jf> */ + offset = addrs[i + filter->jf] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K > 65535) { + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* n %r4,<d(K)>(%r13) */ + EMIT4_DISP(0x5440d000, EMIT_CONST(K)); + } else + /* tmll %r5,K */ + EMIT4_IMM(0xa7510000, K); + } + goto branch; + case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */ + mask = 0x200000; /* jh */ + goto xbranch; + case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto xbranch; + case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */ + mask = 0x800000; /* je */ +xbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* cr %r5,%r12 */ + EMIT2(0x195c); + } + goto branch; + case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* nr %r4,%r12 */ + EMIT2(0x144c); + } + goto branch; + case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; + offset = jit->off_load_word; + goto load_abs; + case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; + offset = jit->off_load_half; + goto load_abs; + case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; + offset = jit->off_load_byte; +load_abs: if ((int) K < 0) + goto out; +call_fn: /* lg %r1,<d(function)>(%r13) */ + EMIT6_DISP(0xe310d000, 0x0004, offset); + /* l %r3,<d(K)>(%r13) */ + EMIT4_DISP(0x5830d000, EMIT_CONST(K)); + /* basr %r8,%r1 */ + EMIT2(0x0d81); + /* jnz <ret0> */ + EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); + break; + case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; + offset = jit->off_load_iword; + goto call_fn; + case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; + offset = jit->off_load_ihalf; + goto call_fn; + case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; + offset = jit->off_load_ibyte; + goto call_fn; + case BPF_LDX | BPF_B | BPF_MSH: + /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ + jit->seen |= SEEN_RET0; + if ((int) K < 0) { + /* j <ret0> */ + EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg)); + break; + } + jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; + offset = jit->off_load_bmsh; + goto call_fn; + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + /* l %r5,<d(len)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); + break; + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ + jit->seen |= SEEN_XREG; + /* l %r12,<d(len)>(%r2) */ + EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); + break; + case BPF_LD | BPF_IMM: /* A = K */ + if (K <= 16383) + /* lhi %r5,K */ + EMIT4_IMM(0xa7580000, K); + else if (test_facility(21)) + /* llilf %r5,<K> */ + EMIT6_IMM(0xc05f0000, K); + else + /* l %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5850d000, EMIT_CONST(K)); + break; + case BPF_LDX | BPF_IMM: /* X = K */ + jit->seen |= SEEN_XREG; + if (K <= 16383) + /* lhi %r12,<K> */ + EMIT4_IMM(0xa7c80000, K); + else if (test_facility(21)) + /* llilf %r12,<K> */ + EMIT6_IMM(0xc0cf0000, K); + else + /* l %r12,<d(K)>(%r13) */ + EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); + break; + case BPF_LD | BPF_MEM: /* A = mem[K] */ + jit->seen |= SEEN_MEM; + /* l %r5,<K>(%r15) */ + EMIT4_DISP(0x5850f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_LDX | BPF_MEM: /* X = mem[K] */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* l %r12,<K>(%r15) */ + EMIT4_DISP(0x58c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_MISC | BPF_TAX: /* X = A */ + jit->seen |= SEEN_XREG; + /* lr %r12,%r5 */ + EMIT2(0x18c5); + break; + case BPF_MISC | BPF_TXA: /* A = X */ + jit->seen |= SEEN_XREG; + /* lr %r5,%r12 */ + EMIT2(0x185c); + break; + case BPF_RET | BPF_K: + if (K == 0) { + jit->seen |= SEEN_RET0; + if (last) + break; + /* j <ret0> */ + EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg); + } else { + if (K <= 16383) + /* lghi %r2,K */ + EMIT4_IMM(0xa7290000, K); + else + /* llgf %r2,<K>(%r13) */ + EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K)); + /* j <exit> */ + if (last && !(jit->seen & SEEN_RET0)) + break; + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + } + break; + case BPF_RET | BPF_A: + /* llgfr %r2,%r5 */ + EMIT4(0xb9160025); + /* j <exit> */ + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + break; + case BPF_ST: /* mem[K] = A */ + jit->seen |= SEEN_MEM; + /* st %r5,<K>(%r15) */ + EMIT4_DISP(0x5050f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* st %r12,<K>(%r15) */ + EMIT4_DISP(0x50c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(protocol)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); + break; + case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + jit->seen |= SEEN_RET0; + /* lg %r1,<d(dev)>(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* l %r5,<d(ifindex)>(%r1) */ + EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); + break; + case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + /* l %r5,<d(mark)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); + break; + case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(queue_mapping)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); + break; + case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + jit->seen |= SEEN_RET0; + /* lg %r1,<d(dev)>(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(type)>(%r1) */ + EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); + break; + case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + /* l %r5,<d(hash)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); + break; + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(vlan_tci)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci)); + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { + /* nill %r5,0xefff */ + EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT); + } else { + /* nill %r5,0x1000 */ + EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT); + /* srl %r5,12 */ + EMIT4_DISP(0x88500000, 12); + } + break; + case BPF_ANC | SKF_AD_PKTTYPE: + if (pkt_type_offset < 0) + goto out; + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* ic %r5,<d(pkt_type_offset)>(%r2) */ + EMIT4_DISP(0x43502000, pkt_type_offset); + /* srl %r5,5 */ + EMIT4_DISP(0x88500000, 5); + break; + case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */ +#ifdef CONFIG_SMP + /* l %r5,<d(cpu_nr)> */ + EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); +#else + /* lhi %r5,0 */ + EMIT4(0xa7580000); +#endif + break; + default: /* too complex, give up */ + goto out; + } + addrs[i] = jit->prg - jit->start; + return 0; +out: + return -1; +} + +/* + * Note: for security reasons, bpf code will follow a randomly + * sized amount of illegal instructions. + */ +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + +static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, + u8 **image_ptr) +{ + struct bpf_binary_header *header; + unsigned int sz, hole; + + /* Most BPF filters are really small, but if some of them fill a page, + * allow at least 128 extra bytes for illegal instructions. + */ + sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); + header = module_alloc(sz); + if (!header) + return NULL; + memset(header, 0, sz); + header->pages = sz / PAGE_SIZE; + hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); + /* Insert random number of illegal instructions before BPF code + * and make sure the first instruction starts at an even address. + */ + *image_ptr = &header->image[(prandom_u32() % hole) & -2]; + return header; +} + +void bpf_jit_compile(struct sk_filter *fp) +{ + struct bpf_binary_header *header = NULL; + unsigned long size, prg_len, lit_len; + struct bpf_jit jit, cjit; + unsigned int *addrs; + int pass, i; + + if (!bpf_jit_enable) + return; + addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + memset(&jit, 0, sizeof(cjit)); + memset(&cjit, 0, sizeof(cjit)); + + for (pass = 0; pass < 10; pass++) { + jit.prg = jit.start; + jit.lit = jit.mid; + + bpf_jit_prologue(&jit); + bpf_jit_noleaks(&jit, fp->insns); + for (i = 0; i < fp->len; i++) { + if (bpf_jit_insn(&jit, fp->insns + i, addrs, i, + i == fp->len - 1)) + goto out; + } + bpf_jit_epilogue(&jit); + if (jit.start) { + WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit); + if (memcmp(&jit, &cjit, sizeof(jit)) == 0) + break; + } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { + prg_len = jit.prg - jit.start; + lit_len = jit.lit - jit.mid; + size = prg_len + lit_len; + if (size >= BPF_SIZE_MAX) + goto out; + header = bpf_alloc_binary(size, &jit.start); + if (!header) + goto out; + jit.prg = jit.mid = jit.start + prg_len; + jit.lit = jit.end = jit.start + prg_len + lit_len; + jit.base_ip += (unsigned long) jit.start; + jit.exit_ip += (unsigned long) jit.start; + jit.ret0_ip += (unsigned long) jit.start; + } + cjit = jit; + } + if (bpf_jit_enable > 1) { + bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start); + if (jit.start) + print_fn_code(jit.start, jit.mid - jit.start); + } + if (jit.start) { + set_memory_ro((unsigned long)header, header->pages); + fp->bpf_func = (void *) jit.start; + fp->jited = 1; + } +out: + kfree(addrs); +} + +void bpf_jit_free(struct sk_filter *fp) +{ + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; + + if (!fp->jited) + goto free_filter; + + set_memory_rw(addr, header->pages); + module_free(NULL, header); + +free_filter: + kfree(fp); +} diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 537b2d840e6..524c4b61582 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_64BIT) += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c index bc4b84a35ca..8a6811b2cdb 100644 --- a/arch/s390/oprofile/backtrace.c +++ b/arch/s390/oprofile/backtrace.c @@ -1,8 +1,6 @@ -/** - * arch/s390/oprofile/backtrace.c - * +/* * S390 Version - * Copyright (C) 2005 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2005 * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com> */ @@ -60,7 +58,7 @@ void s390_backtrace(struct pt_regs * const regs, unsigned int depth) unsigned long head; struct stack_frame* head_sf; - if (user_mode (regs)) + if (user_mode(regs)) return; head = regs->gprs[15]; diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c new file mode 100644 index 00000000000..e53c6f26880 --- /dev/null +++ b/arch/s390/oprofile/hwsampler.c @@ -0,0 +1,1178 @@ +/* + * Copyright IBM Corp. 2010 + * Author: Heinz Graalfs <graalfs@de.ibm.com> + */ + +#include <linux/kernel_stat.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/semaphore.h> +#include <linux/oom.h> +#include <linux/oprofile.h> + +#include <asm/facility.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +#include "hwsampler.h" +#include "op_counter.h" + +#define MAX_NUM_SDB 511 +#define MIN_NUM_SDB 1 + +DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); + +struct hws_execute_parms { + void *buffer; + signed int rc; +}; + +DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); +EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); + +static DEFINE_MUTEX(hws_sem); +static DEFINE_MUTEX(hws_sem_oom); + +static unsigned char hws_flush_all; +static unsigned int hws_oom; +static unsigned int hws_alert; +static struct workqueue_struct *hws_wq; + +static unsigned int hws_state; +enum { + HWS_INIT = 1, + HWS_DEALLOCATED, + HWS_STOPPED, + HWS_STARTED, + HWS_STOPPING }; + +/* set to 1 if called by kernel during memory allocation */ +static unsigned char oom_killer_was_active; +/* size of SDBT and SDB as of allocate API */ +static unsigned long num_sdbt = 100; +static unsigned long num_sdb = 511; +/* sampling interval (machine cycles) */ +static unsigned long interval; + +static unsigned long min_sampler_rate; +static unsigned long max_sampler_rate; + +static void execute_qsi(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = qsi(ep->buffer); +} + +static void execute_ssctl(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = lsctl(ep->buffer); +} + +static int smp_ctl_ssctl_stop(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 0; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) { + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + dump_stack(); + } + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.es || cb->qsi.cs) { + printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); + dump_stack(); + } + + return rc; +} + +static int smp_ctl_ssctl_deactivate(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 1; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.cs) + printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); + + return rc; +} + +static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.h = 1; + cb->ssctl.tear = cb->first_sdbt; + cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; + cb->ssctl.interval = interval; + cb->ssctl.es = 1; + cb->ssctl.cs = 1; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + if (ep.rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); + + return rc; +} + +static int smp_ctl_qsi(int cpu) +{ + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + return ep.rc; +} + +static void hws_ext_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + + if (!(param32 & CPU_MF_INT_SF_MASK)) + return; + + if (!hws_alert) + return; + + inc_irq_stat(IRQEXT_CMS); + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} + +static void worker(struct work_struct *work); + +static void add_samples_to_oprofile(unsigned cpu, unsigned long *, + unsigned long *dear); + +static void init_all_cpu_buffers(void) +{ + int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + memset(cb, 0, sizeof(struct hws_cpu_buffer)); + } +} + +static void prepare_cpu_buffers(void) +{ + struct hws_cpu_buffer *cb; + int cpu; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + atomic_set(&cb->ext_params, 0); + cb->worker_entry = 0; + cb->sample_overflow = 0; + cb->req_alert = 0; + cb->incorrect_sdbt_entry = 0; + cb->invalid_entry_address = 0; + cb->loss_of_sample_data = 0; + cb->sample_auth_change_alert = 0; + cb->finish = 0; + cb->oom = 0; + cb->stop_mode = 0; + } +} + +/* + * allocate_sdbt() - allocate sampler memory + * @cpu: the cpu for which sampler memory is allocated + * + * A 4K page is allocated for each requested SDBT. + * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. + * Set ALERT_REQ mask in each SDBs trailer. + * Returns zero if successful, <0 otherwise. + */ +static int allocate_sdbt(int cpu) +{ + int j, k, rc; + unsigned long *sdbt; + unsigned long sdb; + unsigned long *tail; + unsigned long *trailer; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->first_sdbt) + return -EINVAL; + + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdbt) { + if (sdbt) + free_page((unsigned long)sdbt); + + goto allocate_sdbt_error; + } + if (cb->first_sdbt == 0) + cb->first_sdbt = (unsigned long)sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *)sdbt + 1; + + mutex_unlock(&hws_sem_oom); + + for (k = 0; k < num_sdb; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdb) { + if (sdb) + free_page(sdb); + + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = SDB_TE_ALERT_REQ_MASK; + sdbt++; + mutex_unlock(&hws_sem_oom); + } + tail = sdbt; + } + mutex_lock(&hws_sem_oom); + if (oom_killer_was_active) + goto allocate_sdbt_error; + + rc = 0; + if (tail) + *tail = (unsigned long) + ((void *)cb->first_sdbt) + 1; + +allocate_sdbt_exit: + mutex_unlock(&hws_sem_oom); + return rc; + +allocate_sdbt_error: + rc = -ENOMEM; + goto allocate_sdbt_exit; +} + +/* + * deallocate_sdbt() - deallocate all sampler memory + * + * For each online CPU all SDBT trees are deallocated. + * Returns the number of freed pages. + */ +static int deallocate_sdbt(void) +{ + int cpu; + int counter; + + counter = 0; + + for_each_online_cpu(cpu) { + unsigned long start; + unsigned long sdbt; + unsigned long *curr; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->first_sdbt) + continue; + + sdbt = cb->first_sdbt; + curr = (unsigned long *) sdbt; + start = sdbt; + + /* we'll free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the start */ + if ((unsigned long) curr == start) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + counter++; + } + cb->first_sdbt = 0; + } + return counter; +} + +static int start_sampling(int cpu) +{ + int rc; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); + goto start_exit; + } + + rc = -EINVAL; + if (!cb->qsi.es) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); + goto start_exit; + } + + if (!cb->qsi.cs) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); + goto start_exit; + } + + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", + cpu, interval); + + rc = 0; + +start_exit: + return rc; +} + +static int stop_sampling(int cpu) +{ + unsigned long v; + int rc; + struct hws_cpu_buffer *cb; + + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (!rc && !cb->qsi.es) + printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); + + rc = smp_ctl_ssctl_stop(cpu); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", + cpu, rc); + goto stop_exit; + } + + printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); + +stop_exit: + v = cb->req_alert; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," + " count=%lu.\n", cpu, v); + + v = cb->loss_of_sample_data; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," + " count=%lu.\n", cpu, v); + + v = cb->invalid_entry_address; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," + " count=%lu.\n", cpu, v); + + v = cb->incorrect_sdbt_entry; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Incorrect SDBT address," + " count=%lu.\n", cpu, v); + + v = cb->sample_auth_change_alert; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Sample authorization change," + " count=%lu.\n", cpu, v); + + return rc; +} + +static int check_hardware_prerequisites(void) +{ + if (!test_facility(68)) + return -EOPNOTSUPP; + return 0; +} +/* + * hws_oom_callback() - the OOM callback function + * + * In case the callback is invoked during memory allocation for the + * hw sampler, all obtained memory is deallocated and a flag is set + * so main sampler memory allocation can exit with a failure code. + * In case the callback is invoked during sampling the hw sampler + * is deactivated for all CPUs. + */ +static int hws_oom_callback(struct notifier_block *nfb, + unsigned long dummy, void *parm) +{ + unsigned long *freed; + int cpu; + struct hws_cpu_buffer *cb; + + freed = parm; + + mutex_lock(&hws_sem_oom); + + if (hws_state == HWS_DEALLOCATED) { + /* during memory allocation */ + if (oom_killer_was_active == 0) { + oom_killer_was_active = 1; + *freed += deallocate_sdbt(); + } + } else { + int i; + cpu = get_cpu(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->oom) { + for_each_online_cpu(i) { + smp_ctl_ssctl_deactivate(i); + cb->oom = 1; + } + cb->finish = 1; + + printk(KERN_INFO + "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", + cpu); + } + } + + mutex_unlock(&hws_sem_oom); + + return NOTIFY_OK; +} + +static struct notifier_block hws_oom_notifier = { + .notifier_call = hws_oom_callback +}; + +static int hws_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* We do not have sampler space available for all possible CPUs. + All CPUs should be online when hw sampling is activated. */ + return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; +} + +static struct notifier_block hws_cpu_notifier = { + .notifier_call = hws_cpu_callback +}; + +/** + * hwsampler_deactivate() - set hardware sampling temporarily inactive + * @cpu: specifies the CPU to be set inactive. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deactivate(unsigned int cpu) +{ + /* + * Deactivate hw sampling temporarily and flush the buffer + * by pushing all the pending samples to oprofile buffer. + * + * This function can be called under one of the following conditions: + * Memory unmap, task is exiting. + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.cs) { + rc = smp_ctl_ssctl_deactivate(cpu); + if (rc) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); + cb->finish = 1; + hws_state = HWS_STOPPING; + } else { + hws_flush_all = 1; + /* Add work to queue to read pending samples.*/ + queue_work_on(cpu, hws_wq, &cb->worker); + } + } + } + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + return rc; +} + +/** + * hwsampler_activate() - activate/resume hardware sampling which was deactivated + * @cpu: specifies the CPU to be set active. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_activate(unsigned int cpu) +{ + /* + * Re-activate hw sampling. This should be called in pair with + * hwsampler_deactivate(). + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (!cb->qsi.cs) { + hws_flush_all = 0; + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_ERR + "CPU %d, CPUMF activate sampling failed.\n", + cpu); + } + } + } + + mutex_unlock(&hws_sem); + + return rc; +} + +static int check_qsi_on_setup(void) +{ + int rc; + unsigned int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (rc) + return -EOPNOTSUPP; + + if (!cb->qsi.as) { + printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); + return -EINVAL; + } + + if (cb->qsi.es) { + printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + return -EINVAL; + + printk(KERN_INFO + "CPU %d, CPUMF Sampling stopped now.\n", cpu); + } + } + return 0; +} + +static int check_qsi_on_start(void) +{ + unsigned int cpu; + int rc; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + if (!cb->qsi.as) + return -EINVAL; + + if (cb->qsi.es) + return -EINVAL; + + if (cb->qsi.cs) + return -EINVAL; + } + return 0; +} + +static void worker_on_start(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->worker_entry = cb->first_sdbt; +} + +static int worker_check_error(unsigned int cpu, int ext_params) +{ + int rc; + unsigned long *sdbt; + struct hws_cpu_buffer *cb; + + rc = 0; + cb = &per_cpu(sampler_cpu_buffer, cpu); + sdbt = (unsigned long *) cb->worker_entry; + + if (!sdbt || !*sdbt) + return -EINVAL; + + if (ext_params & CPU_MF_INT_SF_PRA) + cb->req_alert++; + + if (ext_params & CPU_MF_INT_SF_LSDA) + cb->loss_of_sample_data++; + + if (ext_params & CPU_MF_INT_SF_IAE) { + cb->invalid_entry_address++; + rc = -EINVAL; + } + + if (ext_params & CPU_MF_INT_SF_ISE) { + cb->incorrect_sdbt_entry++; + rc = -EINVAL; + } + + if (ext_params & CPU_MF_INT_SF_SACA) { + cb->sample_auth_change_alert++; + rc = -EINVAL; + } + + return rc; +} + +static void worker_on_finish(unsigned int cpu) +{ + int rc, i; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->finish) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.es) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", + cpu); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", + cpu); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (!cb->finish) { + cb->finish = 1; + queue_work_on(i, hws_wq, + &cb->worker); + } + } + } + } +} + +static void worker_on_interrupt(unsigned int cpu) +{ + unsigned long *sdbt; + unsigned char done; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + sdbt = (unsigned long *) cb->worker_entry; + + done = 0; + /* do not proceed if stop was entered, + * forget the buffers not yet processed */ + while (!done && !cb->stop_mode) { + unsigned long *trailer; + struct hws_trailer_entry *te; + unsigned long *dear = 0; + + trailer = trailer_entry_ptr(*sdbt); + /* leave loop if no more work to do */ + if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { + done = 1; + if (!hws_flush_all) + continue; + } + + te = (struct hws_trailer_entry *)trailer; + cb->sample_overflow += te->overflow; + + add_samples_to_oprofile(cpu, sdbt, dear); + + /* reset trailer */ + xchg((unsigned char *) te, 0x40); + + /* advance to next sdb slot in current sdbt */ + sdbt++; + /* in case link bit is set use address w/o link bit */ + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + cb->worker_entry = (unsigned long)sdbt; + } +} + +static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, + unsigned long *dear) +{ + struct hws_basic_entry *sample_data_ptr; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + if (dear) { + if (dear > trailer) + return; + trailer = dear; + } + + sample_data_ptr = (struct hws_basic_entry *)(*sdbt); + + while ((unsigned long *)sample_data_ptr < trailer) { + struct pt_regs *regs = NULL; + struct task_struct *tsk = NULL; + + /* + * Check sampling mode, 1 indicates basic (=customer) sampling + * mode. + */ + if (sample_data_ptr->def != 1) { + /* sample slot is not yet written */ + break; + } else { + /* make sure we don't use it twice, + * the next time the sampler will set it again */ + sample_data_ptr->def = 0; + } + + /* Get pt_regs. */ + if (sample_data_ptr->P == 1) { + /* userspace sample */ + unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; + rcu_read_lock(); + tsk = pid_task(find_vpid(pid), PIDTYPE_PID); + if (tsk) + regs = task_pt_regs(tsk); + rcu_read_unlock(); + } else { + /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; + regs = task_pt_regs(current); + } + + mutex_lock(&hws_sem); + oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, + !sample_data_ptr->P, tsk); + mutex_unlock(&hws_sem); + skip_sample: + sample_data_ptr++; + } +} + +static void worker(struct work_struct *work) +{ + unsigned int cpu; + int ext_params; + struct hws_cpu_buffer *cb; + + cb = container_of(work, struct hws_cpu_buffer, worker); + cpu = smp_processor_id(); + ext_params = atomic_xchg(&cb->ext_params, 0); + + if (!cb->worker_entry) + worker_on_start(cpu); + + if (worker_check_error(cpu, ext_params)) + return; + + if (!cb->finish) + worker_on_interrupt(cpu); + + if (cb->finish) + worker_on_finish(cpu); +} + +/** + * hwsampler_allocate() - allocate memory for the hardware sampler + * @sdbt: number of SDBTs per online CPU (must be > 0) + * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) +{ + int cpu, rc; + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_DEALLOCATED) + goto allocate_exit; + + if (sdbt < 1) + goto allocate_exit; + + if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) + goto allocate_exit; + + num_sdbt = sdbt; + num_sdb = sdb; + + oom_killer_was_active = 0; + register_oom_notifier(&hws_oom_notifier); + + for_each_online_cpu(cpu) { + if (allocate_sdbt(cpu)) { + unregister_oom_notifier(&hws_oom_notifier); + goto allocate_error; + } + } + unregister_oom_notifier(&hws_oom_notifier); + if (oom_killer_was_active) + goto allocate_error; + + hws_state = HWS_STOPPED; + rc = 0; + +allocate_exit: + mutex_unlock(&hws_sem); + return rc; + +allocate_error: + rc = -ENOMEM; + printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); + goto allocate_exit; +} + +/** + * hwsampler_deallocate() - deallocate hardware sampler memory + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deallocate(void) +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto deallocate_exit; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; + deallocate_sdbt(); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +deallocate_exit: + mutex_unlock(&hws_sem); + + return rc; +} + +unsigned long hwsampler_query_min_interval(void) +{ + return min_sampler_rate; +} + +unsigned long hwsampler_query_max_interval(void) +{ + return max_sampler_rate; +} + +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + return cb->sample_overflow; +} + +int hwsampler_setup(void) +{ + int rc; + int cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state) + goto setup_exit; + + hws_state = HWS_INIT; + + init_all_cpu_buffers(); + + rc = check_hardware_prerequisites(); + if (rc) + goto setup_exit; + + rc = check_qsi_on_setup(); + if (rc) + goto setup_exit; + + rc = -EINVAL; + hws_wq = create_workqueue("hwsampler"); + if (!hws_wq) + goto setup_exit; + + register_cpu_notifier(&hws_cpu_notifier); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + INIT_WORK(&cb->worker, worker); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (min_sampler_rate != cb->qsi.min_sampl_rate) { + if (min_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different min sampler rate values.\n"); + if (min_sampler_rate < cb->qsi.min_sampl_rate) + min_sampler_rate = + cb->qsi.min_sampl_rate; + } else + min_sampler_rate = cb->qsi.min_sampl_rate; + } + if (max_sampler_rate != cb->qsi.max_sampl_rate) { + if (max_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different max sampler rate values.\n"); + if (max_sampler_rate > cb->qsi.max_sampl_rate) + max_sampler_rate = + cb->qsi.max_sampl_rate; + } else + max_sampler_rate = cb->qsi.max_sampl_rate; + } + } + register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +setup_exit: + mutex_unlock(&hws_sem); + return rc; +} + +int hwsampler_shutdown(void) +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + + if (hws_state == HWS_STOPPED) { + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; + deallocate_sdbt(); + } + if (hws_wq) { + destroy_workqueue(hws_wq); + hws_wq = NULL; + } + + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); + hws_state = HWS_INIT; + rc = 0; + } + mutex_unlock(&hws_sem); + + unregister_cpu_notifier(&hws_cpu_notifier); + + return rc; +} + +/** + * hwsampler_start_all() - start hardware sampling on all online CPUs + * @rate: specifies the used interval when samples are taken + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_start_all(unsigned long rate) +{ + int rc, cpu; + + mutex_lock(&hws_sem); + + hws_oom = 0; + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto start_all_exit; + + interval = rate; + + /* fail if rate is not valid */ + if (interval < min_sampler_rate || interval > max_sampler_rate) + goto start_all_exit; + + rc = check_qsi_on_start(); + if (rc) + goto start_all_exit; + + prepare_cpu_buffers(); + + for_each_online_cpu(cpu) { + rc = start_sampling(cpu); + if (rc) + break; + } + if (rc) { + for_each_online_cpu(cpu) { + stop_sampling(cpu); + } + goto start_all_exit; + } + hws_state = HWS_STARTED; + rc = 0; + +start_all_exit: + mutex_unlock(&hws_sem); + + if (rc) + return rc; + + register_oom_notifier(&hws_oom_notifier); + hws_oom = 1; + hws_flush_all = 0; + /* now let them in, 1407 CPUMF external interrupts */ + hws_alert = 1; + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +/** + * hwsampler_stop_all() - stop hardware sampling on all online CPUs + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_stop_all(void) +{ + int tmp_rc, rc, cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = 0; + if (hws_state == HWS_INIT) { + mutex_unlock(&hws_sem); + return 0; + } + hws_state = HWS_STOPPING; + mutex_unlock(&hws_sem); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->stop_mode = 1; + tmp_rc = stop_sampling(cpu); + if (tmp_rc) + rc = tmp_rc; + } + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + if (hws_oom) { + unregister_oom_notifier(&hws_oom_notifier); + hws_oom = 0; + } + hws_state = HWS_STOPPED; + mutex_unlock(&hws_sem); + + return rc; +} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h new file mode 100644 index 00000000000..a483d06f2fa --- /dev/null +++ b/arch/s390/oprofile/hwsampler.h @@ -0,0 +1,63 @@ +/* + * CPUMF HW sampler functions and internal structures + * + * Copyright IBM Corp. 2010 + * Author(s): Heinz Graalfs <graalfs@de.ibm.com> + */ + +#ifndef HWSAMPLER_H_ +#define HWSAMPLER_H_ + +#include <linux/workqueue.h> +#include <asm/cpu_mf.h> + +struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ +{ /* bytes 0 - 7 Bit(s) */ + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long b2_53:52; /* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +}; + +struct hws_cpu_buffer { + unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ + unsigned long worker_entry; + unsigned long sample_overflow; /* taken from SDB ... */ + struct hws_qsi_info_block qsi; + struct hws_ssctl_request_block ssctl; + struct work_struct worker; + atomic_t ext_params; + unsigned long req_alert; + unsigned long loss_of_sample_data; + unsigned long invalid_entry_address; + unsigned long incorrect_sdbt_entry; + unsigned long sample_auth_change_alert; + unsigned int finish:1; + unsigned int oom:1; + unsigned int stop_mode:1; +}; + +int hwsampler_setup(void); +int hwsampler_shutdown(void); +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); +int hwsampler_deallocate(void); +unsigned long hwsampler_query_min_interval(void); +unsigned long hwsampler_query_max_interval(void); +int hwsampler_start_all(unsigned long interval); +int hwsampler_stop_all(void); +int hwsampler_deactivate(unsigned int cpu); +int hwsampler_activate(unsigned int cpu); +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); + +#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 7a995113b91..9ffe645d598 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -1,26 +1,524 @@ -/** - * arch/s390/oprofile/init.c - * +/* * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2002, 2011 * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2011 OProfile authors */ #include <linux/oprofile.h> +#include <linux/perf_event.h> #include <linux/init.h> #include <linux/errno.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> +#include "../../../drivers/oprofile/oprof.h" extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); -int __init oprofile_arch_init(struct oprofile_operations* ops) +#ifdef CONFIG_64BIT + +#include "hwsampler.h" +#include "op_counter.h" + +#define DEFAULT_INTERVAL 4127518 + +#define DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static int hwsampler_enabled; +static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; + +static struct oprofile_operations timer_ops; + +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + +static int __oprofile_hwsampler_start(void) +{ + int retval; + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static int oprofile_hwsampler_start(void) +{ + int retval; + + hwsampler_running = hwsampler_enabled; + + if (!hwsampler_running) + return timer_ops.start(); + + retval = perf_reserve_sampling(); + if (retval) + return retval; + + retval = __oprofile_hwsampler_start(); + if (retval) + perf_release_sampling(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + + hwsampler_stop_all(); + hwsampler_deallocate(); + perf_release_sampling(); + return; +} + +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); +} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = val; + + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. + */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. */ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + +static int oprofile_create_hwsampling_files(struct dentry *root) +{ + struct dentry *dir; + + dir = oprofilefs_mkdir(root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; + + /* reinitialize default values */ + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; + + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(root, "0"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(dir, "event", &zero_fops); + oprofilefs_create_file(dir, "count", &hw_interval_fops); + oprofilefs_create_file(dir, "unit_mask", &zero_fops); + oprofilefs_create_file(dir, "kernel", &kernel_fops); + oprofilefs_create_file(dir, "user", &user_fops); + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } + return 0; +} + +static int oprofile_hwsampler_init(struct oprofile_operations *ops) +{ + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; + default: return -ENODEV; + } + } + + if (hwsampler_setup()) + return -ENODEV; + + /* + * Query the range for the sampling interval from the + * hardware. + */ + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval == 0) + return -ENODEV; + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval == 0) + return -ENODEV; + + /* The initial value should be sane */ + if (oprofile_hw_interval < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + if (oprofile_hw_interval > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + + return 0; +} + +static void oprofile_hwsampler_exit(void) +{ + hwsampler_shutdown(); +} + +#endif /* CONFIG_64BIT */ + +int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; + +#ifdef CONFIG_64BIT + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; +#else return -ENODEV; +#endif } void oprofile_arch_exit(void) { +#ifdef CONFIG_64BIT + oprofile_hwsampler_exit(); +#endif } diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 00000000000..61b2531eef1 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,21 @@ +/* + * Copyright IBM Corp. 2011 + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. */ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile new file mode 100644 index 00000000000..a9e1dc4ae44 --- /dev/null +++ b/arch/s390/pci/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the s390 PCI subsystem. +# + +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ + pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c new file mode 100644 index 00000000000..30de42730b2 --- /dev/null +++ b/arch/s390/pci/pci.c @@ -0,0 +1,935 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + * + * The System z PCI code is a rewrite from a prototype by + * the following people (Kudoz!): + * Alexander Schmidt + * Christoph Raisch + * Hannes Hering + * Hoang-Nam Nguyen + * Jan-Bernd Themann + * Stefan Roscher + * Thomas Klein + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/seq_file.h> +#include <linux/pci.h> +#include <linux/msi.h> + +#include <asm/isc.h> +#include <asm/airq.h> +#include <asm/facility.h> +#include <asm/pci_insn.h> +#include <asm/pci_clp.h> +#include <asm/pci_dma.h> + +#define DEBUG /* enable pr_debug */ + +#define SIC_IRQ_MODE_ALL 0 +#define SIC_IRQ_MODE_SINGLE 1 + +#define ZPCI_NR_DMA_SPACES 1 +#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS + +/* list of all detected zpci devices */ +static LIST_HEAD(zpci_list); +static DEFINE_SPINLOCK(zpci_list_lock); + +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, +}; + +static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); +static DEFINE_SPINLOCK(zpci_domain_lock); + +static struct airq_iv *zpci_aisb_iv; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; + +/* Adapter interrupt definitions */ +static void zpci_irq_handler(struct airq_struct *airq); + +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; + +/* I/O Map */ +static DEFINE_SPINLOCK(zpci_iomap_lock); +static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); +struct zpci_iomap_entry *zpci_iomap_start; +EXPORT_SYMBOL_GPL(zpci_iomap_start); + +static struct kmem_cache *zdev_fmb_cache; + +struct zpci_dev *get_zdev(struct pci_dev *pdev) +{ + return (struct zpci_dev *) pdev->sysdata; +} + +struct zpci_dev *get_zdev_by_fid(u32 fid) +{ + struct zpci_dev *tmp, *zdev = NULL; + + spin_lock(&zpci_list_lock); + list_for_each_entry(tmp, &zpci_list, entry) { + if (tmp->fid == fid) { + zdev = tmp; + break; + } + } + spin_unlock(&zpci_list_lock); + return zdev; +} + +static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) +{ + return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; +} + +int pci_domain_nr(struct pci_bus *bus) +{ + return ((struct zpci_dev *) bus->sysdata)->domain; +} +EXPORT_SYMBOL_GPL(pci_domain_nr); + +int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} +EXPORT_SYMBOL_GPL(pci_proc_domain); + +/* Modify PCI: Register adapter interruptions */ +static int zpci_set_airq(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); + struct zpci_fib fib = {0}; + + fib.isc = PCI_ISC; + fib.sum = 1; /* enable summary notifications */ + fib.noi = airq_iv_end(zdev->aibv); + fib.aibv = (unsigned long) zdev->aibv->vector; + fib.aibvo = 0; /* each zdev has its own interrupt vector */ + fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib.aisbo = zdev->aisb & 63; + + return zpci_mod_fc(req, &fib); +} + +struct mod_pci_args { + u64 base; + u64 limit; + u64 iota; + u64 fmb_addr; +}; + +static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn); + struct zpci_fib fib = {0}; + + fib.pba = args->base; + fib.pal = args->limit; + fib.iota = args->iota; + fib.fmb_addr = args->fmb_addr; + + return zpci_mod_fc(req, &fib); +} + +/* Modify PCI: Register I/O address translation parameters */ +int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, + u64 base, u64 limit, u64 iota) +{ + struct mod_pci_args args = { base, limit, iota, 0 }; + + WARN_ON_ONCE(iota & 0x3fff); + args.iota |= ZPCI_IOTA_RTTO_FLAG; + return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args); +} + +/* Modify PCI: Unregister I/O address translation parameters */ +int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + + return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args); +} + +/* Modify PCI: Unregister adapter interruptions */ +static int zpci_clear_airq(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + + return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args); +} + +/* Modify PCI: Set PCI function measurement parameters */ +int zpci_fmb_enable_device(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + + if (zdev->fmb) + return -EINVAL; + + zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL); + if (!zdev->fmb) + return -ENOMEM; + WARN_ON((u64) zdev->fmb & 0xf); + + args.fmb_addr = virt_to_phys(zdev->fmb); + return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); +} + +/* Modify PCI: Disable PCI function measurement */ +int zpci_fmb_disable_device(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + int rc; + + if (!zdev->fmb) + return -EINVAL; + + /* Function measurement is disabled if fmb address is zero */ + rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); + + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + return rc; +} + +#define ZPCI_PCIAS_CFGSPC 15 + +static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); + u64 data; + int rc; + + rc = zpci_load(&data, req, offset); + if (!rc) { + data = data << ((8 - len) * 8); + data = le64_to_cpu(data); + *val = (u32) data; + } else + *val = 0xffffffff; + return rc; +} + +static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); + u64 data = val; + int rc; + + data = cpu_to_le64(data); + data = data >> ((8 - len) * 8); + rc = zpci_store(data, req, offset); + return rc; +} + +void pcibios_fixup_bus(struct pci_bus *bus) +{ +} + +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, + resource_size_t align) +{ + return 0; +} + +/* combine single writes by using store-block insn */ +void __iowrite64_copy(void __iomem *to, const void *from, size_t count) +{ + zpci_memcpy_toio(to, from, count); +} + +/* Create a virtual mapping cookie for a PCI BAR */ +void __iomem *pci_iomap(struct pci_dev *pdev, int bar, unsigned long max) +{ + struct zpci_dev *zdev = get_zdev(pdev); + u64 addr; + int idx; + + if ((bar & 7) != bar) + return NULL; + + idx = zdev->bars[bar].map_idx; + spin_lock(&zpci_iomap_lock); + zpci_iomap_start[idx].fh = zdev->fh; + zpci_iomap_start[idx].bar = bar; + spin_unlock(&zpci_iomap_lock); + + addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48); + return (void __iomem *) addr; +} +EXPORT_SYMBOL_GPL(pci_iomap); + +void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) +{ + unsigned int idx; + + idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48; + spin_lock(&zpci_iomap_lock); + zpci_iomap_start[idx].fh = 0; + zpci_iomap_start[idx].bar = 0; + spin_unlock(&zpci_iomap_lock); +} +EXPORT_SYMBOL_GPL(pci_iounmap); + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *val) +{ + struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; + + if (!zdev || devfn != ZPCI_DEVFN) + ret = -ENODEV; + else + ret = zpci_cfg_load(zdev, where, val, size); + + return ret; +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 val) +{ + struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; + + if (!zdev || devfn != ZPCI_DEVFN) + ret = -ENODEV; + else + ret = zpci_cfg_store(zdev, where, val, size); + + return ret; +} + +static struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +static void zpci_irq_handler(struct airq_struct *airq) +{ + unsigned long si, ai; + struct airq_iv *aibv; + int irqs_on = 0; + + inc_irq_stat(IRQIO_PCI); + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + si = 0; + continue; + } + + /* Scan the adapter interrupt vector for this device. */ + aibv = zpci_aibv[si]; + for (ai = 0;;) { + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); + if (ai == -1UL) + break; + inc_irq_stat(IRQIO_MSI); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); + } + } +} + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct zpci_dev *zdev = get_zdev(pdev); + unsigned int hwirq, msi_vecs; + unsigned long aisb; + struct msi_desc *msi; + struct msi_msg msg; + int rc, irq; + + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); + + /* Allocate adapter summary indicator bit */ + rc = -EIO; + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + goto out; + zdev->aisb = aisb; + + /* Create adapter interrupt vector */ + rc = -ENOMEM; + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + goto out_si; + + /* Wire up shortcut pointer */ + zpci_aibv[aisb] = zdev->aibv; + + /* Request MSI interrupts */ + hwirq = 0; + list_for_each_entry(msi, &pdev->msi_list, list) { + rc = -EIO; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq < 0) + goto out_msi; + rc = irq_set_msi_desc(irq, msi); + if (rc) + goto out_msi; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; + } + + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + goto out_msi; + + return (msi_vecs == nvec) ? 0 : msi_vecs; + +out_msi: + list_for_each_entry(msi, &pdev->msi_list, list) { + if (hwirq-- == 0) + break; + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } + zpci_aibv[aisb] = NULL; + airq_iv_release(zdev->aibv); +out_si: + airq_iv_free_bit(zpci_aisb_iv, aisb); +out: + return rc; +} + +void arch_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct msi_desc *msi; + int rc; + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); + if (rc) + return; + + /* Release MSI interrupts */ + list_for_each_entry(msi, &pdev->msi_list, list) { + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } + + zpci_aibv[zdev->aisb] = NULL; + airq_iv_release(zdev->aibv); + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); +} + +static void zpci_map_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); + pdev->resource[i].end = pdev->resource[i].start + len - 1; + } +} + +static void zpci_unmap_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pci_iounmap(pdev, (void *) pdev->resource[i].start); + } +} + +static int __init zpci_irq_init(void) +{ + int rc; + + rc = register_adapter_interrupt(&zpci_airq); + if (rc) + goto out; + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; + + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; + + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + return 0; + +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: + return rc; +} + +static void zpci_irq_exit(void) +{ + airq_iv_release(zpci_aisb_iv); + unregister_adapter_interrupt(&zpci_airq); +} + +static int zpci_alloc_iomap(struct zpci_dev *zdev) +{ + int entry; + + spin_lock(&zpci_iomap_lock); + entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); + if (entry == ZPCI_IOMAP_MAX_ENTRIES) { + spin_unlock(&zpci_iomap_lock); + return -ENOSPC; + } + set_bit(entry, zpci_iomap); + spin_unlock(&zpci_iomap_lock); + return entry; +} + +static void zpci_free_iomap(struct zpci_dev *zdev, int entry) +{ + spin_lock(&zpci_iomap_lock); + memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry)); + clear_bit(entry, zpci_iomap); + spin_unlock(&zpci_iomap_lock); +} + +static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, + unsigned long size, unsigned long flags) +{ + struct resource *r; + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return NULL; + + r->start = start; + r->end = r->start + size - 1; + r->flags = flags; + r->name = zdev->res_name; + + if (request_resource(&iomem_resource, r)) { + kfree(r); + return NULL; + } + return r; +} + +static int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources) +{ + unsigned long addr, size, flags; + struct resource *res; + int i, entry; + + snprintf(zdev->res_name, sizeof(zdev->res_name), + "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + if (!zdev->bars[i].size) + continue; + entry = zpci_alloc_iomap(zdev); + if (entry < 0) + return entry; + zdev->bars[i].map_idx = entry; + + /* only MMIO is supported */ + flags = IORESOURCE_MEM; + if (zdev->bars[i].val & 8) + flags |= IORESOURCE_PREFETCH; + if (zdev->bars[i].val & 4) + flags |= IORESOURCE_MEM_64; + + addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48); + + size = 1UL << zdev->bars[i].size; + + res = __alloc_res(zdev, addr, size, flags); + if (!res) { + zpci_free_iomap(zdev, entry); + return -ENOMEM; + } + zdev->bars[i].res = res; + pci_add_resource(resources, res); + } + + return 0; +} + +static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) +{ + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + if (!zdev->bars[i].size) + continue; + + zpci_free_iomap(zdev, zdev->bars[i].map_idx); + release_resource(zdev->bars[i].res); + kfree(zdev->bars[i].res); + } +} + +int pcibios_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + int i; + + zdev->pdev = pdev; + pdev->dev.groups = zpci_attr_groups; + zpci_map_resources(zdev); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + if (res->parent || !res->flags) + continue; + pci_claim_resource(pdev, i); + } + + return 0; +} + +int pcibios_enable_device(struct pci_dev *pdev, int mask) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zdev->pdev = pdev; + zpci_debug_init_device(zdev); + zpci_fmb_enable_device(zdev); + zpci_map_resources(zdev); + + return pci_enable_resources(pdev, mask); +} + +void pcibios_disable_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_unmap_resources(zdev); + zpci_fmb_disable_device(zdev); + zpci_debug_exit_device(zdev); + zdev->pdev = NULL; +} + +#ifdef CONFIG_HIBERNATE_CALLBACKS +static int zpci_restore(struct device *dev) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + int ret = 0; + + if (zdev->state != ZPCI_FN_STATE_ONLINE) + goto out; + + ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); + if (ret) + goto out; + + zpci_map_resources(zdev); + zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + +out: + return ret; +} + +static int zpci_freeze(struct device *dev) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + + if (zdev->state != ZPCI_FN_STATE_ONLINE) + return 0; + + zpci_unregister_ioat(zdev, 0); + return clp_disable_fh(zdev); +} + +struct dev_pm_ops pcibios_pm_ops = { + .thaw_noirq = zpci_restore, + .freeze_noirq = zpci_freeze, + .restore_noirq = zpci_restore, + .poweroff_noirq = zpci_freeze, +}; +#endif /* CONFIG_HIBERNATE_CALLBACKS */ + +static int zpci_alloc_domain(struct zpci_dev *zdev) +{ + spin_lock(&zpci_domain_lock); + zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); + if (zdev->domain == ZPCI_NR_DEVICES) { + spin_unlock(&zpci_domain_lock); + return -ENOSPC; + } + set_bit(zdev->domain, zpci_domain); + spin_unlock(&zpci_domain_lock); + return 0; +} + +static void zpci_free_domain(struct zpci_dev *zdev) +{ + spin_lock(&zpci_domain_lock); + clear_bit(zdev->domain, zpci_domain); + spin_unlock(&zpci_domain_lock); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + struct zpci_dev *zdev = get_zdev_by_bus(bus); + + zpci_exit_slot(zdev); + zpci_cleanup_bus_resources(zdev); + zpci_free_domain(zdev); + + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + + kfree(zdev); +} + +static int zpci_scan_bus(struct zpci_dev *zdev) +{ + LIST_HEAD(resources); + int ret; + + ret = zpci_setup_bus_resources(zdev, &resources); + if (ret) + return ret; + + zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, + zdev, &resources); + if (!zdev->bus) { + zpci_cleanup_bus_resources(zdev); + return -EIO; + } + + zdev->bus->max_bus_speed = zdev->max_bus_speed; + return 0; +} + +int zpci_enable_device(struct zpci_dev *zdev) +{ + int rc; + + rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); + if (rc) + goto out; + + rc = zpci_dma_init_device(zdev); + if (rc) + goto out_dma; + + zdev->state = ZPCI_FN_STATE_ONLINE; + return 0; + +out_dma: + clp_disable_fh(zdev); +out: + return rc; +} +EXPORT_SYMBOL_GPL(zpci_enable_device); + +int zpci_disable_device(struct zpci_dev *zdev) +{ + zpci_dma_exit_device(zdev); + return clp_disable_fh(zdev); +} +EXPORT_SYMBOL_GPL(zpci_disable_device); + +int zpci_create_device(struct zpci_dev *zdev) +{ + int rc; + + rc = zpci_alloc_domain(zdev); + if (rc) + goto out; + + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { + rc = zpci_enable_device(zdev); + if (rc) + goto out_free; + } + rc = zpci_scan_bus(zdev); + if (rc) + goto out_disable; + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); + + zpci_init_slot(zdev); + + return 0; + +out_disable: + if (zdev->state == ZPCI_FN_STATE_ONLINE) + zpci_disable_device(zdev); +out_free: + zpci_free_domain(zdev); +out: + return rc; +} + +void zpci_stop_device(struct zpci_dev *zdev) +{ + zpci_dma_exit_device(zdev); + /* + * Note: SCLP disables fh via set-pci-fn so don't + * do that here. + */ +} +EXPORT_SYMBOL_GPL(zpci_stop_device); + +static inline int barsize(u8 size) +{ + return (size) ? (1 << size) >> 10 : 0; +} + +static int zpci_mem_init(void) +{ + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), + 16, 0, NULL); + if (!zdev_fmb_cache) + goto error_zdev; + + /* TODO: use realloc */ + zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), + GFP_KERNEL); + if (!zpci_iomap_start) + goto error_iomap; + return 0; + +error_iomap: + kmem_cache_destroy(zdev_fmb_cache); +error_zdev: + return -ENOMEM; +} + +static void zpci_mem_exit(void) +{ + kfree(zpci_iomap_start); + kmem_cache_destroy(zdev_fmb_cache); +} + +static unsigned int s390_pci_probe = 1; +static unsigned int s390_pci_initialized; + +char * __init pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + s390_pci_probe = 0; + return NULL; + } + return str; +} + +bool zpci_is_enabled(void) +{ + return s390_pci_initialized; +} + +static int __init pci_base_init(void) +{ + int rc; + + if (!s390_pci_probe) + return 0; + + if (!test_facility(2) || !test_facility(69) + || !test_facility(71) || !test_facility(72)) + return 0; + + rc = zpci_debug_init(); + if (rc) + goto out; + + rc = zpci_mem_init(); + if (rc) + goto out_mem; + + rc = zpci_irq_init(); + if (rc) + goto out_irq; + + rc = zpci_dma_init(); + if (rc) + goto out_dma; + + rc = clp_scan_pci_devices(); + if (rc) + goto out_find; + + s390_pci_initialized = 1; + return 0; + +out_find: + zpci_dma_exit(); +out_dma: + zpci_irq_exit(); +out_irq: + zpci_mem_exit(); +out_mem: + zpci_debug_exit(); +out: + return rc; +} +subsys_initcall_sync(pci_base_init); + +void zpci_rescan(void) +{ + if (zpci_is_enabled()) + clp_rescan_pci_devices_simple(); +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c new file mode 100644 index 00000000000..96545d7659f --- /dev/null +++ b/arch/s390/pci/pci_clp.c @@ -0,0 +1,392 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <asm/pci_debug.h> +#include <asm/pci_clp.h> + +static inline void zpci_err_clp(unsigned int rsp, int rc) +{ + struct { + unsigned int rsp; + int rc; + } __packed data = {rsp, rc}; + + zpci_err_hex(&data, sizeof(data)); +} + +/* + * Call Logical Processor + * Retry logic is handled by the caller. + */ +static inline u8 clp_instr(void *data) +{ + struct { u8 _[CLP_BLK_SIZE]; } *req = data; + u64 ignored; + u8 cc; + + asm volatile ( + " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req) + : [req] "a" (req) + : "cc"); + return cc; +} + +static void *clp_alloc_block(gfp_t gfp_mask) +{ + return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE)); +} + +static void clp_free_block(void *ptr) +{ + free_pages((unsigned long) ptr, get_order(CLP_BLK_SIZE)); +} + +static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, + struct clp_rsp_query_pci_grp *response) +{ + zdev->tlb_refresh = response->refresh; + zdev->dma_mask = response->dasm; + zdev->msi_addr = response->msia; + zdev->fmb_update = response->mui; + + switch (response->version) { + case 1: + zdev->max_bus_speed = PCIE_SPEED_5_0GT; + break; + default: + zdev->max_bus_speed = PCI_SPEED_UNKNOWN; + break; + } +} + +static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) +{ + struct clp_req_rsp_query_pci_grp *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.pfgid = pfgid; + + rc = clp_instr(rrb); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + clp_store_query_pci_fngrp(zdev, &rrb->response); + else { + zpci_err("Q PCI FGRP:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } + clp_free_block(rrb); + return rc; +} + +static int clp_store_query_pci_fn(struct zpci_dev *zdev, + struct clp_rsp_query_pci *response) +{ + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + zdev->bars[i].val = le32_to_cpu(response->bar[i]); + zdev->bars[i].size = response->bar_size[i]; + } + zdev->start_dma = response->sdma; + zdev->end_dma = response->edma; + zdev->pchid = response->pchid; + zdev->pfgid = response->pfgid; + zdev->pft = response->pft; + zdev->vfn = response->vfn; + zdev->uid = response->uid; + + memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip)); + if (response->util_str_avail) { + memcpy(zdev->util_str, response->util_str, + sizeof(zdev->util_str)); + } + + return 0; +} + +static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +{ + struct clp_req_rsp_query_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = fh; + + rc = clp_instr(rrb); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + rc = clp_store_query_pci_fn(zdev, &rrb->response); + if (rc) + goto out; + if (rrb->response.pfgid) + rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); + } else { + zpci_err("Q PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } +out: + clp_free_block(rrb); + return rc; +} + +int clp_add_pci_device(u32 fid, u32 fh, int configured) +{ + struct zpci_dev *zdev; + int rc; + + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); + zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); + if (!zdev) + return -ENOMEM; + + zdev->fh = fh; + zdev->fid = fid; + + /* Query function properties and update zdev */ + rc = clp_query_pci_fn(zdev, fh); + if (rc) + goto error; + + if (configured) + zdev->state = ZPCI_FN_STATE_CONFIGURED; + else + zdev->state = ZPCI_FN_STATE_STANDBY; + + rc = zpci_create_device(zdev); + if (rc) + goto error; + return 0; + +error: + kfree(zdev); + return rc; +} + +/* + * Enable/Disable a given PCI function defined by its function handle. + */ +static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) +{ + struct clp_req_rsp_set_pci *rrb; + int rc, retries = 100; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + do { + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_SET_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = *fh; + rrb->request.oc = command; + rrb->request.ndas = nr_dma_as; + + rc = clp_instr(rrb); + if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) { + retries--; + if (retries < 0) + break; + msleep(20); + } + } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + *fh = rrb->response.fh; + else { + zpci_err("Set PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } + clp_free_block(rrb); + return rc; +} + +int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) +{ + u32 fh = zdev->fh; + int rc; + + rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + if (!rc) + /* Success -> store enabled handle in zdev */ + zdev->fh = fh; + + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); + return rc; +} + +int clp_disable_fh(struct zpci_dev *zdev) +{ + u32 fh = zdev->fh; + int rc; + + if (!zdev_enabled(zdev)) + return 0; + + rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); + if (!rc) + /* Success -> store disabled handle in zdev */ + zdev->fh = fh; + + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); + return rc; +} + +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, + void (*cb)(struct clp_fh_list_entry *entry)) +{ + u64 resume_token = 0; + int entries, i, rc; + + do { + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_LIST_PCI; + /* store as many entries as possible */ + rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN; + rrb->request.resume_token = resume_token; + + /* Get PCI function handle list */ + rc = clp_instr(rrb); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("List PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + goto out; + } + + WARN_ON_ONCE(rrb->response.entry_size != + sizeof(struct clp_fh_list_entry)); + + entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) / + rrb->response.entry_size; + + resume_token = rrb->response.resume_token; + for (i = 0; i < entries; i++) + cb(&rrb->response.fh_list[i]); + } while (resume_token); +out: + return rc; +} + +static void __clp_add(struct clp_fh_list_entry *entry) +{ + if (!entry->vendor_id) + return; + + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); +} + +static void __clp_rescan(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) { + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + return; + } + + if (!entry->config_state) { + /* + * The handle is already disabled, that means no iota/irq freeing via + * the firmware interfaces anymore. Need to free resources manually + * (DMA memory, debug, sysfs)... + */ + zpci_stop_device(zdev); + } +} + +static void __clp_update(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +int clp_scan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_add); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_rescan); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices_simple(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_update); + + clp_free_block(rrb); + return rc; +} diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c new file mode 100644 index 00000000000..c5c66840ac0 --- /dev/null +++ b/arch/s390/pci/pci_debug.c @@ -0,0 +1,167 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/pci.h> +#include <asm/debug.h> + +#include <asm/pci_dma.h> + +static struct dentry *debugfs_root; +debug_info_t *pci_debug_msg_id; +EXPORT_SYMBOL_GPL(pci_debug_msg_id); +debug_info_t *pci_debug_err_id; +EXPORT_SYMBOL_GPL(pci_debug_err_id); + +static char *pci_perf_names[] = { + /* hardware counters */ + "Load operations", + "Store operations", + "Store block operations", + "Refresh operations", + "DMA read bytes", + "DMA write bytes", + /* software counters */ + "Allocated pages", + "Mapped pages", + "Unmapped pages", +}; + +static int pci_perf_show(struct seq_file *m, void *v) +{ + struct zpci_dev *zdev = m->private; + u64 *stat; + int i; + + if (!zdev) + return 0; + if (!zdev->fmb) + return seq_printf(m, "FMB statistics disabled\n"); + + /* header */ + seq_printf(m, "FMB @ %p\n", zdev->fmb); + seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update); + seq_printf(m, "Samples: %u\n", zdev->fmb->samples); + seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update); + + /* hardware counters */ + stat = (u64 *) &zdev->fmb->ld_ops; + for (i = 0; i < 4; i++) + seq_printf(m, "%26s:\t%llu\n", + pci_perf_names[i], *(stat + i)); + if (zdev->fmb->dma_valid) + for (i = 4; i < 6; i++) + seq_printf(m, "%26s:\t%llu\n", + pci_perf_names[i], *(stat + i)); + /* software counters */ + for (i = 6; i < ARRAY_SIZE(pci_perf_names); i++) + seq_printf(m, "%26s:\t%llu\n", + pci_perf_names[i], + atomic64_read((atomic64_t *) (stat + i))); + + return 0; +} + +static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *off) +{ + struct zpci_dev *zdev = ((struct seq_file *) file->private_data)->private; + unsigned long val; + int rc; + + if (!zdev) + return 0; + + rc = kstrtoul_from_user(ubuf, count, 10, &val); + if (rc) + return rc; + + switch (val) { + case 0: + rc = zpci_fmb_disable_device(zdev); + if (rc) + return rc; + break; + case 1: + rc = zpci_fmb_enable_device(zdev); + if (rc) + return rc; + break; + } + return count; +} + +static int pci_perf_seq_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, pci_perf_show, + file_inode(filp)->i_private); +} + +static const struct file_operations debugfs_pci_perf_fops = { + .open = pci_perf_seq_open, + .read = seq_read, + .write = pci_perf_seq_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void zpci_debug_init_device(struct zpci_dev *zdev) +{ + zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), + debugfs_root); + if (IS_ERR(zdev->debugfs_dev)) + zdev->debugfs_dev = NULL; + + zdev->debugfs_perf = debugfs_create_file("statistics", + S_IFREG | S_IRUGO | S_IWUSR, + zdev->debugfs_dev, zdev, + &debugfs_pci_perf_fops); + if (IS_ERR(zdev->debugfs_perf)) + zdev->debugfs_perf = NULL; +} + +void zpci_debug_exit_device(struct zpci_dev *zdev) +{ + debugfs_remove(zdev->debugfs_perf); + debugfs_remove(zdev->debugfs_dev); +} + +int __init zpci_debug_init(void) +{ + /* event trace buffer */ + pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long)); + if (!pci_debug_msg_id) + return -EINVAL; + debug_register_view(pci_debug_msg_id, &debug_sprintf_view); + debug_set_level(pci_debug_msg_id, 3); + + /* error log */ + pci_debug_err_id = debug_register("pci_error", 2, 1, 16); + if (!pci_debug_err_id) + return -EINVAL; + debug_register_view(pci_debug_err_id, &debug_hex_ascii_view); + debug_set_level(pci_debug_err_id, 6); + + debugfs_root = debugfs_create_dir("pci", NULL); + return 0; +} + +void zpci_debug_exit(void) +{ + if (pci_debug_msg_id) + debug_unregister(pci_debug_msg_id); + if (pci_debug_err_id) + debug_unregister(pci_debug_err_id); + + debugfs_remove(debugfs_root); +} diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c new file mode 100644 index 00000000000..f91c0311980 --- /dev/null +++ b/arch/s390/pci/pci_dma.c @@ -0,0 +1,506 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/iommu-helper.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/pci.h> +#include <asm/pci_dma.h> + +static struct kmem_cache *dma_region_table_cache; +static struct kmem_cache *dma_page_table_cache; + +static unsigned long *dma_alloc_cpu_table(void) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) + *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED; + return table; +} + +static void dma_free_cpu_table(void *table) +{ + kmem_cache_free(dma_region_table_cache, table); +} + +static unsigned long *dma_alloc_page_table(void) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) + *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED; + return table; +} + +static void dma_free_page_table(void *table) +{ + kmem_cache_free(dma_page_table_cache, table); +} + +static unsigned long *dma_get_seg_table_origin(unsigned long *entry) +{ + unsigned long *sto; + + if (reg_entry_isvalid(*entry)) + sto = get_rt_sto(*entry); + else { + sto = dma_alloc_cpu_table(); + if (!sto) + return NULL; + + set_rt_sto(entry, sto); + validate_rt_entry(entry); + entry_clr_protected(entry); + } + return sto; +} + +static unsigned long *dma_get_page_table_origin(unsigned long *entry) +{ + unsigned long *pto; + + if (reg_entry_isvalid(*entry)) + pto = get_st_pto(*entry); + else { + pto = dma_alloc_page_table(); + if (!pto) + return NULL; + set_st_pto(entry, pto); + validate_st_entry(entry); + entry_clr_protected(entry); + } + return pto; +} + +static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +{ + unsigned long *sto, *pto; + unsigned int rtx, sx, px; + + rtx = calc_rtx(dma_addr); + sto = dma_get_seg_table_origin(&rto[rtx]); + if (!sto) + return NULL; + + sx = calc_sx(dma_addr); + pto = dma_get_page_table_origin(&sto[sx]); + if (!pto) + return NULL; + + px = calc_px(dma_addr); + return &pto[px]; +} + +static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr, + dma_addr_t dma_addr, int flags) +{ + unsigned long *entry; + + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + if (!entry) { + WARN_ON_ONCE(1); + return; + } + + if (flags & ZPCI_PTE_INVALID) { + invalidate_pt_entry(entry); + return; + } else { + set_pt_pfaa(entry, page_addr); + validate_pt_entry(entry); + } + + if (flags & ZPCI_TABLE_PROTECTED) + entry_set_protected(entry); + else + entry_clr_protected(entry); +} + +static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, + dma_addr_t dma_addr, size_t size, int flags) +{ + unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + u8 *page_addr = (u8 *) (pa & PAGE_MASK); + dma_addr_t start_dma_addr = dma_addr; + unsigned long irq_flags; + int i, rc = 0; + + if (!nr_pages) + return -EINVAL; + + spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); + if (!zdev->dma_table) + goto no_refresh; + + for (i = 0; i < nr_pages; i++) { + dma_update_cpu_trans(zdev, page_addr, dma_addr, flags); + page_addr += PAGE_SIZE; + dma_addr += PAGE_SIZE; + } + + /* + * rpcit is not required to establish new translations when previously + * invalid translation-table entries are validated, however it is + * required when altering previously valid entries. + */ + if (!zdev->tlb_refresh && + ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) + /* + * TODO: also need to check that the old entry is indeed INVALID + * and not only for one page but for the whole range... + * -> now we WARN_ON in that case but with lazy unmap that + * needs to be redone! + */ + goto no_refresh; + + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); + +no_refresh: + spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); + return rc; +} + +static void dma_free_seg_table(unsigned long entry) +{ + unsigned long *sto = get_rt_sto(entry); + int sx; + + for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) + if (reg_entry_isvalid(sto[sx])) + dma_free_page_table(get_st_pto(sto[sx])); + + dma_free_cpu_table(sto); +} + +static void dma_cleanup_tables(struct zpci_dev *zdev) +{ + unsigned long *table; + int rtx; + + if (!zdev || !zdev->dma_table) + return; + + table = zdev->dma_table; + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(table[rtx])) + dma_free_seg_table(table[rtx]); + + dma_free_cpu_table(table); + zdev->dma_table = NULL; +} + +static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, + unsigned long start, int size) +{ + unsigned long boundary_size; + + boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, + start, size, 0, boundary_size, 0); +} + +static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) +{ + unsigned long offset, flags; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); + offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); + if (offset == -1) + offset = __dma_alloc_iommu(zdev, 0, size); + + if (offset != -1) { + zdev->next_bit = offset + size; + if (zdev->next_bit >= zdev->iommu_pages) + zdev->next_bit = 0; + } + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); + return offset; +} + +static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); + if (!zdev->iommu_bitmap) + goto out; + bitmap_clear(zdev->iommu_bitmap, offset, size); + if (offset >= zdev->next_bit) + zdev->next_bit = offset + size; +out: + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); +} + +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + return 0; +} +EXPORT_SYMBOL_GPL(dma_set_mask); + +static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + unsigned long nr_pages, iommu_page_index; + unsigned long pa = page_to_phys(page) + offset; + int flags = ZPCI_PTE_VALID; + dma_addr_t dma_addr; + + /* This rounds up number of pages based on size and offset */ + nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); + iommu_page_index = dma_alloc_iommu(zdev, nr_pages); + if (iommu_page_index == -1) + goto out_err; + + /* Use rounded up size */ + size = nr_pages * PAGE_SIZE; + + dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE; + if (dma_addr + size > zdev->end_dma) + goto out_free; + + if (direction == DMA_NONE || direction == DMA_TO_DEVICE) + flags |= ZPCI_TABLE_PROTECTED; + + if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { + atomic64_add(nr_pages, &zdev->fmb->mapped_pages); + return dma_addr + (offset & ~PAGE_MASK); + } + +out_free: + dma_free_iommu(zdev, iommu_page_index, nr_pages); +out_err: + zpci_err("map error:\n"); + zpci_err_hex(&pa, sizeof(pa)); + return DMA_ERROR_CODE; +} + +static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + unsigned long iommu_page_index; + int npages; + + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + dma_addr = dma_addr & PAGE_MASK; + if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, + ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) { + zpci_err("unmap error:\n"); + zpci_err_hex(&dma_addr, sizeof(dma_addr)); + } + + atomic64_add(npages, &zdev->fmb->unmapped_pages); + iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; + dma_free_iommu(zdev, iommu_page_index, npages); +} + +static void *s390_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + struct page *page; + unsigned long pa; + dma_addr_t map; + + size = PAGE_ALIGN(size); + page = alloc_pages(flag, get_order(size)); + if (!page) + return NULL; + + pa = page_to_phys(page); + memset((void *) pa, 0, size); + + map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE, + size, DMA_BIDIRECTIONAL, NULL); + if (dma_mapping_error(dev, map)) { + free_pages(pa, get_order(size)); + return NULL; + } + + atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + if (dma_handle) + *dma_handle = map; + return (void *) pa; +} + +static void s390_dma_free(struct device *dev, size_t size, + void *pa, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + + size = PAGE_ALIGN(size); + atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + free_pages((unsigned long) pa, get_order(size)); +} + +static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nr_elements, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + int mapped_elements = 0; + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nr_elements, i) { + struct page *page = sg_page(s); + s->dma_address = s390_dma_map_pages(dev, page, s->offset, + s->length, dir, NULL); + if (!dma_mapping_error(dev, s->dma_address)) { + s->dma_length = s->length; + mapped_elements++; + } else + goto unmap; + } +out: + return mapped_elements; + +unmap: + for_each_sg(sg, s, mapped_elements, i) { + if (s->dma_address) + s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, + dir, NULL); + s->dma_address = 0; + s->dma_length = 0; + } + mapped_elements = 0; + goto out; +} + +static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nr_elements, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nr_elements, i) { + s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL); + s->dma_address = 0; + s->dma_length = 0; + } +} + +int zpci_dma_init_device(struct zpci_dev *zdev) +{ + int rc; + + spin_lock_init(&zdev->iommu_bitmap_lock); + spin_lock_init(&zdev->dma_table_lock); + + zdev->dma_table = dma_alloc_cpu_table(); + if (!zdev->dma_table) { + rc = -ENOMEM; + goto out_clean; + } + + zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; + zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); + if (!zdev->iommu_bitmap) { + rc = -ENOMEM; + goto out_reg; + } + + rc = zpci_register_ioat(zdev, + 0, + zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + if (rc) + goto out_reg; + return 0; + +out_reg: + dma_free_cpu_table(zdev->dma_table); +out_clean: + return rc; +} + +void zpci_dma_exit_device(struct zpci_dev *zdev) +{ + zpci_unregister_ioat(zdev, 0); + dma_cleanup_tables(zdev); + vfree(zdev->iommu_bitmap); + zdev->iommu_bitmap = NULL; + zdev->next_bit = 0; +} + +static int __init dma_alloc_cpu_table_caches(void) +{ + dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", + ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, + 0, NULL); + if (!dma_region_table_cache) + return -ENOMEM; + + dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", + ZPCI_PT_SIZE, ZPCI_PT_ALIGN, + 0, NULL); + if (!dma_page_table_cache) { + kmem_cache_destroy(dma_region_table_cache); + return -ENOMEM; + } + return 0; +} + +int __init zpci_dma_init(void) +{ + return dma_alloc_cpu_table_caches(); +} + +void zpci_dma_exit(void) +{ + kmem_cache_destroy(dma_page_table_cache); + kmem_cache_destroy(dma_region_table_cache); +} + +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) + +static int __init dma_debug_do_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_debug_do_init); + +struct dma_map_ops s390_dma_ops = { + .alloc = s390_dma_alloc, + .free = s390_dma_free, + .map_sg = s390_dma_map_sg, + .unmap_sg = s390_dma_unmap_sg, + .map_page = s390_dma_map_pages, + .unmap_page = s390_dma_unmap_pages, + /* if we support direct DMA this must be conditional */ + .is_phys = 0, + /* dma_supported is unconditionally true without a callback */ +}; +EXPORT_SYMBOL_GPL(s390_dma_ops); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c new file mode 100644 index 00000000000..6d7f5a3016c --- /dev/null +++ b/arch/s390/pci/pci_event.c @@ -0,0 +1,136 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <asm/pci_debug.h> +#include <asm/sclp.h> + +/* Content Code Description for PCI Function Error */ +struct zpci_ccdf_err { + u32 reserved1; + u32 fh; /* function handle */ + u32 fid; /* function id */ + u32 ett : 4; /* expected table type */ + u32 mvn : 12; /* MSI vector number */ + u32 dmaas : 8; /* DMA address space */ + u32 : 6; + u32 q : 1; /* event qualifier */ + u32 rw : 1; /* read/write */ + u64 faddr; /* failing address */ + u32 reserved3; + u16 reserved4; + u16 pec; /* PCI event code */ +} __packed; + +/* Content Code Description for PCI Function Availability */ +struct zpci_ccdf_avail { + u32 reserved1; + u32 fh; /* function handle */ + u32 fid; /* function id */ + u32 reserved2; + u32 reserved3; + u32 reserved4; + u32 reserved5; + u16 reserved6; + u16 pec; /* PCI event code */ +} __packed; + +static void __zpci_event_error(struct zpci_ccdf_err *ccdf) +{ + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + + zpci_err("error CCDF:\n"); + zpci_err_hex(ccdf, sizeof(*ccdf)); + + if (!zdev) + return; + + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", + pci_name(zdev->pdev), ccdf->pec, ccdf->fid); +} + +void zpci_event_error(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_error(data); +} + +static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) +{ + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + int ret; + + pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", + pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); + zpci_err("avail CCDF:\n"); + zpci_err_hex(ccdf, sizeof(*ccdf)); + + switch (ccdf->pec) { + case 0x0301: /* Standby -> Configured */ + if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) + break; + zdev->state = ZPCI_FN_STATE_CONFIGURED; + zdev->fh = ccdf->fh; + ret = zpci_enable_device(zdev); + if (ret) + break; + pci_rescan_bus(zdev->bus); + break; + case 0x0302: /* Reserved -> Standby */ + if (!zdev) + clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + break; + case 0x0303: /* Deconfiguration requested */ + if (pdev) + pci_stop_and_remove_bus_device(pdev); + + ret = zpci_disable_device(zdev); + if (ret) + break; + + ret = sclp_pci_deconfigure(zdev->fid); + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); + if (!ret) + zdev->state = ZPCI_FN_STATE_STANDBY; + + break; + case 0x0304: /* Configured -> Standby */ + if (pdev) { + /* Give the driver a hint that the function is + * already unusable. */ + pdev->error_state = pci_channel_io_perm_failure; + pci_stop_and_remove_bus_device(pdev); + } + + zdev->fh = ccdf->fh; + zpci_disable_device(zdev); + zdev->state = ZPCI_FN_STATE_STANDBY; + break; + case 0x0306: /* 0x308 or 0x302 for multiple devices */ + clp_rescan_pci_devices(); + break; + case 0x0308: /* Standby -> Reserved */ + if (!zdev) + break; + pci_stop_root_bus(zdev->bus); + pci_remove_root_bus(zdev->bus); + break; + default: + break; + } +} + +void zpci_event_availability(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_availability(data); +} diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c new file mode 100644 index 00000000000..85267c058af --- /dev/null +++ b/arch/s390/pci/pci_insn.c @@ -0,0 +1,202 @@ +/* + * s390 specific pci instructions + * + * Copyright IBM Corp. 2013 + */ + +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <asm/pci_insn.h> +#include <asm/processor.h> + +#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ + +/* Modify PCI Function Controls */ +static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) +{ + u8 cc; + + asm volatile ( + " .insn rxy,0xe300000000d0,%[req],%[fib]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib) + : : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int zpci_mod_fc(u64 req, struct zpci_fib *fib) +{ + u8 cc, status; + + do { + cc = __mpcifc(req, fib, &status); + if (cc == 2) + msleep(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d\n", + __func__, cc, status); + return (cc) ? -EIO : 0; +} + +/* Refresh PCI Translations */ +static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) +{ + register u64 __addr asm("2") = addr; + register u64 __range asm("3") = range; + u8 cc; + + asm volatile ( + " .insn rre,0xb9d30000,%[fn],%[addr]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [fn] "+d" (fn) + : [addr] "d" (__addr), "d" (__range) + : "cc"); + *status = fn >> 24 & 0xff; + return cc; +} + +int zpci_refresh_trans(u64 fn, u64 addr, u64 range) +{ + u8 cc, status; + + do { + cc = __rpcit(fn, addr, range, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n", + __func__, cc, status, addr, range); + return (cc) ? -EIO : 0; +} + +/* Set Interruption Controls */ +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) +{ + asm volatile ( + " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" + : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); +} + +/* PCI Load */ +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + int cc = -ENXIO; + u64 __data; + + asm volatile ( + " .insn rre,0xb9d20000,%[data],%[req]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req) + : "d" (__offset) + : "cc"); + *status = __req >> 24 & 0xff; + if (!cc) + *data = __data; + + return cc; +} + +int zpci_load(u64 *data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcilg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(zpci_load); + +/* PCI Store */ +static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + int cc = -ENXIO; + + asm volatile ( + " .insn rre,0xb9d00000,%[data],%[req]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (__req) + : "d" (__offset), [data] "d" (data) + : "cc"); + *status = __req >> 24 & 0xff; + return cc; +} + +int zpci_store(u64 data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcistg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(zpci_store); + +/* PCI Store Block */ +static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (req) + : [offset] "d" (offset), [data] "Q" (*data) + : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int zpci_store_block(const u64 *data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcistb(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(zpci_store_block); diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c new file mode 100644 index 00000000000..9190214b870 --- /dev/null +++ b/arch/s390/pci/pci_sysfs.c @@ -0,0 +1,110 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/stat.h> +#include <linux/pci.h> + +#define zpci_attr(name, fmt, member) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); \ + \ + return sprintf(buf, fmt, zdev->member); \ +} \ +static DEVICE_ATTR_RO(name) + +zpci_attr(function_id, "0x%08x\n", fid); +zpci_attr(function_handle, "0x%08x\n", fh); +zpci_attr(pchid, "0x%04x\n", pchid); +zpci_attr(pfgid, "0x%02x\n", pfgid); +zpci_attr(vfn, "0x%04x\n", vfn); +zpci_attr(pft, "0x%02x\n", pft); +zpci_attr(uid, "0x%x\n", uid); +zpci_attr(segment0, "0x%02x\n", pfip[0]); +zpci_attr(segment1, "0x%02x\n", pfip[1]); +zpci_attr(segment2, "0x%02x\n", pfip[2]); +zpci_attr(segment3, "0x%02x\n", pfip[3]); + +static ssize_t recover_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + int ret; + + if (!device_remove_file_self(dev, attr)) + return count; + + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + return ret; + + ret = zpci_enable_device(zdev); + if (ret) + return ret; + + pci_rescan_bus(zdev->bus); + return count; +} +static DEVICE_ATTR_WO(recover); + +static ssize_t util_string_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + + return memory_read_from_buffer(buf, count, &off, zdev->util_str, + sizeof(zdev->util_str)); +} +static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static struct bin_attribute *zpci_bin_attrs[] = { + &bin_attr_util_string, + NULL, +}; + +static struct attribute *zpci_dev_attrs[] = { + &dev_attr_function_id.attr, + &dev_attr_function_handle.attr, + &dev_attr_pchid.attr, + &dev_attr_pfgid.attr, + &dev_attr_pft.attr, + &dev_attr_vfn.attr, + &dev_attr_uid.attr, + &dev_attr_recover.attr, + NULL, +}; +static struct attribute_group zpci_attr_group = { + .attrs = zpci_dev_attrs, + .bin_attrs = zpci_bin_attrs, +}; + +static struct attribute *pfip_attrs[] = { + &dev_attr_segment0.attr, + &dev_attr_segment1.attr, + &dev_attr_segment2.attr, + &dev_attr_segment3.attr, + NULL, +}; +static struct attribute_group pfip_attr_group = { + .name = "pfip", + .attrs = pfip_attrs, +}; + +const struct attribute_group *zpci_attr_groups[] = { + &zpci_attr_group, + &pfip_attr_group, + NULL, +}; |
