diff options
Diffstat (limited to 'arch/s390')
391 files changed, 71305 insertions, 14943 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild new file mode 100644 index 00000000000..647c3eccc3d --- /dev/null +++ b/arch/s390/Kbuild @@ -0,0 +1,9 @@ +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_APPLDATA_BASE) += appldata/ +obj-$(CONFIG_MATHEMU) += math-emu/ +obj-y += net/ +obj-$(CONFIG_PCI) += pci/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b66602ad7b3..bb63499fc5d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1,145 +1,437 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. -# - config MMU - bool - default y + def_bool y + +config ZONE_DMA + def_bool y + +config LOCKDEP_SUPPORT + def_bool y + +config STACKTRACE_SUPPORT + def_bool y + +config HAVE_LATENCYTOP_SUPPORT + def_bool y config RWSEM_GENERIC_SPINLOCK bool config RWSEM_XCHGADD_ALGORITHM - bool - default y + def_bool y -config GENERIC_CALIBRATE_DELAY - bool - default y +config ARCH_HAS_ILOG2_U32 + def_bool n -config GENERIC_BUST_SPINLOCK - bool +config ARCH_HAS_ILOG2_U64 + def_bool n + +config GENERIC_HWEIGHT + def_bool y + +config GENERIC_BUG + def_bool y if BUG + +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT + +config GENERIC_LOCKBREAK + def_bool y if SMP && PREEMPT -mainmenu "Linux Kernel Configuration" +config PGSTE + def_bool y if KVM + +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + +config KEXEC + def_bool y + +config AUDIT_ARCH + def_bool y + +config NO_IOPORT_MAP + def_bool y + +config PCI_QUIRKS + def_bool n config S390 - bool - default y + def_bool y + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_INLINE_READ_LOCK + select ARCH_INLINE_READ_LOCK_BH + select ARCH_INLINE_READ_LOCK_IRQ + select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_TRYLOCK + select ARCH_INLINE_READ_UNLOCK + select ARCH_INLINE_READ_UNLOCK_BH + select ARCH_INLINE_READ_UNLOCK_IRQ + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_WANT_IPC_PARSE_VERSION + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS2 + select GENERIC_CLOCKEVENTS + select GENERIC_CPU_DEVICES if !SMP + select GENERIC_FIND_FIRST_BIT + select GENERIC_SMP_IDLE_THREAD + select GENERIC_TIME_VSYSCALL + select HAVE_ALIGNED_STRUCT_PAGE if SLUB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT + select HAVE_BPF_JIT if 64BIT && PACK_STACK + select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK + select HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_KVM if 64BIT + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MEMBLOCK_PHYS_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_OPROFILE + select HAVE_PERF_EVENTS + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UID16 if 32BIT + select HAVE_VIRT_CPU_ACCOUNTING + select KTIME_SCALAR if 32BIT + select MODULES_USE_ELF_RELA + select NO_BOOTMEM + select OLD_SIGACTION + select OLD_SIGSUSPEND3 + select SYSCTL_EXCEPTION_TRACE + select TTY + select VIRT_CPU_ACCOUNTING + select VIRT_TO_BUS + +config SCHED_OMIT_FRAME_POINTER + def_bool y source "init/Kconfig" -menu "Base setup" +source "kernel/Kconfig.freezer" + +menu "Processor type and features" + +config HAVE_MARCH_Z900_FEATURES + def_bool n + +config HAVE_MARCH_Z990_FEATURES + def_bool n + select HAVE_MARCH_Z900_FEATURES + +config HAVE_MARCH_Z9_109_FEATURES + def_bool n + select HAVE_MARCH_Z990_FEATURES + +config HAVE_MARCH_Z10_FEATURES + def_bool n + select HAVE_MARCH_Z9_109_FEATURES + +config HAVE_MARCH_Z196_FEATURES + def_bool n + select HAVE_MARCH_Z10_FEATURES + +config HAVE_MARCH_ZEC12_FEATURES + def_bool n + select HAVE_MARCH_Z196_FEATURES + +choice + prompt "Processor type" + default MARCH_G5 + +config MARCH_G5 + bool "System/390 model G5 and G6" + depends on !64BIT + help + Select this to build a 31 bit kernel that works + on all ESA/390 and z/Architecture machines. + +config MARCH_Z900 + bool "IBM zSeries model z800 and z900" + select HAVE_MARCH_Z900_FEATURES if 64BIT + help + Select this to enable optimizations for model z800/z900 (2064 and + 2066 series). This will enable some optimizations that are not + available on older ESA/390 (31 Bit) only CPUs. + +config MARCH_Z990 + bool "IBM zSeries model z890 and z990" + select HAVE_MARCH_Z990_FEATURES if 64BIT + help + Select this to enable optimizations for model z890/z990 (2084 and + 2086 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z9_109 + bool "IBM System z9" + select HAVE_MARCH_Z9_109_FEATURES if 64BIT + help + Select this to enable optimizations for IBM System z9 (2094 and + 2096 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z10 + bool "IBM System z10" + select HAVE_MARCH_Z10_FEATURES if 64BIT + help + Select this to enable optimizations for IBM System z10 (2097 and + 2098 series). The kernel will be slightly faster but will not work + on older machines. + +config MARCH_Z196 + bool "IBM zEnterprise 114 and 196" + select HAVE_MARCH_Z196_FEATURES if 64BIT + help + Select this to enable optimizations for IBM zEnterprise 114 and 196 + (2818 and 2817 series). The kernel will be slightly faster but will + not work on older machines. + +config MARCH_ZEC12 + bool "IBM zBC12 and zEC12" + select HAVE_MARCH_ZEC12_FEATURES if 64BIT + help + Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and + 2827 series). The kernel will be slightly faster but will not work on + older machines. + +endchoice + +config MARCH_G5_TUNE + def_bool TUNE_G5 || MARCH_G5 && TUNE_DEFAULT + +config MARCH_Z900_TUNE + def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT -comment "Processor type and features" +config MARCH_Z990_TUNE + def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT + +config MARCH_Z9_109_TUNE + def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT + +config MARCH_Z10_TUNE + def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT + +config MARCH_Z196_TUNE + def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT + +config MARCH_ZEC12_TUNE + def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT + +choice + prompt "Tune code generation" + default TUNE_DEFAULT + help + Cause the compiler to tune (-mtune) the generated code for a machine. + This will make the code run faster on the selected machine but + somewhat slower on other machines. + This option only changes how the compiler emits instructions, not the + selection of instructions itself, so the resulting kernel will run on + all other machines. + +config TUNE_DEFAULT + bool "Default" + help + Tune the generated code for the target processor for which the kernel + will be compiled. + +config TUNE_G5 + bool "System/390 model G5 and G6" + +config TUNE_Z900 + bool "IBM zSeries model z800 and z900" + +config TUNE_Z990 + bool "IBM zSeries model z890 and z990" + +config TUNE_Z9_109 + bool "IBM System z9" + +config TUNE_Z10 + bool "IBM System z10" + +config TUNE_Z196 + bool "IBM zEnterprise 114 and 196" + +config TUNE_ZEC12 + bool "IBM zBC12 and zEC12" + +endchoice config 64BIT - bool "64 bit kernel" + def_bool y + prompt "64 bit kernel" help - Select this option if you have a 64 bit IBM zSeries machine + Select this option if you have an IBM z/Architecture machine and want to use the 64 bit addressing mode. +config 32BIT + def_bool y if !64BIT + +config COMPAT + def_bool y + prompt "Kernel support for 31 bit emulation" + depends on 64BIT + select COMPAT_BINFMT_ELF if BINFMT_ELF + select ARCH_WANT_OLD_COMPAT_IPC + select COMPAT_OLD_SIGACTION + help + Select this option if you want to enable your system kernel to + handle system-calls from ELF binaries for 31 bit ESA. This option + (and some other stuff like libraries and such) is needed for + executing 31 bit applications. It is safe to say "Y". + +config SYSVIPC_COMPAT + def_bool y if COMPAT && SYSVIPC + +config KEYS_COMPAT + def_bool y if COMPAT && KEYS + config SMP - bool "Symmetric multi-processing support" + def_bool y + prompt "Symmetric multi-processing support" ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If you have a system with more than one CPU, say Y. - If you say N here, the kernel will run on single and multiprocessor + If you say N here, the kernel will run on uni- and multiprocessor machines, but will use only one CPU of a multiprocessor machine. If you say Y here, the kernel will run on many, but not all, - singleprocessor machines. On a singleprocessor machine, the kernel + uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. - See also the <file:Documentation/smp.txt> and the SMP-HOWTO - available at <http://www.tldp.org/docs.html#howto>. + See also the SMP-HOWTO available at + <http://www.tldp.org/docs.html#howto>. Even if you don't know what to do here, say Y. config NR_CPUS - int "Maximum number of CPUs (2-64)" - range 2 64 + int "Maximum number of CPUs (2-256)" + range 2 256 depends on SMP - default "32" + default "32" if !64BIT + default "64" if 64BIT help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 64 and the + kernel will support. The maximum supported value is 256 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds approximately sixteen kilobytes to the kernel image. config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" + def_bool y + prompt "Support for hot-pluggable CPUs" depends on SMP - select HOTPLUG - default n help Say Y here to be able to turn CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_MC + def_bool n + +config SCHED_BOOK + def_bool y + prompt "Book scheduler support" + depends on SMP + select SCHED_MC + help + Book scheduler support improves the CPU scheduler's decision making + when dealing with machines that have several books. + +source kernel/Kconfig.preempt + config MATHEMU - bool "IEEE FPU emulation" + def_bool y + prompt "IEEE FPU emulation" depends on MARCH_G5 help This option is required for IEEE compliant floating point arithmetic - on older S/390 machines. Say Y unless you know your machine doesn't + on older ESA/390 machines. Say Y unless you know your machine doesn't need this. -config COMPAT - bool "Kernel support for 31 bit emulation" - depends on 64BIT - help - Select this option if you want to enable your system kernel to - handle system-calls from ELF binaries for 31 bit ESA. This option - (and some other stuff like libraries and such) is needed for - executing 31 bit applications. It is safe to say "Y". +source kernel/Kconfig.hz -config SYSVIPC_COMPAT - bool - depends on COMPAT && SYSVIPC - default y +endmenu -config BINFMT_ELF32 - tristate "Kernel support for 31 bit ELF binaries" - depends on COMPAT - help - This allows you to run 32-bit Linux/ELF binaries on your zSeries - in 64 bit mode. Everybody wants this; say Y. +menu "Memory setup" -comment "Code generation options" +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_VMEMMAP + select SPARSEMEM_STATIC if !64BIT -choice - prompt "Processor type" - default MARCH_G5 +config ARCH_SPARSEMEM_DEFAULT + def_bool y -config MARCH_G5 - bool "S/390 model G5 and G6" - depends on !64BIT - help - Select this to build a 31 bit kernel that works - on all S/390 and zSeries machines. +config ARCH_SELECT_MEMORY_MODEL + def_bool y -config MARCH_Z900 - bool "IBM eServer zSeries model z800 and z900" - help - Select this to optimize for zSeries machines. This - will enable some optimizations that are not available - on older 31 bit only CPUs. +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y if SPARSEMEM -config MARCH_Z990 - bool "IBM eServer zSeries model z890 and z990" - help - Select this enable optimizations for model z890/z990. - This will be slightly faster but does not work on - older machines such as the z900. +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y -endchoice +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y + depends on 64BIT + +config FORCE_MAX_ZONEORDER + int + default "9" + +source "mm/Kconfig" config PACK_STACK - bool "Pack kernel stack" + def_bool y + prompt "Pack kernel stack" help This option enables the compiler option -mkernel-backchain if it is available. If the option is available the compiler supports @@ -151,22 +443,9 @@ config PACK_STACK Say Y if you are unsure. -config SMALL_STACK - bool "Use 4kb/8kb for kernel stack instead of 8kb/16kb" - depends on PACK_STACK - help - If you say Y here and the compiler supports the -mkernel-backchain - option the kernel will use a smaller kernel stack size. For 31 bit - the reduced size is 4kb instead of 8kb and for 64 bit it is 8kb - instead of 16kb. This allows to run more thread on a system and - reduces the pressure on the memory management for higher order - page allocations. - - Say N if you are unsure. - - config CHECK_STACK - bool "Detect kernel stack overflow" + def_bool y + prompt "Detect kernel stack overflow" help This option enables the compiler option -mstack-guard and -mstack-size if they are available. If the compiler supports them @@ -189,118 +468,199 @@ config STACK_GUARD The minimum size for the stack guard should be 256 for 31 bit and 512 for 64 bit. -config WARN_STACK - bool "Emit compiler warnings for function with broken stack usage" +config WARN_DYNAMIC_STACK + def_bool n + prompt "Emit compiler warnings for function with dynamic stack usage" help - This option enables the compiler options -mwarn-framesize and - -mwarn-dynamicstack. If the compiler supports these options it - will generate warnings for function which either use alloca or - create a stack frame bigger then CONFIG_WARN_STACK_SIZE. + This option enables the compiler option -mwarn-dynamicstack. If the + compiler supports this options generates warnings for functions + that dynamically allocate stack space using alloca. Say N if you are unsure. -config WARN_STACK_SIZE - int "Maximum frame size considered safe (128-2048)" - range 128 2048 - depends on WARN_STACK - default "256" - help - This allows you to specify the maximum frame size a function may - have without the compiler complaining about it. - -source "mm/Kconfig" - -comment "I/O subsystem configuration" +endmenu -config MACHCHK_WARNING - bool "Process warning machine checks" - help - Select this option if you want the machine check handler on IBM S/390 or - zSeries to process warning machine checks (e.g. on power failures). - If unsure, say "Y". +menu "I/O subsystem" config QDIO - tristate "QDIO support" + def_tristate y + prompt "QDIO support" ---help--- This driver provides the Queued Direct I/O base support for - IBM mainframes. - - For details please refer to the documentation provided by IBM at - <http://www10.software.ibm.com/developerworks/opensource/linux390> + IBM System z. To compile this driver as a module, choose M here: the module will be called qdio. If unsure, say Y. -config QDIO_PERF_STATS - bool "Performance statistics in /proc" - depends on QDIO +menuconfig PCI + bool "PCI support" + default n + depends on 64BIT + select PCI_MSI help - Say Y here to get performance statistics in /proc/qdio_perf + Enable PCI support. - If unsure, say N. +if PCI -config QDIO_DEBUG - bool "Extended debugging information" - depends on QDIO +config PCI_NR_FUNCTIONS + int "Maximum number of PCI functions (1-4096)" + range 1 4096 + default "64" help - Say Y here to get extended debugging output in - /sys/kernel/debug/s390dbf/qdio... - Warning: this option reduces the performance of the QDIO module. + This allows you to specify the maximum number of PCI functions which + this kernel will support. - If unsure, say N. +config PCI_NR_MSI + int "Maximum number of MSI interrupts (64-32768)" + range 64 32768 + default "256" + help + This defines the number of virtual interrupts the kernel will + provide for MSI interrupts. If you configure your system to have + too few drivers will fail to allocate MSI interrupts for all + PCI devices. -comment "Misc" +source "drivers/pci/Kconfig" +source "drivers/pci/pcie/Kconfig" +source "drivers/pci/hotplug/Kconfig" -config PREEMPT - bool "Preemptible Kernel" +endif # PCI + +config PCI_DOMAINS + def_bool PCI + +config HAS_IOMEM + def_bool PCI + +config IOMMU_HELPER + def_bool PCI + +config HAS_DMA + def_bool PCI + select HAVE_DMA_API_DEBUG + +config NEED_SG_DMA_LENGTH + def_bool PCI + +config HAVE_DMA_ATTRS + def_bool PCI + +config NEED_DMA_MAP_STATE + def_bool PCI + +config CHSC_SCH + def_tristate m + prompt "Support for CHSC subchannels" help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. + This driver allows usage of CHSC subchannels. A CHSC subchannel + is usually present on LPAR only. + The driver creates a device /dev/chsc, which may be used to + obtain I/O configuration information about the machine and + to issue asynchronous chsc commands (DANGEROUS). + You will usually only want to use this interface on a special + LPAR designated for system management. - Say N if you are unsure. + To compile this driver as a module, choose M here: the + module will be called chsc_sch. -config IPL - bool "Builtin IPL record support" + If unsure, say N. + +config SCM_BUS + def_bool y + depends on 64BIT + prompt "SCM bus driver" help - If you want to use the produced kernel to IPL directly from a - device, you have to merge a bootsector specific to the device - into the first bytes of the kernel. You will have to select the - IPL device. + Bus driver for Storage Class Memory. -choice - prompt "IPL method generated into head.S" - depends on IPL - default IPL_TAPE +config EADM_SCH + def_tristate m + prompt "Support for EADM subchannels" + depends on SCM_BUS help - Select "tape" if you want to IPL the image from a Tape. + This driver allows usage of EADM subchannels. EADM subchannels act + as a communication vehicle for SCM increments. - Select "vm_reader" if you are running under VM/ESA and want - to IPL the image from the emulated card reader. + To compile this driver as a module, choose M here: the + module will be called eadm_sch. -config IPL_TAPE - bool "tape" +endmenu -config IPL_VM - bool "vm_reader" +menu "Dump support" -endchoice +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT && SMP + select KEXEC + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. + This option also enables s390 zfcpdump. + See also <file:Documentation/s390/zfcpdump.txt> + +endmenu + +menu "Executable file formats / Emulations" source "fs/Kconfig.binfmt" -config PROCESS_DEBUG - bool "Show crashed user process info" - help - Say Y to print all process fault locations to the console. This is - a debugging option; you probably do not want to set it unless you - are an S390 port maintainer. +config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. + +endmenu + +menu "Power Management" + +config ARCH_HIBERNATION_POSSIBLE + def_bool y if 64BIT + +source "kernel/power/Kconfig" + +endmenu + +source "net/Kconfig" + +config PCMCIA + def_bool n + +config CCW + def_bool y + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "arch/s390/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +menu "Virtualization" config PFAULT - bool "Pseudo page fault support" + def_bool y + prompt "Pseudo page fault support" help Select this option, if you want to use PFAULT pseudo page fault handling under VM. If running native or in LPAR, this option @@ -313,15 +673,19 @@ config PFAULT config SHARED_KERNEL bool "VM shared kernel support" + depends on !JUMP_LABEL help Select this option, if you want to share the text segment of the Linux kernel between different VM guests. This reduces memory usage with lots of guests but greatly increases kernel size. + Also if a kernel was IPL'ed from a shared segment the kexec system + call will not work. You should only select this option if you know what you are doing and want to exploit this feature. config CMM - tristate "Cooperative memory management" + def_tristate n + prompt "Cooperative memory management" help Select this option, if you want to enable the kernel interface to reduce the memory size of the system. This is accomplished @@ -332,36 +696,18 @@ config CMM Everybody who wants to run Linux under VM should select this option. -config CMM_PROC - bool "/proc interface to cooperative memory management" - depends on CMM - help - Select this option to enable the /proc interface to the - cooperative memory management. - config CMM_IUCV - bool "IUCV special message interface to cooperative memory management" + def_bool y + prompt "IUCV special message interface to cooperative memory management" depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV) help Select this option to enable the special message interface to the cooperative memory management. -config VIRT_TIMER - bool "Virtual CPU timer support" - help - This provides a kernel interface for virtual CPU timers. - Default is disabled. - -config VIRT_CPU_ACCOUNTING - bool "Base user process accounting on virtual cpu timer" - depends on VIRT_TIMER - help - Select this option to use CPU timer deltas to do user - process accounting. - config APPLDATA_BASE - bool "Linux - VM Monitor Stream, base infrastructure" - depends on PROC_FS && VIRT_TIMER=y + def_bool n + prompt "Linux - VM Monitor Stream, base infrastructure" + depends on PROC_FS help This provides a kernel interface for creating and updating z/VM APPLDATA monitor records. The monitor records are updated at certain time @@ -375,8 +721,9 @@ config APPLDATA_BASE The /proc entries can also be read from, showing the current settings. config APPLDATA_MEM - tristate "Monitor memory management statistics" - depends on APPLDATA_BASE + def_tristate m + prompt "Monitor memory management statistics" + depends on APPLDATA_BASE && VM_EVENT_COUNTERS help This provides memory management related data to the Linux - VM Monitor Stream, like paging/swapping rate, memory utilisation, etc. @@ -391,7 +738,8 @@ config APPLDATA_MEM appldata_mem.o. config APPLDATA_OS - tristate "Monitor OS statistics" + def_tristate m + prompt "Monitor OS statistics" depends on APPLDATA_BASE help This provides OS related data to the Linux - VM Monitor Stream, like @@ -405,8 +753,9 @@ config APPLDATA_OS appldata_os.o. config APPLDATA_NET_SUM - tristate "Monitor overall network statistics" - depends on APPLDATA_BASE + def_tristate m + prompt "Monitor overall network statistics" + depends on APPLDATA_BASE && NET help This provides network related data to the Linux - VM Monitor Stream, currently there is only a total sum of network I/O statistics, no @@ -419,57 +768,29 @@ config APPLDATA_NET_SUM This can also be compiled as a module, which will be called appldata_net_sum.o. -config NO_IDLE_HZ - bool "No HZ timer ticks in idle" +config S390_HYPFS_FS + def_bool y + prompt "s390 hypervisor file system support" + select SYS_HYPERVISOR help - Switches the regular HZ timer off when the system is going idle. - This helps z/VM to detect that the Linux system is idle. VM can - then "swap-out" this guest which reduces memory usage. It also - reduces the overhead of idle systems. + This is a virtual file system intended to provide accounting + information in an s390 hypervisor environment. - The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer. - hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ - timer is active. +source "arch/s390/kvm/Kconfig" -config NO_IDLE_HZ_INIT - bool "HZ timer in idle off by default" - depends on NO_IDLE_HZ +config S390_GUEST + def_bool y + prompt "s390 support for virtio devices" + depends on 64BIT + select TTY + select VIRTUALIZATION + select VIRTIO + select VIRTIO_CONSOLE help - The HZ timer is switched off in idle by default. That means the - HZ timer is already disabled at boot time. + Enabling this option adds support for virtio based paravirtual device + drivers on s390. -config KEXEC - bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but is independent of hardware/microcode support. + Select this option if you want to run the kernel as a guest under + the KVM hypervisor. endmenu - -source "net/Kconfig" - -config PCMCIA - bool - default n - -source "drivers/base/Kconfig" - -source "drivers/scsi/Kconfig" - -source "drivers/s390/Kconfig" - -source "drivers/net/Kconfig" - -source "fs/Kconfig" - -source "arch/s390/oprofile/Kconfig" - -source "arch/s390/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" - -source "lib/Kconfig" diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index f53b6d5300e..c56878e1245 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -1,5 +1,35 @@ menu "Kernel hacking" +config TRACE_IRQFLAGS_SUPPORT + def_bool y + source "lib/Kconfig.debug" +config STRICT_DEVMEM + def_bool y + prompt "Filter access to /dev/mem" + ---help--- + This option restricts access to /dev/mem. If this option is + disabled, you allow userspace access to all memory, including + kernel and userspace memory. Accidental memory access is likely + to be disastrous. + Memory access is required for experts who want to debug the kernel. + + If you are unsure, say Y. + +config S390_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS + ---help--- + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + +config DEBUG_SET_MODULE_RONX + def_bool y + depends on MODULES endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 6c6b197898d..874e6d6e9c5 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,91 +14,116 @@ # ifndef CONFIG_64BIT +LD_BFD := elf32-s390 LDFLAGS := -m elf_s390 -CFLAGS += -m31 -AFLAGS += -m31 +KBUILD_CFLAGS += -m31 +KBUILD_AFLAGS += -m31 UTS_MACHINE := s390 STACK_SIZE := 8192 -CHECKFLAGS += -D__s390__ +CHECKFLAGS += -D__s390__ -msize-long else +LD_BFD := elf64-s390 LDFLAGS := -m elf64_s390 -MODFLAGS += -fpic -D__PIC__ -CFLAGS += -m64 -AFLAGS += -m64 +KBUILD_AFLAGS_MODULE += -fPIC +KBUILD_CFLAGS_MODULE += -fPIC +KBUILD_CFLAGS += -m64 +KBUILD_AFLAGS += -m64 UTS_MACHINE := s390x STACK_SIZE := 16384 CHECKFLAGS += -D__s390__ -D__s390x__ endif -cflags-$(CONFIG_MARCH_G5) += $(call cc-option,-march=g5) -cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) -cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) +export LD_BFD + +cflags-$(CONFIG_MARCH_G5) += -march=g5 +cflags-$(CONFIG_MARCH_Z900) += -march=z900 +cflags-$(CONFIG_MARCH_Z990) += -march=z990 +cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109 +cflags-$(CONFIG_MARCH_Z10) += -march=z10 +cflags-$(CONFIG_MARCH_Z196) += -march=z196 +cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12 + +cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5 +cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 +cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990 +cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 +cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 +cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 +cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 + +#KBUILD_IMAGE is necessary for make rpm +KBUILD_IMAGE :=arch/s390/boot/image + +# +# Prevent tail-call optimizations, to get clearer backtraces: +# +cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls # old style option for packed stacks ifeq ($(call cc-option-yn,-mkernel-backchain),y) cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif # new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -cflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -aflags-$(CONFIG_SMALL_STACK) += -D__SMALL_STACK -ifdef CONFIG_SMALL_STACK -STACK_SIZE := $(shell echo $$(($(STACK_SIZE)/2)) ) -endif endif ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) +ifneq ($(call cc-option-yn,-mstack-size=8192),y) cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif +endif ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_STACK) += -mwarn-dynamicstack -cflags-$(CONFIG_WARN_STACK) += -mwarn-framesize=$(CONFIG_WARN_STACK_SIZE) +cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack endif -CFLAGS += -mbackchain -msoft-float $(cflags-y) -CFLAGS += $(call cc-option,-finline-limit=10000) -CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare -AFLAGS += $(aflags-y) +KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) +KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare +KBUILD_AFLAGS += $(aflags-y) OBJCOPYFLAGS := -O binary -LDFLAGS_vmlinux := -e start -head-y := arch/$(ARCH)/kernel/head.o arch/$(ARCH)/kernel/init_task.o +head-y := arch/s390/kernel/head.o +head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) -core-y += arch/$(ARCH)/mm/ arch/$(ARCH)/kernel/ arch/$(ARCH)/crypto/ \ - arch/$(ARCH)/appldata/ -libs-y += arch/$(ARCH)/lib/ +# See arch/s390/Kbuild for content of core part of the kernel +core-y += arch/s390/ + +libs-y += arch/s390/lib/ drivers-y += drivers/s390/ -drivers-$(CONFIG_MATHEMU) += arch/$(ARCH)/math-emu/ # must be linked after kernel drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ -boot := arch/$(ARCH)/boot +boot := arch/s390/boot -all: image +all: image bzImage install: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -image: vmlinux +image bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +zfcpdump: + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +vdso_install: +ifeq ($(CONFIG_64BIT),y) + $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ +endif + $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ + archclean: $(Q)$(MAKE) $(clean)=$(boot) # Don't use tabs in echo arguments define archhelp echo '* image - Kernel image for IPL ($(boot)/image)' + echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' endef diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h index e806a8922bb..4a67f2b5f6a 100644 --- a/arch/s390/appldata/appldata.h +++ b/arch/s390/appldata/appldata.h @@ -1,15 +1,11 @@ /* - * arch/s390/appldata/appldata.h - * * Definitions and interface for Linux - z/VM Monitor Stream. * - * Copyright (C) 2003 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2008 * - * Author: Gerald Schaefer <geraldsc@de.ibm.com> + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -//#define APPLDATA_DEBUG /* Debug messages on/off */ - #define APPLDATA_MAX_REC_SIZE 4024 /* Maximum size of the */ /* data buffer */ #define APPLDATA_MAX_PROCS 100 @@ -21,24 +17,13 @@ #define APPLDATA_RECORD_NET_SUM_ID 0x03 /* must be < 256 ! */ #define APPLDATA_RECORD_PROC_ID 0x04 -#define CTL_APPLDATA 2120 /* sysctl IDs, must be unique */ -#define CTL_APPLDATA_TIMER 2121 +#define CTL_APPLDATA_TIMER 2121 /* sysctl IDs, must be unique */ #define CTL_APPLDATA_INTERVAL 2122 #define CTL_APPLDATA_MEM 2123 #define CTL_APPLDATA_OS 2124 #define CTL_APPLDATA_NET_SUM 2125 #define CTL_APPLDATA_PROC 2126 -#define P_INFO(x...) printk(KERN_INFO MY_PRINT_NAME " info: " x) -#define P_ERROR(x...) printk(KERN_ERR MY_PRINT_NAME " error: " x) -#define P_WARNING(x...) printk(KERN_WARNING MY_PRINT_NAME " status: " x) - -#ifdef APPLDATA_DEBUG -#define P_DEBUG(x...) printk(KERN_DEBUG MY_PRINT_NAME " debug: " x) -#else -#define P_DEBUG(x...) do {} while (0) -#endif - struct appldata_ops { struct list_head list; struct ctl_table_header *sysctl_header; @@ -46,14 +31,17 @@ struct appldata_ops { int active; /* monitoring status */ /* fill in from here */ - unsigned int ctl_nr; /* sysctl ID */ char name[APPLDATA_PROC_NAME_LENGTH]; /* name of /proc fs node */ unsigned char record_nr; /* Record Nr. for Product ID */ void (*callback)(void *data); /* callback function */ void *data; /* record data */ unsigned int size; /* size of record */ struct module *owner; /* THIS_MODULE */ + char mod_lvl[2]; /* modification level, EBCDIC */ }; extern int appldata_register_ops(struct appldata_ops *ops); extern void appldata_unregister_ops(struct appldata_ops *ops); +extern int appldata_diag(char record_nr, u16 function, unsigned long buffer, + u16 length, char *mod_lvl); + diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index d06a8d71c71..47c8630c93c 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -1,150 +1,106 @@ /* - * arch/s390/appldata/appldata_base.c - * * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. * Exports appldata_register_ops() and appldata_unregister_ops() for the * data gathering modules. * - * Copyright (C) 2003 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2009 * - * Author: Gerald Schaefer <geraldsc@de.ibm.com> + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#include <linux/config.h> +#define KMSG_COMPONENT "appldata" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/errno.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/smp.h> #include <linux/interrupt.h> #include <linux/proc_fs.h> -#include <linux/page-flags.h> +#include <linux/mm.h> #include <linux/swap.h> #include <linux/pagemap.h> #include <linux/sysctl.h> -#include <asm/timer.h> -//#include <linux/kernel_stat.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/workqueue.h> +#include <linux/suspend.h> +#include <linux/platform_device.h> +#include <asm/appldata.h> +#include <asm/vtimer.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/smp.h> #include "appldata.h" -#define MY_PRINT_NAME "appldata" /* for debug messages, etc. */ #define APPLDATA_CPU_INTERVAL 10000 /* default (CPU) time for sampling interval in milliseconds */ #define TOD_MICRO 0x01000 /* nr. of TOD clock units for 1 microsecond */ -#ifndef CONFIG_64BIT - -#define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */ -#define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */ -#define APPLDATA_GEN_EVENT_RECORD 0x02 -#define APPLDATA_START_CONFIG_REC 0x03 - -#else - -#define APPLDATA_START_INTERVAL_REC 0x80 -#define APPLDATA_STOP_REC 0x81 -#define APPLDATA_GEN_EVENT_RECORD 0x82 -#define APPLDATA_START_CONFIG_REC 0x83 - -#endif /* CONFIG_64BIT */ - -/* - * Parameter list for DIAGNOSE X'DC' - */ -#ifndef CONFIG_64BIT -struct appldata_parameter_list { - u16 diag; /* The DIAGNOSE code X'00DC' */ - u8 function; /* The function code for the DIAGNOSE */ - u8 parlist_length; /* Length of the parameter list */ - u32 product_id_addr; /* Address of the 16-byte product ID */ - u16 reserved; - u16 buffer_length; /* Length of the application data buffer */ - u32 buffer_addr; /* Address of the application data buffer */ -}; -#else -struct appldata_parameter_list { - u16 diag; - u8 function; - u8 parlist_length; - u32 unused01; - u16 reserved; - u16 buffer_length; - u32 unused02; - u64 product_id_addr; - u64 buffer_addr; -}; -#endif /* CONFIG_64BIT */ +static struct platform_device *appldata_pdev; /* * /proc entries (sysctl) */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; -static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +static int appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static int appldata_interval_handler(ctl_table *ctl, int write, - struct file *filp, +static int appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; static struct ctl_table appldata_table[] = { { - .ctl_name = CTL_APPLDATA_TIMER, .procname = "timer", .mode = S_IRUGO | S_IWUSR, - .proc_handler = &appldata_timer_handler, + .proc_handler = appldata_timer_handler, }, { - .ctl_name = CTL_APPLDATA_INTERVAL, .procname = "interval", .mode = S_IRUGO | S_IWUSR, - .proc_handler = &appldata_interval_handler, + .proc_handler = appldata_interval_handler, }, - { .ctl_name = 0 } + { }, }; static struct ctl_table appldata_dir_table[] = { { - .ctl_name = CTL_APPLDATA, .procname = appldata_proc_name, .maxlen = 0, .mode = S_IRUGO | S_IXUGO, .child = appldata_table, }, - { .ctl_name = 0 } + { }, }; /* * Timer */ -DEFINE_PER_CPU(struct vtimer_list, appldata_timer); -static atomic_t appldata_expire_count = ATOMIC_INIT(0); +static struct vtimer_list appldata_timer; static DEFINE_SPINLOCK(appldata_timer_lock); static int appldata_interval = APPLDATA_CPU_INTERVAL; static int appldata_timer_active; +static int appldata_timer_suspended = 0; /* * Work queue */ static struct workqueue_struct *appldata_wq; -static void appldata_work_fn(void *data); -static DECLARE_WORK(appldata_work, appldata_work_fn, NULL); +static void appldata_work_fn(struct work_struct *work); +static DECLARE_WORK(appldata_work, appldata_work_fn); /* * Ops list */ -static DEFINE_SPINLOCK(appldata_ops_lock); +static DEFINE_MUTEX(appldata_ops_mutex); static LIST_HEAD(appldata_ops_list); @@ -154,15 +110,9 @@ static LIST_HEAD(appldata_ops_list); * * schedule work and reschedule timer */ -static void appldata_timer_function(unsigned long data, struct pt_regs *regs) +static void appldata_timer_function(unsigned long data) { - P_DEBUG(" -= Timer =-\n"); - P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(), - atomic_read(&appldata_expire_count)); - if (atomic_dec_and_test(&appldata_expire_count)) { - atomic_set(&appldata_expire_count, num_online_cpus()); - queue_work(appldata_wq, (struct work_struct *) data); - } + queue_work(appldata_wq, (struct work_struct *) data); } /* @@ -170,24 +120,19 @@ static void appldata_timer_function(unsigned long data, struct pt_regs *regs) * * call data gathering function for each (active) module */ -static void appldata_work_fn(void *data) +static void appldata_work_fn(struct work_struct *work) { struct list_head *lh; struct appldata_ops *ops; - int i; - P_DEBUG(" -= Work Queue =-\n"); - i = 0; - spin_lock(&appldata_ops_lock); + mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { ops = list_entry(lh, struct appldata_ops, list); - P_DEBUG("list_for_each loop: %i) active = %u, name = %s\n", - ++i, ops->active, ops->name); if (ops->active == 1) { ops->callback(ops->data); } } - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); } /* @@ -195,69 +140,26 @@ static void appldata_work_fn(void *data) * * prepare parameter list, issue DIAG 0xDC */ -static int appldata_diag(char record_nr, u16 function, unsigned long buffer, - u16 length) +int appldata_diag(char record_nr, u16 function, unsigned long buffer, + u16 length, char *mod_lvl) { - unsigned long ry; - struct appldata_product_id { - char prod_nr[7]; /* product nr. */ - char prod_fn[2]; /* product function */ - char record_nr; /* record nr. */ - char version_nr[2]; /* version */ - char release_nr[2]; /* release */ - char mod_lvl[2]; /* modification lvl. */ - } appldata_product_id = { - /* all strings are EBCDIC, record_nr is byte */ + struct appldata_product_id id = { .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4, - 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ - .prod_fn = {0xD5, 0xD3}, /* "NL" */ - .record_nr = record_nr, - .version_nr = {0xF2, 0xF6}, /* "26" */ - .release_nr = {0xF0, 0xF1}, /* "01" */ - .mod_lvl = {0xF0, 0xF0}, /* "00" */ - }; - struct appldata_parameter_list appldata_parameter_list = { - .diag = 0xDC, - .function = function, - .parlist_length = - sizeof(appldata_parameter_list), - .buffer_length = length, - .product_id_addr = - (unsigned long) &appldata_product_id, - .buffer_addr = virt_to_phys((void *) buffer) + 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */ + .prod_fn = 0xD5D3, /* "NL" */ + .version_nr = 0xF2F6, /* "26" */ + .release_nr = 0xF0F1, /* "01" */ }; - if (!MACHINE_IS_VM) - return -ENOSYS; - ry = -1; - asm volatile( - "diag %1,%0,0xDC\n\t" - : "=d" (ry) - : "d" (&appldata_parameter_list), - "m" (appldata_parameter_list), - "m" (appldata_product_id) - : "cc"); - return (int) ry; + id.record_nr = record_nr; + id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1]; + return appldata_asm(&id, function, (void *) buffer, length); } /************************ timer, work, DIAG <END> ****************************/ /****************************** /proc stuff **********************************/ -/* - * appldata_mod_vtimer_wrap() - * - * wrapper function for mod_virt_timer(), because smp_call_function_on() - * accepts only one parameter. - */ -static void __appldata_mod_vtimer_wrap(void *p) { - struct { - struct vtimer_list *timer; - u64 expires; - } *args = p; - mod_virt_timer(args->timer, args->expires); -} - #define APPLDATA_ADD_TIMER 0 #define APPLDATA_DEL_TIMER 1 #define APPLDATA_MOD_TIMER 2 @@ -268,51 +170,28 @@ static void __appldata_mod_vtimer_wrap(void *p) { * Add, delete or modify virtual timers on all online cpus. * The caller needs to get the appldata_timer_lock spinlock. */ -static void -__appldata_vtimer_setup(int cmd) +static void __appldata_vtimer_setup(int cmd) { - u64 per_cpu_interval; - int i; + u64 timer_interval = (u64) appldata_interval * 1000 * TOD_MICRO; switch (cmd) { case APPLDATA_ADD_TIMER: if (appldata_timer_active) break; - per_cpu_interval = (u64) (appldata_interval*1000 / - num_online_cpus()) * TOD_MICRO; - for_each_online_cpu(i) { - per_cpu(appldata_timer, i).expires = per_cpu_interval; - smp_call_function_on(add_virt_timer_periodic, - &per_cpu(appldata_timer, i), - 0, 1, i); - } + appldata_timer.expires = timer_interval; + add_virt_timer_periodic(&appldata_timer); appldata_timer_active = 1; - P_INFO("Monitoring timer started.\n"); break; case APPLDATA_DEL_TIMER: - for_each_online_cpu(i) - del_virt_timer(&per_cpu(appldata_timer, i)); + del_virt_timer(&appldata_timer); if (!appldata_timer_active) break; appldata_timer_active = 0; - atomic_set(&appldata_expire_count, num_online_cpus()); - P_INFO("Monitoring timer stopped.\n"); break; case APPLDATA_MOD_TIMER: - per_cpu_interval = (u64) (appldata_interval*1000 / - num_online_cpus()) * TOD_MICRO; if (!appldata_timer_active) break; - for_each_online_cpu(i) { - struct { - struct vtimer_list *timer; - u64 expires; - } args; - args.timer = &per_cpu(appldata_timer, i); - args.expires = per_cpu_interval; - smp_call_function_on(__appldata_mod_vtimer_wrap, - &args, 0, 1, i); - } + mod_virt_timer_periodic(&appldata_timer, timer_interval); } } @@ -322,10 +201,10 @@ __appldata_vtimer_setup(int cmd) * Start/Stop timer, show status of timer (0 = not active, 1 = active) */ static int -appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int len; + unsigned int len; char buf[2]; if (!*lenp || *ppos) { @@ -333,7 +212,9 @@ appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, return 0; } if (!write) { - len = sprintf(buf, appldata_timer_active ? "1\n" : "0\n"); + strncpy(buf, appldata_timer_active ? "1\n" : "0\n", + ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -362,10 +243,11 @@ out: * current timer interval. */ static int -appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, +appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int len, interval; + unsigned int len; + int interval; char buf[16]; if (!*lenp || *ppos) { @@ -381,22 +263,17 @@ appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, goto out; } len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) { + if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) return -EFAULT; - } + interval = 0; sscanf(buf, "%i", &interval); - if (interval <= 0) { - P_ERROR("Timer CPU interval has to be > 0!\n"); + if (interval <= 0) return -EINVAL; - } spin_lock(&appldata_timer_lock); appldata_interval = interval; __appldata_vtimer_setup(APPLDATA_MOD_TIMER); spin_unlock(&appldata_timer_lock); - - P_INFO("Monitoring CPU interval set to %u milliseconds.\n", - interval); out: *lenp = len; *ppos += len; @@ -410,16 +287,17 @@ out: * monitoring (0 = not in process, 1 = in process) */ static int -appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, +appldata_generic_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; - int rc, len, found; + unsigned int len; + int rc, found; char buf[2]; struct list_head *lh; found = 0; - spin_lock(&appldata_ops_lock); + mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { tmp_ops = list_entry(lh, struct appldata_ops, list); if (&tmp_ops->ctl_table[2] == ctl) { @@ -427,15 +305,15 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, } } if (!found) { - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); return -ENODEV; } ops = ctl->data; if (!try_module_get(ops->owner)) { // protect this function - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); return -ENODEV; } - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); if (!*lenp || *ppos) { *lenp = 0; @@ -443,7 +321,8 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, return 0; } if (!write) { - len = sprintf(buf, ops->active ? "1\n" : "0\n"); + strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) { @@ -459,42 +338,36 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; } - spin_lock(&appldata_ops_lock); + mutex_lock(&appldata_ops_mutex); if ((buf[0] == '1') && (ops->active == 0)) { // protect work queue callback if (!try_module_get(ops->owner)) { - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); module_put(ops->owner); return -ENODEV; } - ops->active = 1; ops->callback(ops->data); // init record rc = appldata_diag(ops->record_nr, APPLDATA_START_INTERVAL_REC, - (unsigned long) ops->data, ops->size); + (unsigned long) ops->data, ops->size, + ops->mod_lvl); if (rc != 0) { - P_ERROR("START DIAG 0xDC for %s failed, " - "return code: %d\n", ops->name, rc); + pr_err("Starting the data collection for %s " + "failed with rc=%d\n", ops->name, rc); module_put(ops->owner); - ops->active = 0; - } else { - P_INFO("Monitoring %s data enabled, " - "DIAG 0xDC started.\n", ops->name); - } + } else + ops->active = 1; } else if ((buf[0] == '0') && (ops->active == 1)) { ops->active = 0; rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, - (unsigned long) ops->data, ops->size); - if (rc != 0) { - P_ERROR("STOP DIAG 0xDC for %s failed, " - "return code: %d\n", ops->name, rc); - } else { - P_INFO("Monitoring %s data disabled, " - "DIAG 0xDC stopped.\n", ops->name); - } + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) + pr_err("Stopping the data collection for %s " + "failed with rc=%d\n", ops->name, rc); module_put(ops->owner); } - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); out: *lenp = len; *ppos += len; @@ -513,76 +386,37 @@ out: */ int appldata_register_ops(struct appldata_ops *ops) { - struct list_head *lh; - struct appldata_ops *tmp_ops; - int i; - - i = 0; + if (ops->size > APPLDATA_MAX_REC_SIZE) + return -EINVAL; - if ((ops->size > APPLDATA_MAX_REC_SIZE) || - (ops->size < 0)){ - P_ERROR("Invalid size of %s record = %i, maximum = %i!\n", - ops->name, ops->size, APPLDATA_MAX_REC_SIZE); + ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL); + if (!ops->ctl_table) return -ENOMEM; - } - if ((ops->ctl_nr == CTL_APPLDATA) || - (ops->ctl_nr == CTL_APPLDATA_TIMER) || - (ops->ctl_nr == CTL_APPLDATA_INTERVAL)) { - P_ERROR("ctl_nr %i already in use!\n", ops->ctl_nr); - return -EBUSY; - } - ops->ctl_table = kmalloc(4*sizeof(struct ctl_table), GFP_KERNEL); - if (ops->ctl_table == NULL) { - P_ERROR("Not enough memory for %s ctl_table!\n", ops->name); - return -ENOMEM; - } - memset(ops->ctl_table, 0, 4*sizeof(struct ctl_table)); - spin_lock(&appldata_ops_lock); - list_for_each(lh, &appldata_ops_list) { - tmp_ops = list_entry(lh, struct appldata_ops, list); - P_DEBUG("register_ops loop: %i) name = %s, ctl = %i\n", - ++i, tmp_ops->name, tmp_ops->ctl_nr); - P_DEBUG("Comparing %s (ctl %i) with %s (ctl %i)\n", - tmp_ops->name, tmp_ops->ctl_nr, ops->name, - ops->ctl_nr); - if (strncmp(tmp_ops->name, ops->name, - APPLDATA_PROC_NAME_LENGTH) == 0) { - P_ERROR("Name \"%s\" already registered!\n", ops->name); - kfree(ops->ctl_table); - spin_unlock(&appldata_ops_lock); - return -EBUSY; - } - if (tmp_ops->ctl_nr == ops->ctl_nr) { - P_ERROR("ctl_nr %i already registered!\n", ops->ctl_nr); - kfree(ops->ctl_table); - spin_unlock(&appldata_ops_lock); - return -EBUSY; - } - } + mutex_lock(&appldata_ops_mutex); list_add(&ops->list, &appldata_ops_list); - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); - ops->ctl_table[0].ctl_name = CTL_APPLDATA; ops->ctl_table[0].procname = appldata_proc_name; ops->ctl_table[0].maxlen = 0; ops->ctl_table[0].mode = S_IRUGO | S_IXUGO; ops->ctl_table[0].child = &ops->ctl_table[2]; - ops->ctl_table[1].ctl_name = 0; - - ops->ctl_table[2].ctl_name = ops->ctl_nr; ops->ctl_table[2].procname = ops->name; ops->ctl_table[2].mode = S_IRUGO | S_IWUSR; ops->ctl_table[2].proc_handler = appldata_generic_handler; ops->ctl_table[2].data = ops; - ops->ctl_table[3].ctl_name = 0; - - ops->sysctl_header = register_sysctl_table(ops->ctl_table,1); - - P_INFO("%s-ops registered!\n", ops->name); + ops->sysctl_header = register_sysctl_table(ops->ctl_table); + if (!ops->sysctl_header) + goto out; return 0; +out: + mutex_lock(&appldata_ops_mutex); + list_del(&ops->list); + mutex_unlock(&appldata_ops_mutex); + kfree(ops->ctl_table); + return -ENOMEM; } /* @@ -592,71 +426,100 @@ int appldata_register_ops(struct appldata_ops *ops) */ void appldata_unregister_ops(struct appldata_ops *ops) { - void *table; - spin_lock(&appldata_ops_lock); + mutex_lock(&appldata_ops_mutex); list_del(&ops->list); - /* at that point any incoming access will fail */ - table = ops->ctl_table; - ops->ctl_table = NULL; - spin_unlock(&appldata_ops_lock); + mutex_unlock(&appldata_ops_mutex); unregister_sysctl_table(ops->sysctl_header); - kfree(table); - P_INFO("%s-ops unregistered!\n", ops->name); + kfree(ops->ctl_table); } /********************** module-ops management <END> **************************/ -/******************************* init / exit *********************************/ - -static void -appldata_online_cpu(int cpu) +/**************************** suspend / resume *******************************/ +static int appldata_freeze(struct device *dev) { - init_virt_timer(&per_cpu(appldata_timer, cpu)); - per_cpu(appldata_timer, cpu).function = appldata_timer_function; - per_cpu(appldata_timer, cpu).data = (unsigned long) - &appldata_work; - atomic_inc(&appldata_expire_count); + struct appldata_ops *ops; + int rc; + struct list_head *lh; + spin_lock(&appldata_timer_lock); - __appldata_vtimer_setup(APPLDATA_MOD_TIMER); + if (appldata_timer_active) { + __appldata_vtimer_setup(APPLDATA_DEL_TIMER); + appldata_timer_suspended = 1; + } spin_unlock(&appldata_timer_lock); + + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) + pr_err("Stopping the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + } + } + mutex_unlock(&appldata_ops_mutex); + return 0; } -static void -appldata_offline_cpu(int cpu) +static int appldata_restore(struct device *dev) { - del_virt_timer(&per_cpu(appldata_timer, cpu)); - if (atomic_dec_and_test(&appldata_expire_count)) { - atomic_set(&appldata_expire_count, num_online_cpus()); - queue_work(appldata_wq, &appldata_work); - } + struct appldata_ops *ops; + int rc; + struct list_head *lh; + spin_lock(&appldata_timer_lock); - __appldata_vtimer_setup(APPLDATA_MOD_TIMER); + if (appldata_timer_suspended) { + __appldata_vtimer_setup(APPLDATA_ADD_TIMER); + appldata_timer_suspended = 0; + } spin_unlock(&appldata_timer_lock); + + mutex_lock(&appldata_ops_mutex); + list_for_each(lh, &appldata_ops_list) { + ops = list_entry(lh, struct appldata_ops, list); + if (ops->active == 1) { + ops->callback(ops->data); // init record + rc = appldata_diag(ops->record_nr, + APPLDATA_START_INTERVAL_REC, + (unsigned long) ops->data, ops->size, + ops->mod_lvl); + if (rc != 0) { + pr_err("Starting the data collection for %s " + "failed with rc=%d\n", ops->name, rc); + } + } + } + mutex_unlock(&appldata_ops_mutex); + return 0; } -static int -appldata_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int appldata_thaw(struct device *dev) { - switch (action) { - case CPU_ONLINE: - appldata_online_cpu((long) hcpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - appldata_offline_cpu((long) hcpu); - break; -#endif - default: - break; - } - return NOTIFY_OK; + return appldata_restore(dev); } -static struct notifier_block __devinitdata appldata_nb = { - .notifier_call = appldata_cpu_notify, +static const struct dev_pm_ops appldata_pm_ops = { + .freeze = appldata_freeze, + .thaw = appldata_thaw, + .restore = appldata_restore, }; +static struct platform_driver appldata_pdrv = { + .driver = { + .name = "appldata", + .owner = THIS_MODULE, + .pm = &appldata_pm_ops, + }, +}; +/************************* suspend / resume <END> ****************************/ + + +/******************************* init / exit *********************************/ + /* * appldata_init() * @@ -664,124 +527,49 @@ static struct notifier_block __devinitdata appldata_nb = { */ static int __init appldata_init(void) { - int i; + int rc; - P_DEBUG("sizeof(parameter_list) = %lu\n", - sizeof(struct appldata_parameter_list)); + init_virt_timer(&appldata_timer); + appldata_timer.function = appldata_timer_function; + appldata_timer.data = (unsigned long) &appldata_work; + rc = platform_driver_register(&appldata_pdrv); + if (rc) + return rc; + + appldata_pdev = platform_device_register_simple("appldata", -1, NULL, + 0); + if (IS_ERR(appldata_pdev)) { + rc = PTR_ERR(appldata_pdev); + goto out_driver; + } appldata_wq = create_singlethread_workqueue("appldata"); if (!appldata_wq) { - P_ERROR("Could not create work queue\n"); - return -ENOMEM; + rc = -ENOMEM; + goto out_device; } - for_each_online_cpu(i) - appldata_online_cpu(i); - - /* Register cpu hotplug notifier */ - register_cpu_notifier(&appldata_nb); - - appldata_sysctl_header = register_sysctl_table(appldata_dir_table, 1); -#ifdef MODULE - appldata_dir_table[0].de->owner = THIS_MODULE; - appldata_table[0].de->owner = THIS_MODULE; - appldata_table[1].de->owner = THIS_MODULE; -#endif - - P_DEBUG("Base interface initialized.\n"); + appldata_sysctl_header = register_sysctl_table(appldata_dir_table); return 0; -} - -/* - * appldata_exit() - * - * stop timer, unregister /proc entries - */ -static void __exit appldata_exit(void) -{ - struct list_head *lh; - struct appldata_ops *ops; - int rc, i; - P_DEBUG("Unloading module ...\n"); - /* - * ops list should be empty, but just in case something went wrong... - */ - spin_lock(&appldata_ops_lock); - list_for_each(lh, &appldata_ops_list) { - ops = list_entry(lh, struct appldata_ops, list); - rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, - (unsigned long) ops->data, ops->size); - if (rc != 0) { - P_ERROR("STOP DIAG 0xDC for %s failed, " - "return code: %d\n", ops->name, rc); - } - } - spin_unlock(&appldata_ops_lock); - - for_each_online_cpu(i) - appldata_offline_cpu(i); - - appldata_timer_active = 0; - - unregister_sysctl_table(appldata_sysctl_header); - - destroy_workqueue(appldata_wq); - P_DEBUG("... module unloaded!\n"); +out_device: + platform_device_unregister(appldata_pdev); +out_driver: + platform_driver_unregister(&appldata_pdrv); + return rc; } -/**************************** init / exit <END> ******************************/ +__initcall(appldata_init); -module_init(appldata_init); -module_exit(appldata_exit); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Gerald Schaefer"); -MODULE_DESCRIPTION("Linux-VM Monitor Stream, base infrastructure"); +/**************************** init / exit <END> ******************************/ EXPORT_SYMBOL_GPL(appldata_register_ops); EXPORT_SYMBOL_GPL(appldata_unregister_ops); +EXPORT_SYMBOL_GPL(appldata_diag); -#ifdef MODULE -/* - * Kernel symbols needed by appldata_mem and appldata_os modules. - * However, if this file is compiled as a module (for testing only), these - * symbols are not exported. In this case, we define them locally and export - * those. - */ -void si_swapinfo(struct sysinfo *val) -{ - val->freeswap = -1ul; - val->totalswap = -1ul; -} - -unsigned long avenrun[3] = {-1 - FIXED_1/200, -1 - FIXED_1/200, - -1 - FIXED_1/200}; -int nr_threads = -1; - -void get_full_page_state(struct page_state *ps) -{ - memset(ps, -1, sizeof(struct page_state)); -} - -unsigned long nr_running(void) -{ - return -1; -} - -unsigned long nr_iowait(void) -{ - return -1; -} - -/*unsigned long nr_context_switches(void) -{ - return -1; -}*/ -#endif /* MODULE */ +#ifdef CONFIG_SWAP EXPORT_SYMBOL_GPL(si_swapinfo); +#endif EXPORT_SYMBOL_GPL(nr_threads); -EXPORT_SYMBOL_GPL(avenrun); -EXPORT_SYMBOL_GPL(get_full_page_state); EXPORT_SYMBOL_GPL(nr_running); EXPORT_SYMBOL_GPL(nr_iowait); -//EXPORT_SYMBOL_GPL(nr_context_switches); diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index f0e2fbed3d4..edcf2a70694 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -1,28 +1,24 @@ /* - * arch/s390/appldata/appldata_mem.c - * * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects data related to memory management. * - * Copyright (C) 2003 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2006 * - * Author: Gerald Schaefer <geraldsc@de.ibm.com> + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/kernel_stat.h> -#include <asm/io.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/slab.h> +#include <asm/io.h> #include "appldata.h" -#define MY_PRINT_NAME "appldata_mem" /* for debug messages, etc. */ #define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* Converts #Pages to KB */ /* @@ -68,33 +64,9 @@ struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} __attribute__((packed)) appldata_mem_data; +} __packed; -static inline void appldata_debug_print(struct appldata_mem_data *mem_data) -{ - P_DEBUG("--- MEM - RECORD ---\n"); - P_DEBUG("pgpgin = %8lu KB\n", mem_data->pgpgin); - P_DEBUG("pgpgout = %8lu KB\n", mem_data->pgpgout); - P_DEBUG("pswpin = %8lu Pages\n", mem_data->pswpin); - P_DEBUG("pswpout = %8lu Pages\n", mem_data->pswpout); - P_DEBUG("pgalloc = %8lu \n", mem_data->pgalloc); - P_DEBUG("pgfault = %8lu \n", mem_data->pgfault); - P_DEBUG("pgmajfault = %8lu \n", mem_data->pgmajfault); - P_DEBUG("sharedram = %8lu KB\n", mem_data->sharedram); - P_DEBUG("totalram = %8lu KB\n", mem_data->totalram); - P_DEBUG("freeram = %8lu KB\n", mem_data->freeram); - P_DEBUG("totalhigh = %8lu KB\n", mem_data->totalhigh); - P_DEBUG("freehigh = %8lu KB\n", mem_data->freehigh); - P_DEBUG("bufferram = %8lu KB\n", mem_data->bufferram); - P_DEBUG("cached = %8lu KB\n", mem_data->cached); - P_DEBUG("totalswap = %8lu KB\n", mem_data->totalswap); - P_DEBUG("freeswap = %8lu KB\n", mem_data->freeswap); - P_DEBUG("sync_count_1 = %u\n", mem_data->sync_count_1); - P_DEBUG("sync_count_2 = %u\n", mem_data->sync_count_2); - P_DEBUG("timestamp = %lX\n", mem_data->timestamp); -} - /* * appldata_get_mem_data() * @@ -104,24 +76,24 @@ static void appldata_get_mem_data(void *data) { /* * don't put large structures on the stack, we are - * serialized through the appldata_ops_lock and can use static + * serialized through the appldata_ops_mutex and can use static */ static struct sysinfo val; - static struct page_state ps; + unsigned long ev[NR_VM_EVENT_ITEMS]; struct appldata_mem_data *mem_data; mem_data = data; mem_data->sync_count_1++; - get_full_page_state(&ps); - mem_data->pgpgin = ps.pgpgin >> 1; - mem_data->pgpgout = ps.pgpgout >> 1; - mem_data->pswpin = ps.pswpin; - mem_data->pswpout = ps.pswpout; - mem_data->pgalloc = ps.pgalloc_high + ps.pgalloc_normal + - ps.pgalloc_dma; - mem_data->pgfault = ps.pgfault; - mem_data->pgmajfault = ps.pgmajfault; + all_vm_events(ev); + mem_data->pgpgin = ev[PGPGIN] >> 1; + mem_data->pgpgout = ev[PGPGOUT] >> 1; + mem_data->pswpin = ev[PSWPIN]; + mem_data->pswpout = ev[PSWPOUT]; + mem_data->pgalloc = ev[PGALLOC_NORMAL]; + mem_data->pgalloc += ev[PGALLOC_DMA]; + mem_data->pgfault = ev[PGFAULT]; + mem_data->pgmajfault = ev[PGMAJFAULT]; si_meminfo(&val); mem_data->sharedram = val.sharedram; @@ -130,28 +102,25 @@ static void appldata_get_mem_data(void *data) mem_data->totalhigh = P2K(val.totalhigh); mem_data->freehigh = P2K(val.freehigh); mem_data->bufferram = P2K(val.bufferram); - mem_data->cached = P2K(atomic_read(&nr_pagecache) - val.bufferram); + mem_data->cached = P2K(global_page_state(NR_FILE_PAGES) + - val.bufferram); si_swapinfo(&val); mem_data->totalswap = P2K(val.totalswap); mem_data->freeswap = P2K(val.freeswap); - mem_data->timestamp = get_clock(); + mem_data->timestamp = get_tod_clock(); mem_data->sync_count_2++; -#ifdef APPLDATA_DEBUG - appldata_debug_print(mem_data); -#endif } static struct appldata_ops ops = { - .ctl_nr = CTL_APPLDATA_MEM, .name = "mem", .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), .callback = &appldata_get_mem_data, - .data = &appldata_mem_data, .owner = THIS_MODULE, + .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -162,17 +131,17 @@ static struct appldata_ops ops = { */ static int __init appldata_mem_init(void) { - int rc; + int ret; + + ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; - P_DEBUG("sizeof(mem) = %lu\n", sizeof(struct appldata_mem_data)); + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); - rc = appldata_register_ops(&ops); - if (rc != 0) { - P_ERROR("Error registering ops, rc = %i\n", rc); - } else { - P_DEBUG("%s-ops registered!\n", ops.name); - } - return rc; + return ret; } /* @@ -183,7 +152,7 @@ static int __init appldata_mem_init(void) static void __exit appldata_mem_exit(void) { appldata_unregister_ops(&ops); - P_DEBUG("%s-ops unregistered!\n", ops.name); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 2a4c7432db4..66037d2622b 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -1,29 +1,23 @@ /* - * arch/s390/appldata/appldata_net_sum.c - * * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects accumulated network statistics (Packets received/transmitted, * dropped, errors, ...). * - * Copyright (C) 2003 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2006 * - * Author: Gerald Schaefer <geraldsc@de.ibm.com> + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/errno.h> #include <linux/kernel_stat.h> #include <linux/netdevice.h> +#include <net/net_namespace.h> #include "appldata.h" -#define MY_PRINT_NAME "appldata_net_sum" /* for debug messages, etc. */ - - /* * Network data * @@ -57,29 +51,9 @@ struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} __attribute__((packed)) appldata_net_sum_data; +} __packed; -static inline void appldata_print_debug(struct appldata_net_sum_data *net_data) -{ - P_DEBUG("--- NET - RECORD ---\n"); - - P_DEBUG("nr_interfaces = %u\n", net_data->nr_interfaces); - P_DEBUG("rx_packets = %8lu\n", net_data->rx_packets); - P_DEBUG("tx_packets = %8lu\n", net_data->tx_packets); - P_DEBUG("rx_bytes = %8lu\n", net_data->rx_bytes); - P_DEBUG("tx_bytes = %8lu\n", net_data->tx_bytes); - P_DEBUG("rx_errors = %8lu\n", net_data->rx_errors); - P_DEBUG("tx_errors = %8lu\n", net_data->tx_errors); - P_DEBUG("rx_dropped = %8lu\n", net_data->rx_dropped); - P_DEBUG("tx_dropped = %8lu\n", net_data->tx_dropped); - P_DEBUG("collisions = %8lu\n", net_data->collisions); - - P_DEBUG("sync_count_1 = %u\n", net_data->sync_count_1); - P_DEBUG("sync_count_2 = %u\n", net_data->sync_count_2); - P_DEBUG("timestamp = %lX\n", net_data->timestamp); -} - /* * appldata_get_net_sum_data() * @@ -90,7 +64,6 @@ static void appldata_get_net_sum_data(void *data) int i; struct appldata_net_sum_data *net_data; struct net_device *dev; - struct net_device_stats *stats; unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped, collisions; @@ -107,12 +80,13 @@ static void appldata_get_net_sum_data(void *data) rx_dropped = 0; tx_dropped = 0; collisions = 0; - read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { - if (dev->get_stats == NULL) { - continue; - } - stats = dev->get_stats(dev); + + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + const struct rtnl_link_stats64 *stats; + struct rtnl_link_stats64 temp; + + stats = dev_get_stats(dev, &temp); rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; rx_bytes += stats->rx_bytes; @@ -124,7 +98,8 @@ static void appldata_get_net_sum_data(void *data) collisions += stats->collisions; i++; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); + net_data->nr_interfaces = i; net_data->rx_packets = rx_packets; net_data->tx_packets = tx_packets; @@ -136,22 +111,18 @@ static void appldata_get_net_sum_data(void *data) net_data->tx_dropped = tx_dropped; net_data->collisions = collisions; - net_data->timestamp = get_clock(); + net_data->timestamp = get_tod_clock(); net_data->sync_count_2++; -#ifdef APPLDATA_DEBUG - appldata_print_debug(net_data); -#endif } static struct appldata_ops ops = { - .ctl_nr = CTL_APPLDATA_NET_SUM, .name = "net_sum", .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), .callback = &appldata_get_net_sum_data, - .data = &appldata_net_sum_data, .owner = THIS_MODULE, + .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -162,17 +133,17 @@ static struct appldata_ops ops = { */ static int __init appldata_net_init(void) { - int rc; + int ret; - P_DEBUG("sizeof(net) = %lu\n", sizeof(struct appldata_net_sum_data)); + ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; - rc = appldata_register_ops(&ops); - if (rc != 0) { - P_ERROR("Error registering ops, rc = %i\n", rc); - } else { - P_DEBUG("%s-ops registered!\n", ops.name); - } - return rc; + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -183,7 +154,7 @@ static int __init appldata_net_init(void) static void __exit appldata_net_exit(void) { appldata_unregister_ops(&ops); - P_DEBUG("%s-ops unregistered!\n", ops.name); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 99ddd3bf2fb..69b23b25ac3 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -1,15 +1,15 @@ /* - * arch/s390/appldata/appldata_os.c - * * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects misc. OS related data (CPU utilization, running processes). * - * Copyright (C) 2003 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2003, 2006 * - * Author: Gerald Schaefer <geraldsc@de.ibm.com> + * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com> */ -#include <linux/config.h> +#define KMSG_COMPONENT "appldata" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -17,12 +17,12 @@ #include <linux/kernel_stat.h> #include <linux/netdevice.h> #include <linux/sched.h> +#include <asm/appldata.h> #include <asm/smp.h> #include "appldata.h" -#define MY_PRINT_NAME "appldata_os" /* for debug messages, etc. */ #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) @@ -44,11 +44,14 @@ struct appldata_os_per_cpu { u32 per_cpu_system; /* ... spent in kernel mode */ u32 per_cpu_idle; /* ... spent in idle mode */ -// New in 2.6 --> + /* New in 2.6 */ u32 per_cpu_irq; /* ... spent in interrupts */ u32 per_cpu_softirq; /* ... spent in softirqs */ u32 per_cpu_iowait; /* ... spent while waiting for I/O */ -// <-- New in 2.6 + + /* New in modification level 01 */ + u32 per_cpu_steal; /* ... stolen by hypervisor */ + u32 cpu_id; /* number of this CPU */ } __attribute__((packed)); struct appldata_os_data { @@ -68,10 +71,9 @@ struct appldata_os_data { u32 avenrun[3]; /* average nr. of running processes during */ /* the last 1, 5 and 15 minutes */ -// New in 2.6 --> + /* New in 2.6 */ u32 nr_iowait; /* number of blocked threads (waiting for I/O) */ -// <-- New in 2.6 /* per cpu data */ struct appldata_os_per_cpu os_cpu[0]; @@ -79,42 +81,13 @@ struct appldata_os_data { static struct appldata_os_data *appldata_os_data; +static struct appldata_ops ops = { + .name = "os", + .record_nr = APPLDATA_RECORD_OS_ID, + .owner = THIS_MODULE, + .mod_lvl = {0xF0, 0xF1}, /* EBCDIC "01" */ +}; -static inline void appldata_print_debug(struct appldata_os_data *os_data) -{ - int a0, a1, a2, i; - - P_DEBUG("--- OS - RECORD ---\n"); - P_DEBUG("nr_threads = %u\n", os_data->nr_threads); - P_DEBUG("nr_running = %u\n", os_data->nr_running); - P_DEBUG("nr_iowait = %u\n", os_data->nr_iowait); - P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->avenrun[0], - os_data->avenrun[1], os_data->avenrun[2]); - a0 = os_data->avenrun[0]; - a1 = os_data->avenrun[1]; - a2 = os_data->avenrun[2]; - P_DEBUG("avenrun(float) = %d.%02d / %d.%02d / %d.%02d\n", - LOAD_INT(a0), LOAD_FRAC(a0), LOAD_INT(a1), LOAD_FRAC(a1), - LOAD_INT(a2), LOAD_FRAC(a2)); - - P_DEBUG("nr_cpus = %u\n", os_data->nr_cpus); - for (i = 0; i < os_data->nr_cpus; i++) { - P_DEBUG("cpu%u : user = %u, nice = %u, system = %u, " - "idle = %u, irq = %u, softirq = %u, iowait = %u\n", - i, - os_data->os_cpu[i].per_cpu_user, - os_data->os_cpu[i].per_cpu_nice, - os_data->os_cpu[i].per_cpu_system, - os_data->os_cpu[i].per_cpu_idle, - os_data->os_cpu[i].per_cpu_irq, - os_data->os_cpu[i].per_cpu_softirq, - os_data->os_cpu[i].per_cpu_iowait); - } - - P_DEBUG("sync_count_1 = %u\n", os_data->sync_count_1); - P_DEBUG("sync_count_2 = %u\n", os_data->sync_count_2); - P_DEBUG("timestamp = %lX\n", os_data->timestamp); -} /* * appldata_get_os_data() @@ -123,14 +96,13 @@ static inline void appldata_print_debug(struct appldata_os_data *os_data) */ static void appldata_get_os_data(void *data) { - int i, j; + int i, j, rc; struct appldata_os_data *os_data; + unsigned int new_size; os_data = data; os_data->sync_count_1++; - os_data->nr_cpus = num_online_cpus(); - os_data->nr_threads = nr_threads; os_data->nr_running = nr_running(); os_data->nr_iowait = nr_iowait(); @@ -141,39 +113,54 @@ static void appldata_get_os_data(void *data) j = 0; for_each_online_cpu(i) { os_data->os_cpu[j].per_cpu_user = - cputime_to_jiffies(kstat_cpu(i).cpustat.user); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); os_data->os_cpu[j].per_cpu_nice = - cputime_to_jiffies(kstat_cpu(i).cpustat.nice); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); os_data->os_cpu[j].per_cpu_system = - cputime_to_jiffies(kstat_cpu(i).cpustat.system); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); os_data->os_cpu[j].per_cpu_idle = - cputime_to_jiffies(kstat_cpu(i).cpustat.idle); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); os_data->os_cpu[j].per_cpu_irq = - cputime_to_jiffies(kstat_cpu(i).cpustat.irq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); os_data->os_cpu[j].per_cpu_softirq = - cputime_to_jiffies(kstat_cpu(i).cpustat.softirq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); os_data->os_cpu[j].per_cpu_iowait = - cputime_to_jiffies(kstat_cpu(i).cpustat.iowait); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); + os_data->os_cpu[j].per_cpu_steal = + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); + os_data->os_cpu[j].cpu_id = i; j++; } - os_data->timestamp = get_clock(); + os_data->nr_cpus = j; + + new_size = sizeof(struct appldata_os_data) + + (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu)); + if (ops.size != new_size) { + if (ops.active) { + rc = appldata_diag(APPLDATA_RECORD_OS_ID, + APPLDATA_START_INTERVAL_REC, + (unsigned long) ops.data, new_size, + ops.mod_lvl); + if (rc != 0) + pr_err("Starting a new OS data collection " + "failed with rc=%d\n", rc); + + rc = appldata_diag(APPLDATA_RECORD_OS_ID, + APPLDATA_STOP_REC, + (unsigned long) ops.data, ops.size, + ops.mod_lvl); + if (rc != 0) + pr_err("Stopping a faulty OS data " + "collection failed with rc=%d\n", rc); + } + ops.size = new_size; + } + os_data->timestamp = get_tod_clock(); os_data->sync_count_2++; -#ifdef APPLDATA_DEBUG - appldata_print_debug(os_data); -#endif } -static struct appldata_ops ops = { - .ctl_nr = CTL_APPLDATA_OS, - .name = "os", - .record_nr = APPLDATA_RECORD_OS_ID, - .callback = &appldata_get_os_data, - .owner = THIS_MODULE, -}; - - /* * appldata_os_init() * @@ -181,41 +168,32 @@ static struct appldata_ops ops = { */ static int __init appldata_os_init(void) { - int rc, size; + int rc, max_size; - size = sizeof(struct appldata_os_data) + - (NR_CPUS * sizeof(struct appldata_os_per_cpu)); - if (size > APPLDATA_MAX_REC_SIZE) { - P_ERROR("Size of record = %i, bigger than maximum (%i)!\n", - size, APPLDATA_MAX_REC_SIZE); + max_size = sizeof(struct appldata_os_data) + + (num_possible_cpus() * sizeof(struct appldata_os_per_cpu)); + if (max_size > APPLDATA_MAX_REC_SIZE) { + pr_err("Maximum OS record size %i exceeds the maximum " + "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); rc = -ENOMEM; goto out; } - P_DEBUG("sizeof(os) = %i, sizeof(os_cpu) = %lu\n", size, - sizeof(struct appldata_os_per_cpu)); - appldata_os_data = kmalloc(size, GFP_DMA); + appldata_os_data = kzalloc(max_size, GFP_KERNEL | GFP_DMA); if (appldata_os_data == NULL) { - P_ERROR("No memory for %s!\n", ops.name); rc = -ENOMEM; goto out; } - memset(appldata_os_data, 0, size); appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu); appldata_os_data->cpu_offset = offsetof(struct appldata_os_data, os_cpu); - P_DEBUG("cpu offset = %u\n", appldata_os_data->cpu_offset); ops.data = appldata_os_data; - ops.size = size; + ops.callback = &appldata_get_os_data; rc = appldata_register_ops(&ops); - if (rc != 0) { - P_ERROR("Error registering ops, rc = %i\n", rc); + if (rc != 0) kfree(appldata_os_data); - } else { - P_DEBUG("%s-ops registered!\n", ops.name); - } out: return rc; } @@ -229,7 +207,6 @@ static void __exit appldata_os_exit(void) { appldata_unregister_ops(&ops); kfree(appldata_os_data); - P_DEBUG("%s-ops unregistered!\n", ops.name); } diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore new file mode 100644 index 00000000000..017d5912ad2 --- /dev/null +++ b/arch/s390/boot/.gitignore @@ -0,0 +1,2 @@ +image +bzImage diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 4d97eef36b8..9a42ecec564 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -6,13 +6,21 @@ COMPILE_VERSION := __linux_compile_version_id__`hostname | \ tr -c '[0-9A-Za-z]' '_'`__`date | \ tr -c '[0-9A-Za-z]' '_'`_t -EXTRA_CFLAGS := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. +ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. targets := image +targets += bzImage +subdir- := compressed $(obj)/image: vmlinux FORCE $(call if_changed,objcopy) -install: $(CONFIGURE) $(obj)/image - sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ - System.map Kerntypes "$(INSTALL_PATH)" +$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/compressed/vmlinux: FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed $@ + +install: $(CONFIGURE) $(obj)/bzImage + sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ + System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore new file mode 100644 index 00000000000..ae06b9b4c02 --- /dev/null +++ b/arch/s390/boot/compressed/.gitignore @@ -0,0 +1,3 @@ +sizes.h +vmlinux +vmlinux.lds diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile new file mode 100644 index 00000000000..f90d1fc6d60 --- /dev/null +++ b/arch/s390/boot/compressed/Makefile @@ -0,0 +1,71 @@ +# +# linux/arch/s390/boot/compressed/Makefile +# +# create a compressed vmlinux image from the original vmlinux +# + +BITS := $(if $(CONFIG_64BIT),64,31) + +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 +targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += misc.o piggy.o sizes.h head$(BITS).o + +KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) +KBUILD_CFLAGS += $(call cc-option,-ffreestanding) + +GCOV_PROFILE := n + +OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o) +OBJECTS += $(obj)/head$(BITS).o $(obj)/misc.o $(obj)/piggy.o + +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) + $(call if_changed,ld) + @: + +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p' + +quiet_cmd_sizes = GEN $@ + cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ + +$(obj)/sizes.h: vmlinux + $(call if_changed,sizes) + +AFLAGS_head$(BITS).o += -I$(obj) +$(obj)/head$(BITS).o: $(obj)/sizes.h + +CFLAGS_misc.o += -I$(obj) +$(obj)/misc.o: $(obj)/sizes.h + +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux + $(call if_changed,objcopy) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin + +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZ4) := lz4 +suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_XZ) := xz + +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) + $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) + $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) + $(call if_changed,lz4) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) + $(call if_changed,lzma) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) + $(call if_changed,lzo) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) + $(call if_changed,xzkern) + +LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T +$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) + $(call if_changed,ld) diff --git a/arch/s390/boot/compressed/head31.S b/arch/s390/boot/compressed/head31.S new file mode 100644 index 00000000000..e8c9e18b803 --- /dev/null +++ b/arch/s390/boot/compressed/head31.S @@ -0,0 +1,51 @@ +/* + * Startup glue code to uncompress the kernel + * + * Copyright IBM Corp. 2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include "sizes.h" + +__HEAD +ENTRY(startup_continue) + basr %r13,0 # get base +.LPG1: + # setup stack + l %r15,.Lstack-.LPG1(%r13) + ahi %r15,-96 + l %r1,.Ldecompress-.LPG1(%r13) + basr %r14,%r1 + # setup registers for memory mover & branch to target + lr %r4,%r2 + l %r2,.Loffset-.LPG1(%r13) + la %r4,0(%r2,%r4) + l %r3,.Lmvsize-.LPG1(%r13) + lr %r5,%r3 + # move the memory mover someplace safe + la %r1,0x200 + mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) + # decompress image is started at 0x11000 + lr %r6,%r2 + br %r1 +mover: + mvcle %r2,%r4,0 + jo mover + br %r6 +mover_end: + + .align 8 +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) +.Ldecompress: + .long decompress_kernel +.Loffset: + .long 0x11000 +.Lmvsize: + .long SZ__bss_start diff --git a/arch/s390/boot/compressed/head64.S b/arch/s390/boot/compressed/head64.S new file mode 100644 index 00000000000..f86a4eef28a --- /dev/null +++ b/arch/s390/boot/compressed/head64.S @@ -0,0 +1,48 @@ +/* + * Startup glue code to uncompress the kernel + * + * Copyright IBM Corp. 2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include "sizes.h" + +__HEAD +ENTRY(startup_continue) + basr %r13,0 # get base +.LPG1: + # setup stack + lg %r15,.Lstack-.LPG1(%r13) + aghi %r15,-160 + brasl %r14,decompress_kernel + # setup registers for memory mover & branch to target + lgr %r4,%r2 + lg %r2,.Loffset-.LPG1(%r13) + la %r4,0(%r2,%r4) + lg %r3,.Lmvsize-.LPG1(%r13) + lgr %r5,%r3 + # move the memory mover someplace safe + la %r1,0x200 + mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) + # decompress image is started at 0x11000 + lgr %r6,%r2 + br %r1 +mover: + mvcle %r2,%r4,0 + jo mover + br %r6 +mover_end: + + .align 8 +.Lstack: + .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) +.Loffset: + .quad 0x11000 +.Lmvsize: + .quad SZ__bss_start diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c new file mode 100644 index 00000000000..57cbaff1f39 --- /dev/null +++ b/arch/s390/boot/compressed/misc.c @@ -0,0 +1,175 @@ +/* + * Definitions and wrapper functions for kernel decompressor + * + * Copyright IBM Corp. 2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <asm/uaccess.h> +#include <asm/page.h> +#include <asm/ipl.h> +#include "sizes.h" + +/* + * gzip declarations + */ +#define STATIC static + +#undef memset +#undef memcpy +#undef memmove +#define memmove memmove +#define memzero(s, n) memset((s), 0, (n)) + +/* Symbols defined by linker scripts */ +extern char input_data[]; +extern int input_len; +extern char _text, _end; +extern char _bss, _ebss; + +static void error(char *m); + +static unsigned long free_mem_ptr; +static unsigned long free_mem_end_ptr; + +#ifdef CONFIG_HAVE_KERNEL_BZIP2 +#define HEAP_SIZE 0x400000 +#else +#define HEAP_SIZE 0x10000 +#endif + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +extern _sclp_print_early(const char *); + +static int puts(const char *s) +{ + _sclp_print_early(s); + return 0; +} + +void *memset(void *s, int c, size_t n) +{ + char *xs; + + xs = s; + while (n--) + *xs++ = c; + return s; +} + +void *memcpy(void *dest, const void *src, size_t n) +{ + const char *s = src; + char *d = dest; + + while (n--) + *d++ = *s++; + return dest; +} + +void *memmove(void *dest, const void *src, size_t n) +{ + const char *s = src; + char *d = dest; + + if (d <= s) { + while (n--) + *d++ = *s++; + } else { + d += n; + s += n; + while (n--) + *--d = *--s; + } + return dest; +} + +static void error(char *x) +{ + unsigned long long psw = 0x000a0000deadbeefULL; + + puts("\n\n"); + puts(x); + puts("\n\n -- System halted"); + + asm volatile("lpsw %0" : : "Q" (psw)); +} + +/* + * Safe guard the ipl parameter block against a memory area that will be + * overwritten. The validity check for the ipl parameter block is complex + * (see cio_get_iplinfo and ipl_save_parameters) but if the pointer to + * the ipl parameter block intersects with the passed memory area we can + * safely assume that we can read from that memory. In that case just copy + * the memory to IPL_PARMBLOCK_ORIGIN even if there is no ipl parameter + * block. + */ +static void check_ipl_parmblock(void *start, unsigned long size) +{ + void *src, *dst; + + src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr; + if (src + PAGE_SIZE <= start || src >= start + size) + return; + dst = (void *) IPL_PARMBLOCK_ORIGIN; + memmove(dst, src, PAGE_SIZE); + S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN; +} + +unsigned long decompress_kernel(void) +{ + unsigned long output_addr; + unsigned char *output; + + output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; + check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long)&_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + output = (unsigned char *) output_addr; + +#ifdef CONFIG_BLK_DEV_INITRD + /* + * Move the initrd right behind the end of the decompressed + * kernel image. + */ + if (INITRD_START && INITRD_SIZE && + INITRD_START < (unsigned long) output + SZ__bss_start) { + check_ipl_parmblock(output + SZ__bss_start, + INITRD_START + INITRD_SIZE); + memmove(output + SZ__bss_start, + (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) output + SZ__bss_start; + } +#endif + + puts("Uncompressing Linux... "); + decompress(input_data, input_len, NULL, NULL, output, NULL, error); + puts("Ok, booting the kernel.\n"); + return (unsigned long) output; +} + diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S new file mode 100644 index 00000000000..8e1fb823928 --- /dev/null +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -0,0 +1,55 @@ +#include <asm-generic/vmlinux.lds.h> + +#ifdef CONFIG_64BIT +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +#else +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +#endif + +ENTRY(startup) + +SECTIONS +{ + /* Be careful parts of head_64.S assume startup_32 is at + * address 0. + */ + . = 0; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + .rodata.compressed : { + *(.rodata.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + . = ALIGN(256); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(8); /* For convenience during zeroing */ + _ebss = .; + } + _end = .; +} diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr new file mode 100644 index 00000000000..f02382ae5c4 --- /dev/null +++ b/arch/s390/boot/compressed/vmlinux.scr @@ -0,0 +1,10 @@ +SECTIONS +{ + .rodata.compressed : { + input_len = .; + LONG(input_data_end - input_data) input_data = .; + *(.data) + output_len = . - 4; + input_data_end = .; + } +} diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index d4026f62cb0..aed3069699b 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install - same as make zlilo diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig new file mode 100644 index 00000000000..fd09a10a2b5 --- /dev/null +++ b/arch/s390/configs/default_defconfig @@ -0,0 +1,687 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_PERF=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=m +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=256 +CONFIG_PREEMPT=y +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PCI=y +CONFIG_PCI_DEBUG=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NET_SCTPPROBE=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +CONFIG_RDS_DEBUG=y +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_BPF_JIT=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_TCPPROBE=m +CONFIG_DEVTMPFS=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_XIP=y +CONFIG_CDROM_PKTCDVD=m +CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=y +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_VHOST_NET=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_RAW_DRIVER=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_VIRTIO_BALLOON=m +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD_DEBUG=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_XFS_DEBUG=y +CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FANOTIFY=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_READABLE_ASM=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_SELFTEST=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_SLUB_DEBUG_ON=y +CONFIG_SLUB_STATS=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_VM_RB=y +CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_RT_MUTEX_TESTER=y +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_PROVE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=300 +CONFIG_NOTIFIER_ERROR_INJECTION=m +CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_PM_NOTIFIER_ERROR_INJECT=m +CONFIG_FAULT_INJECTION=y +CONFIG_FAILSLAB=y +CONFIG_FAIL_PAGE_ALLOC=y +CONFIG_FAIL_MAKE_REQUEST=y +CONFIG_FAIL_IO_TIMEOUT=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y +CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_KPROBE_EVENT is not set +CONFIG_LKDTM=m +CONFIG_TEST_LIST_SORT=y +CONFIG_KPROBES_SANITY_TEST=y +CONFIG_RBTREE_TEST=y +CONFIG_INTERVAL_TREE_TEST=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DMA_API_DEBUG=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_IMA=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=y +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m +CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig new file mode 100644 index 00000000000..b061180d354 --- /dev/null +++ b/arch/s390/configs/gcov_defconfig @@ -0,0 +1,640 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_PERF=y +CONFIG_BLK_CGROUP=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=m +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_GCOV_KERNEL=y +CONFIG_GCOV_PROFILE_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=256 +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NET_SCTPPROBE=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_BPF_JIT=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_TCPPROBE=m +CONFIG_DEVTMPFS=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_XIP=y +CONFIG_CDROM_PKTCDVD=m +CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=y +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_VHOST_NET=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_RAW_DRIVER=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_VIRTIO_BALLOON=m +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD_DEBUG=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FANOTIFY=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_TIMER_STATS=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_NOTIFIER_ERROR_INJECTION=m +CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_PM_NOTIFIER_ERROR_INJECT=m +CONFIG_LATENCYTOP=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_KPROBE_EVENT is not set +CONFIG_LKDTM=m +CONFIG_RBTREE_TEST=m +CONFIG_INTERVAL_TREE_TEST=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_IMA=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=y +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m +CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig new file mode 100644 index 00000000000..d279baa0801 --- /dev/null +++ b/arch/s390/configs/performance_defconfig @@ -0,0 +1,632 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_PERF=y +CONFIG_BLK_CGROUP=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=m +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=256 +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_NF_NAT_IPV6=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NET_SCTPPROBE=m +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_BPF_JIT=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_TCPPROBE=m +CONFIG_DEVTMPFS=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_OSD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 +CONFIG_BLK_DEV_XIP=y +CONFIG_CDROM_PKTCDVD=m +CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=y +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI=y +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_SCSI_SRP_TGT_ATTRS=y +CONFIG_ISCSI_TCP=m +CONFIG_LIBFCOE=m +CONFIG_SCSI_DEBUG=m +CONFIG_ZFCP=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_TUN=m +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +CONFIG_VHOST_NET=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_RAW_DRIVER=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_VIRTIO_BALLOON=m +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD_DEBUG=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y +CONFIG_XFS_FS=m +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FANOTIFY=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_TIMER_STATS=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_LATENCYTOP=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_KPROBE_EVENT is not set +CONFIG_LKDTM=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_IMA=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=y +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m +CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_CRC7=m +CONFIG_CRC8=m +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig new file mode 100644 index 00000000000..948e0e057a2 --- /dev/null +++ b/arch/s390/configs/zfcpdump_defconfig @@ -0,0 +1,86 @@ +# CONFIG_SWAP is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_TUNE_ZEC12=y +# CONFIG_COMPAT is not set +CONFIG_NR_CPUS=2 +# CONFIG_HOTPLUG_CPU is not set +CONFIG_HZ_100=y +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set +# CONFIG_CHECK_STACK is not set +# CONFIG_CHSC_SCH is not set +# CONFIG_SCM_BUS is not set +CONFIG_CRASH_DUMP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_SECCOMP is not set +# CONFIG_IUCV is not set +CONFIG_ATM=y +CONFIG_ATM_LANE=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV_XPRAM is not set +# CONFIG_DCSSBLK is not set +# CONFIG_DASD is not set +CONFIG_ENCLOSURE_SERVICES=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_ENCLOSURE=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SRP_ATTRS=y +CONFIG_ZFCP=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_HVC_IUCV is not set +CONFIG_RAW_DRIVER=y +# CONFIG_SCLP_ASYNC is not set +# CONFIG_HMC_DRV is not set +# CONFIG_S390_TAPE is not set +# CONFIG_VMCP is not set +# CONFIG_MONWRITER is not set +# CONFIG_S390_VMUR is not set +# CONFIG_HID is not set +CONFIG_MEMSTICK=y +CONFIG_MEMSTICK_DEBUG=y +CONFIG_MEMSTICK_UNSAFE_RESUME=y +CONFIG_MSPRO_BLOCK=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_INOTIFY_USER is not set +CONFIG_CONFIGFS_FS=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_FTRACE is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +# CONFIG_PFAULT is not set +# CONFIG_S390_HYPFS_FS is not set +# CONFIG_VIRTUALIZATION is not set +# CONFIG_S390_GUEST is not set diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index bfe2541dc5c..7f0b7cda625 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -2,9 +2,10 @@ # Cryptographic API # -obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o -obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o -obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o des_check_key.o +obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o +obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o +obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o +obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o - -obj-$(CONFIG_CRYPTO_TEST) += crypt_s390_query.o +obj-$(CONFIG_S390_PRNG) += prng.o +obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 7a1033d8e00..23223cd63e5 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -4,10 +4,11 @@ * s390 implementation of the AES Cipher Algorithm. * * s390 Version: - * Copyright (C) 2005 IBM Deutschland GmbH, IBM Corporation + * Copyright IBM Corp. 2005, 2007 * Author(s): Jan Glauber (jang@de.ibm.com) + * Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback * - * Derived from "crypto/aes.c" + * Derived from "crypto/aes_generic.c" * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -16,63 +17,129 @@ * */ +#define KMSG_COMPONENT "aes_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <linux/err.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/crypto.h> +#include <linux/spinlock.h> #include "crypt_s390.h" -#define AES_MIN_KEY_SIZE 16 -#define AES_MAX_KEY_SIZE 32 - -/* data block size for all key lengths */ -#define AES_BLOCK_SIZE 16 +#define AES_KEYLEN_128 1 +#define AES_KEYLEN_192 2 +#define AES_KEYLEN_256 4 -int has_aes_128 = 0; -int has_aes_192 = 0; -int has_aes_256 = 0; +static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); +static char keylen_flag; struct s390_aes_ctx { - u8 iv[AES_BLOCK_SIZE]; u8 key[AES_MAX_KEY_SIZE]; + long enc; + long dec; int key_len; + union { + struct crypto_blkcipher *blk; + struct crypto_cipher *cip; + } fallback; }; -static int aes_set_key(void *ctx, const u8 *in_key, unsigned int key_len, - u32 *flags) -{ - struct s390_aes_ctx *sctx = ctx; +struct pcc_param { + u8 key[32]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +}; +struct s390_xts_ctx { + u8 key[32]; + u8 pcc_key[32]; + long enc; + long dec; + int key_len; + struct crypto_blkcipher *fallback; +}; + +/* + * Check if the key_len is supported by the HW. + * Returns 0 if it is, a positive number if it is not and software fallback is + * required or a negative number in case the key size is not valid + */ +static int need_fallback(unsigned int key_len) +{ switch (key_len) { case 16: - if (!has_aes_128) - goto fail; + if (!(keylen_flag & AES_KEYLEN_128)) + return 1; break; case 24: - if (!has_aes_192) - goto fail; - + if (!(keylen_flag & AES_KEYLEN_192)) + return 1; break; case 32: - if (!has_aes_256) - goto fail; + if (!(keylen_flag & AES_KEYLEN_256)) + return 1; break; default: - /* invalid key length */ - goto fail; + return -1; break; } + return 0; +} + +static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int ret; + + ret = need_fallback(key_len); + if (ret < 0) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } sctx->key_len = key_len; - memcpy(sctx->key, in_key, key_len); - return 0; -fail: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; + if (!ret) { + memcpy(sctx->key, in_key, key_len); + return 0; + } + + return setkey_fallback_cip(tfm, in_key, key_len); } -static void aes_encrypt(void *ctx, u8 *out, const u8 *in) +static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct s390_aes_ctx *sctx = ctx; + const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + if (unlikely(need_fallback(sctx->key_len))) { + crypto_cipher_encrypt_one(sctx->fallback.cip, out, in); + return; + } switch (sctx->key_len) { case 16: @@ -90,9 +157,14 @@ static void aes_encrypt(void *ctx, u8 *out, const u8 *in) } } -static void aes_decrypt(void *ctx, u8 *out, const u8 *in) +static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct s390_aes_ctx *sctx = ctx; + const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + if (unlikely(need_fallback(sctx->key_len))) { + crypto_cipher_decrypt_one(sctx->fallback.cip, out, in); + return; + } switch (sctx->key_len) { case 16: @@ -110,139 +182,804 @@ static void aes_decrypt(void *ctx, u8 *out, const u8 *in) } } -static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) +static int fallback_init_cip(struct crypto_tfm *tfm) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); + const char *name = tfm->__crt_alg->cra_name; + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - switch (sctx->key_len) { + sctx->fallback.cip = crypto_alloc_cipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(sctx->fallback.cip)) { + pr_err("Allocating AES fallback algorithm %s failed\n", + name); + return PTR_ERR(sctx->fallback.cip); + } + + return 0; +} + +static void fallback_exit_cip(struct crypto_tfm *tfm) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + crypto_free_cipher(sctx->fallback.cip); + sctx->fallback.cip = NULL; +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_module = THIS_MODULE, + .cra_init = fallback_init_cip, + .cra_exit = fallback_exit_cip, + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt, + } + } +}; + +static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int fallback_blk_dec(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + unsigned int ret; + struct crypto_blkcipher *tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + + tfm = desc->tfm; + desc->tfm = sctx->fallback.blk; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int fallback_blk_enc(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + unsigned int ret; + struct crypto_blkcipher *tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + + tfm = desc->tfm; + desc->tfm = sctx->fallback.blk; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + ret = need_fallback(key_len); + if (ret > 0) { + sctx->key_len = key_len; + return setkey_fallback_blk(tfm, in_key, key_len); + } + + switch (key_len) { case 16: - crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, nbytes); + sctx->enc = KM_AES_128_ENCRYPT; + sctx->dec = KM_AES_128_DECRYPT; break; case 24: - crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in, nbytes); + sctx->enc = KM_AES_192_ENCRYPT; + sctx->dec = KM_AES_192_DECRYPT; break; case 32: - crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in, nbytes); + sctx->enc = KM_AES_256_ENCRYPT; + sctx->dec = KM_AES_256_DECRYPT; break; } - return nbytes & ~(AES_BLOCK_SIZE - 1); + + return aes_set_key(tfm, in_key, key_len); } -static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) +static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, + struct blkcipher_walk *walk) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes; - switch (sctx->key_len) { + while ((nbytes = walk->nbytes)) { + /* only use complete blocks */ + unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_km(func, param, out, in, n); + if (ret < 0 || ret != n) + return -EIO; + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } + + return ret; +} + +static int ecb_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_enc(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk); +} + +static int ecb_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_dec(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk); +} + +static int fallback_init_blk(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + sctx->fallback.blk = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(sctx->fallback.blk)) { + pr_err("Allocating AES fallback algorithm %s failed\n", + name); + return PTR_ERR(sctx->fallback.blk); + } + + return 0; +} + +static void fallback_exit_blk(struct crypto_tfm *tfm) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(sctx->fallback.blk); + sctx->fallback.blk = NULL; +} + +static struct crypto_alg ecb_aes_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ecb_aes_set_key, + .encrypt = ecb_aes_encrypt, + .decrypt = ecb_aes_decrypt, + } + } +}; + +static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + ret = need_fallback(key_len); + if (ret > 0) { + sctx->key_len = key_len; + return setkey_fallback_blk(tfm, in_key, key_len); + } + + switch (key_len) { case 16: - crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, nbytes); + sctx->enc = KMC_AES_128_ENCRYPT; + sctx->dec = KMC_AES_128_DECRYPT; break; case 24: - crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in, nbytes); + sctx->enc = KMC_AES_192_ENCRYPT; + sctx->dec = KMC_AES_192_DECRYPT; break; case 32: - crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in, nbytes); + sctx->enc = KMC_AES_256_ENCRYPT; + sctx->dec = KMC_AES_256_DECRYPT; break; } - return nbytes & ~(AES_BLOCK_SIZE - 1); + + return aes_set_key(tfm, in_key, key_len); } -static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) +static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, + struct blkcipher_walk *walk) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + struct { + u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE]; + } param; + + if (!nbytes) + goto out; + + memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + memcpy(param.key, sctx->key, sctx->key_len); + do { + /* only use complete blocks */ + unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_kmc(func, ¶m, out, in, n); + if (ret < 0 || ret != n) + return -EIO; + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); + memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); + +out: + return ret; +} - memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE); - switch (sctx->key_len) { - case 16: - crypt_s390_kmc(KMC_AES_128_ENCRYPT, &sctx->iv, out, in, nbytes); +static int cbc_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_enc(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_aes_crypt(desc, sctx->enc, &walk); +} + +static int cbc_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_dec(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_aes_crypt(desc, sctx->dec, &walk); +} + +static struct crypto_alg cbc_aes_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_aes_set_key, + .encrypt = cbc_aes_encrypt, + .decrypt = cbc_aes_decrypt, + } + } +}; + +static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int xts_fallback_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_fallback_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_blkcipher *tfm; + unsigned int ret; + + tfm = desc->tfm; + desc->tfm = xts_ctx->fallback; + + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + switch (key_len) { + case 32: + xts_ctx->enc = KM_XTS_128_ENCRYPT; + xts_ctx->dec = KM_XTS_128_DECRYPT; + memcpy(xts_ctx->key + 16, in_key, 16); + memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); break; - case 24: - crypt_s390_kmc(KMC_AES_192_ENCRYPT, &sctx->iv, out, in, nbytes); + case 48: + xts_ctx->enc = 0; + xts_ctx->dec = 0; + xts_fallback_setkey(tfm, in_key, key_len); break; - case 32: - crypt_s390_kmc(KMC_AES_256_ENCRYPT, &sctx->iv, out, in, nbytes); + case 64: + xts_ctx->enc = KM_XTS_256_ENCRYPT; + xts_ctx->dec = KM_XTS_256_DECRYPT; + memcpy(xts_ctx->key, in_key, 32); + memcpy(xts_ctx->pcc_key, in_key + 32, 32); break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; } - memcpy(desc->info, &sctx->iv, AES_BLOCK_SIZE); + xts_ctx->key_len = key_len; + return 0; +} - return nbytes & ~(AES_BLOCK_SIZE - 1); +static int xts_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_xts_ctx *xts_ctx, + struct blkcipher_walk *walk) +{ + unsigned int offset = (xts_ctx->key_len >> 1) & 0x10; + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + unsigned int n; + u8 *in, *out; + struct pcc_param pcc_param; + struct { + u8 key[32]; + u8 init[16]; + } xts_param; + + if (!nbytes) + goto out; + + memset(pcc_param.block, 0, sizeof(pcc_param.block)); + memset(pcc_param.bit, 0, sizeof(pcc_param.bit)); + memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + memcpy(pcc_param.key, xts_ctx->pcc_key, 32); + ret = crypt_s390_pcc(func, &pcc_param.key[offset]); + if (ret < 0) + return -EIO; + + memcpy(xts_param.key, xts_ctx->key, 32); + memcpy(xts_param.init, pcc_param.xts, 16); + do { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + + ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n); + if (ret < 0 || ret != n) + return -EIO; + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); +out: + return ret; } -static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, - const u8 *in, unsigned int nbytes) +static int xts_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm); + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; - memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE); - switch (sctx->key_len) { + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_encrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk); +} + +static int xts_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + if (unlikely(xts_ctx->key_len == 48)) + return xts_fallback_decrypt(desc, dst, src, nbytes); + + blkcipher_walk_init(&walk, dst, src, nbytes); + return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk); +} + +static int xts_fallback_init(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + xts_ctx->fallback = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(xts_ctx->fallback)) { + pr_err("Allocating XTS fallback algorithm %s failed\n", + name); + return PTR_ERR(xts_ctx->fallback); + } + return 0; +} + +static void xts_fallback_exit(struct crypto_tfm *tfm) +{ + struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(xts_ctx->fallback); + xts_ctx->fallback = NULL; +} + +static struct crypto_alg xts_aes_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_xts_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = xts_fallback_init, + .cra_exit = xts_fallback_exit, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aes_set_key, + .encrypt = xts_aes_encrypt, + .decrypt = xts_aes_decrypt, + } + } +}; + +static int xts_aes_alg_reg; + +static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + switch (key_len) { case 16: - crypt_s390_kmc(KMC_AES_128_DECRYPT, &sctx->iv, out, in, nbytes); + sctx->enc = KMCTR_AES_128_ENCRYPT; + sctx->dec = KMCTR_AES_128_DECRYPT; break; case 24: - crypt_s390_kmc(KMC_AES_192_DECRYPT, &sctx->iv, out, in, nbytes); + sctx->enc = KMCTR_AES_192_ENCRYPT; + sctx->dec = KMCTR_AES_192_DECRYPT; break; case 32: - crypt_s390_kmc(KMC_AES_256_DECRYPT, &sctx->iv, out, in, nbytes); + sctx->enc = KMCTR_AES_256_ENCRYPT; + sctx->dec = KMCTR_AES_256_DECRYPT; break; } - return nbytes & ~(AES_BLOCK_SIZE - 1); + + return aes_set_key(tfm, in_key, key_len); } +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr + i, AES_BLOCK_SIZE); + } + return n; +} -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, +static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, + struct s390_aes_ctx *sctx, struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + unsigned int n, nbytes; + u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; + + if (!walk->nbytes) + return ret; + + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= AES_BLOCK_SIZE) { + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = AES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, sctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); + return -EIO; + } + if (n > AES_BLOCK_SIZE) + memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(ctrptr, AES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } else { + if (!nbytes) + memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE); + } + /* + * final block may be < AES_BLOCK_SIZE, copy only nbytes + */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, sctx->key, buf, in, + AES_BLOCK_SIZE, ctrbuf); + if (ret < 0 || ret != AES_BLOCK_SIZE) + return -EIO; + memcpy(out, buf, nbytes); + crypto_inc(ctrbuf, AES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE); + } + + return ret; +} + +static int ctr_aes_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->enc, sctx, &walk); +} + +static int ctr_aes_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_aes_crypt(desc, sctx->dec, sctx, &walk); +} + +static struct crypto_alg ctr_aes_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, .cra_ctxsize = sizeof(struct s390_aes_ctx), + .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt, - .cia_encrypt_ecb = aes_encrypt_ecb, - .cia_decrypt_ecb = aes_decrypt_ecb, - .cia_encrypt_cbc = aes_encrypt_cbc, - .cia_decrypt_cbc = aes_decrypt_cbc, + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_aes_set_key, + .encrypt = ctr_aes_encrypt, + .decrypt = ctr_aes_decrypt, } } }; -static int __init aes_init(void) +static int ctr_aes_alg_reg; + +static int __init aes_s390_init(void) { int ret; - if (crypt_s390_func_available(KM_AES_128_ENCRYPT)) - has_aes_128 = 1; - if (crypt_s390_func_available(KM_AES_192_ENCRYPT)) - has_aes_192 = 1; - if (crypt_s390_func_available(KM_AES_256_ENCRYPT)) - has_aes_256 = 1; + if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA)) + keylen_flag |= AES_KEYLEN_128; + if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA)) + keylen_flag |= AES_KEYLEN_192; + if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA)) + keylen_flag |= AES_KEYLEN_256; + + if (!keylen_flag) + return -EOPNOTSUPP; - if (!has_aes_128 && !has_aes_192 && !has_aes_256) - return -ENOSYS; + /* z9 109 and z9 BC/EC only support 128 bit key length */ + if (keylen_flag == AES_KEYLEN_128) + pr_info("AES hardware acceleration is only available for" + " 128-bit keys\n"); ret = crypto_register_alg(&aes_alg); - if (ret != 0) - printk(KERN_INFO "crypt_s390: aes_s390 couldn't be loaded.\n"); + if (ret) + goto aes_err; + + ret = crypto_register_alg(&ecb_aes_alg); + if (ret) + goto ecb_aes_err; + + ret = crypto_register_alg(&cbc_aes_alg); + if (ret) + goto cbc_aes_err; + + if (crypt_s390_func_available(KM_XTS_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KM_XTS_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&xts_aes_alg); + if (ret) + goto xts_aes_err; + xts_aes_alg_reg = 1; + } + + if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_AES_256_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_aes_err; + } + ret = crypto_register_alg(&ctr_aes_alg); + if (ret) { + free_page((unsigned long) ctrblk); + goto ctr_aes_err; + } + ctr_aes_alg_reg = 1; + } + +out: return ret; + +ctr_aes_err: + crypto_unregister_alg(&xts_aes_alg); +xts_aes_err: + crypto_unregister_alg(&cbc_aes_alg); +cbc_aes_err: + crypto_unregister_alg(&ecb_aes_alg); +ecb_aes_err: + crypto_unregister_alg(&aes_alg); +aes_err: + goto out; } -static void __exit aes_fini(void) +static void __exit aes_s390_fini(void) { + if (ctr_aes_alg_reg) { + crypto_unregister_alg(&ctr_aes_alg); + free_page((unsigned long) ctrblk); + } + if (xts_aes_alg_reg) + crypto_unregister_alg(&xts_aes_alg); + crypto_unregister_alg(&cbc_aes_alg); + crypto_unregister_alg(&ecb_aes_alg); crypto_unregister_alg(&aes_alg); } -module_init(aes_init); -module_exit(aes_fini); +module_init(aes_s390_init); +module_exit(aes_s390_fini); -MODULE_ALIAS("aes"); +MODULE_ALIAS("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); - diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index d1c259a7fe3..6c5cc6da711 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -3,8 +3,9 @@ * * Support for s390 cryptographic instructions. * - * Copyright (C) 2003 IBM Deutschland GmbH, IBM Corporation - * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Copyright IBM Corp. 2003, 2007 + * Author(s): Thomas Spatzier + * Jan Glauber (jan.glauber@de.ibm.com) * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -16,20 +17,30 @@ #define _CRYPTO_ARCH_S390_CRYPT_S390_H #include <asm/errno.h> +#include <asm/facility.h> #define CRYPT_S390_OP_MASK 0xFF00 #define CRYPT_S390_FUNC_MASK 0x00FF -/* s930 cryptographic operations */ +#define CRYPT_S390_PRIORITY 300 +#define CRYPT_S390_COMPOSITE_PRIORITY 400 + +#define CRYPT_S390_MSA 0x1 +#define CRYPT_S390_MSA3 0x2 +#define CRYPT_S390_MSA4 0x4 + +/* s390 cryptographic operations */ enum crypt_s390_operations { CRYPT_S390_KM = 0x0100, CRYPT_S390_KMC = 0x0200, CRYPT_S390_KIMD = 0x0300, CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500 + CRYPT_S390_KMAC = 0x0500, + CRYPT_S390_KMCTR = 0x0600 }; -/* function codes for KM (CIPHER MESSAGE) instruction +/* + * function codes for KM (CIPHER MESSAGE) instruction * 0x80 is the decipher modifier bit */ enum crypt_s390_km_func { @@ -46,9 +57,14 @@ enum crypt_s390_km_func { KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80, KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14, KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80, + KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32, + KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80, + KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34, + KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80, }; -/* function codes for KMC (CIPHER MESSAGE WITH CHAINING) +/* + * function codes for KMC (CIPHER MESSAGE WITH CHAINING) * instruction */ enum crypt_s390_kmc_func { @@ -65,27 +81,54 @@ enum crypt_s390_kmc_func { KMC_AES_192_DECRYPT = CRYPT_S390_KMC | 0x13 | 0x80, KMC_AES_256_ENCRYPT = CRYPT_S390_KMC | 0x14, KMC_AES_256_DECRYPT = CRYPT_S390_KMC | 0x14 | 0x80, + KMC_PRNG = CRYPT_S390_KMC | 0x43, +}; + +/* + * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER) + * instruction + */ +enum crypt_s390_kmctr_func { + KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0, + KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1, + KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80, + KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2, + KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80, + KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3, + KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80, + KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12, + KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80, + KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13, + KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80, + KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14, + KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80, }; -/* function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) +/* + * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) * instruction */ enum crypt_s390_kimd_func { KIMD_QUERY = CRYPT_S390_KIMD | 0, KIMD_SHA_1 = CRYPT_S390_KIMD | 1, KIMD_SHA_256 = CRYPT_S390_KIMD | 2, + KIMD_SHA_512 = CRYPT_S390_KIMD | 3, + KIMD_GHASH = CRYPT_S390_KIMD | 65, }; -/* function codes for KLMD (COMPUTE LAST MESSAGE DIGEST) +/* + * function codes for KLMD (COMPUTE LAST MESSAGE DIGEST) * instruction */ enum crypt_s390_klmd_func { KLMD_QUERY = CRYPT_S390_KLMD | 0, KLMD_SHA_1 = CRYPT_S390_KLMD | 1, KLMD_SHA_256 = CRYPT_S390_KLMD | 2, + KLMD_SHA_512 = CRYPT_S390_KLMD | 3, }; -/* function codes for KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) +/* + * function codes for KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) * instruction */ enum crypt_s390_kmac_func { @@ -95,303 +138,300 @@ enum crypt_s390_kmac_func { KMAC_TDEA_192 = CRYPT_S390_KMAC | 3 }; -/* status word for s390 crypto instructions' QUERY functions */ -struct crypt_s390_query_status { - u64 high; - u64 low; -}; - -/* - * Standard fixup and ex_table sections for crypt_s390 inline functions. - * label 0: the s390 crypto operation - * label 1: just after 1 to catch illegal operation exception - * (unsupported model) - * label 6: the return point after fixup - * label 7: set error value if exception _in_ crypto operation - * label 8: set error value if illegal operation exception - * [ret] is the variable to receive the error code - * [ERR] is the error code value - */ -#ifndef CONFIG_64BIT -#define __crypt_s390_fixup \ - ".section .fixup,\"ax\" \n" \ - "7: lhi %0,%h[e1] \n" \ - " bras 1,9f \n" \ - " .long 6b \n" \ - "8: lhi %0,%h[e2] \n" \ - " bras 1,9f \n" \ - " .long 6b \n" \ - "9: l 1,0(1) \n" \ - " br 1 \n" \ - ".previous \n" \ - ".section __ex_table,\"a\" \n" \ - " .align 4 \n" \ - " .long 0b,7b \n" \ - " .long 1b,8b \n" \ - ".previous" -#else /* CONFIG_64BIT */ -#define __crypt_s390_fixup \ - ".section .fixup,\"ax\" \n" \ - "7: lhi %0,%h[e1] \n" \ - " jg 6b \n" \ - "8: lhi %0,%h[e2] \n" \ - " jg 6b \n" \ - ".previous\n" \ - ".section __ex_table,\"a\" \n" \ - " .align 8 \n" \ - " .quad 0b,7b \n" \ - " .quad 1b,8b \n" \ - ".previous" -#endif /* CONFIG_64BIT */ - -/* - * Standard code for setting the result of s390 crypto instructions. - * %0: the register which will receive the result - * [result]: the register containing the result (e.g. second operand length - * to compute number of processed bytes]. - */ -#ifndef CONFIG_64BIT -#define __crypt_s390_set_result \ - " lr %0,%[result] \n" -#else /* CONFIG_64BIT */ -#define __crypt_s390_set_result \ - " lgr %0,%[result] \n" -#endif - -/* +/** + * crypt_s390_km: + * @func: the function code passed to KM; see crypt_s390_km_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * * Executes the KM (CIPHER MESSAGE) operation of the CPU. - * @param func: the function code passed to KM; see crypt_s390_km_func - * @param param: address of parameter block; see POP for details on each func - * @param dest: address of destination memory area - * @param src: address of source memory area - * @param src_len: length of src operand in bytes - * @returns < zero for failure, 0 for the query func, number of processed bytes - * for encryption/decryption funcs + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs */ -static inline int -crypt_s390_km(long func, void* param, u8* dest, const u8* src, long src_len) +static inline int crypt_s390_km(long func, void *param, + u8 *dest, const u8 *src, long src_len) { register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void* __param asm("1") = param; - register u8* __dest asm("4") = dest; - register const u8* __src asm("2") = src; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; int ret; - ret = 0; - __asm__ __volatile__ ( - "0: .insn rre,0xB92E0000,%1,%2 \n" /* KM opcode */ + asm volatile( + "0: .insn rre,0xb92e0000,%3,%1 \n" /* KM opcode */ "1: brc 1,0b \n" /* handle partial completion */ - __crypt_s390_set_result - "6: \n" - __crypt_s390_fixup - : "+d" (ret), "+a" (__dest), "+a" (__src), - [result] "+d" (__src_len) - : [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func), - "a" (__param) - : "cc", "memory" - ); - if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){ - ret = src_len - ret; - } - return ret; + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; } -/* +/** + * crypt_s390_kmc: + * @func: the function code passed to KM; see crypt_s390_kmc_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * * Executes the KMC (CIPHER MESSAGE WITH CHAINING) operation of the CPU. - * @param func: the function code passed to KM; see crypt_s390_kmc_func - * @param param: address of parameter block; see POP for details on each func - * @param dest: address of destination memory area - * @param src: address of source memory area - * @param src_len: length of src operand in bytes - * @returns < zero for failure, 0 for the query func, number of processed bytes - * for encryption/decryption funcs + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs */ -static inline int -crypt_s390_kmc(long func, void* param, u8* dest, const u8* src, long src_len) +static inline int crypt_s390_kmc(long func, void *param, + u8 *dest, const u8 *src, long src_len) { register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void* __param asm("1") = param; - register u8* __dest asm("4") = dest; - register const u8* __src asm("2") = src; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; int ret; - ret = 0; - __asm__ __volatile__ ( - "0: .insn rre,0xB92F0000,%1,%2 \n" /* KMC opcode */ + asm volatile( + "0: .insn rre,0xb92f0000,%3,%1 \n" /* KMC opcode */ "1: brc 1,0b \n" /* handle partial completion */ - __crypt_s390_set_result - "6: \n" - __crypt_s390_fixup - : "+d" (ret), "+a" (__dest), "+a" (__src), - [result] "+d" (__src_len) - : [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func), - "a" (__param) - : "cc", "memory" - ); - if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){ - ret = src_len - ret; - } - return ret; + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; } -/* +/** + * crypt_s390_kimd: + * @func: the function code passed to KM; see crypt_s390_kimd_func + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * * Executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) operation * of the CPU. - * @param func: the function code passed to KM; see crypt_s390_kimd_func - * @param param: address of parameter block; see POP for details on each func - * @param src: address of source memory area - * @param src_len: length of src operand in bytes - * @returns < zero for failure, 0 for the query func, number of processed bytes - * for digest funcs + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for digest funcs */ -static inline int -crypt_s390_kimd(long func, void* param, const u8* src, long src_len) +static inline int crypt_s390_kimd(long func, void *param, + const u8 *src, long src_len) { register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void* __param asm("1") = param; - register const u8* __src asm("2") = src; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; register long __src_len asm("3") = src_len; int ret; - ret = 0; - __asm__ __volatile__ ( - "0: .insn rre,0xB93E0000,%1,%1 \n" /* KIMD opcode */ - "1: brc 1,0b \n" /* handle partical completion */ - __crypt_s390_set_result - "6: \n" - __crypt_s390_fixup - : "+d" (ret), "+a" (__src), [result] "+d" (__src_len) - : [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func), - "a" (__param) - : "cc", "memory" - ); - if (ret >= 0 && (func & CRYPT_S390_FUNC_MASK)){ - ret = src_len - ret; - } - return ret; + asm volatile( + "0: .insn rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; } -/* +/** + * crypt_s390_klmd: + * @func: the function code passed to KM; see crypt_s390_klmd_func + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * * Executes the KLMD (COMPUTE LAST MESSAGE DIGEST) operation of the CPU. - * @param func: the function code passed to KM; see crypt_s390_klmd_func - * @param param: address of parameter block; see POP for details on each func - * @param src: address of source memory area - * @param src_len: length of src operand in bytes - * @returns < zero for failure, 0 for the query func, number of processed bytes - * for digest funcs + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for digest funcs */ -static inline int -crypt_s390_klmd(long func, void* param, const u8* src, long src_len) +static inline int crypt_s390_klmd(long func, void *param, + const u8 *src, long src_len) { register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void* __param asm("1") = param; - register const u8* __src asm("2") = src; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; register long __src_len asm("3") = src_len; int ret; - ret = 0; - __asm__ __volatile__ ( - "0: .insn rre,0xB93F0000,%1,%1 \n" /* KLMD opcode */ - "1: brc 1,0b \n" /* handle partical completion */ - __crypt_s390_set_result - "6: \n" - __crypt_s390_fixup - : "+d" (ret), "+a" (__src), [result] "+d" (__src_len) - : [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func), - "a" (__param) - : "cc", "memory" - ); - if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){ - ret = src_len - ret; - } - return ret; + asm volatile( + "0: .insn rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; } -/* +/** + * crypt_s390_kmac: + * @func: the function code passed to KM; see crypt_s390_klmd_func + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * * Executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) operation * of the CPU. - * @param func: the function code passed to KM; see crypt_s390_klmd_func - * @param param: address of parameter block; see POP for details on each func - * @param src: address of source memory area - * @param src_len: length of src operand in bytes - * @returns < zero for failure, 0 for the query func, number of processed bytes - * for digest funcs + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for digest funcs */ -static inline int -crypt_s390_kmac(long func, void* param, const u8* src, long src_len) +static inline int crypt_s390_kmac(long func, void *param, + const u8 *src, long src_len) { register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void* __param asm("1") = param; - register const u8* __src asm("2") = src; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; register long __src_len asm("3") = src_len; int ret; - ret = 0; - __asm__ __volatile__ ( - "0: .insn rre,0xB91E0000,%5,%5 \n" /* KMAC opcode */ - "1: brc 1,0b \n" /* handle partical completion */ - __crypt_s390_set_result - "6: \n" - __crypt_s390_fixup - : "+d" (ret), "+a" (__src), [result] "+d" (__src_len) - : [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func), - "a" (__param) - : "cc", "memory" - ); - if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){ - ret = src_len - ret; - } - return ret; + asm volatile( + "0: .insn rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+a" (__src), "+d" (__src_len) + : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; } /** + * crypt_s390_kmctr: + * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @counter: address of counter value + * + * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU. + * + * Returns -1 for failure, 0 for the query func, number of processed + * bytes for encryption/decryption funcs + */ +static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, + const u8 *src, long src_len, u8 *counter) +{ + register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; + register void *__param asm("1") = param; + register const u8 *__src asm("2") = src; + register long __src_len asm("3") = src_len; + register u8 *__dest asm("4") = dest; + register u8 *__ctr asm("6") = counter; + int ret = -1; + + asm volatile( + "0: .insn rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), + "+a" (__ctr) + : "d" (__func), "a" (__param) : "cc", "memory"); + if (ret < 0) + return ret; + return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; +} + +/** + * crypt_s390_func_available: + * @func: the function code of the specific function; 0 if op in general + * * Tests if a specific crypto function is implemented on the machine. - * @param func: the function code of the specific function; 0 if op in general - * @return 1 if func available; 0 if func or op in general not available + * + * Returns 1 if func available; 0 if func or op in general not available */ -static inline int -crypt_s390_func_available(int func) +static inline int crypt_s390_func_available(int func, + unsigned int facility_mask) { + unsigned char status[16]; int ret; - struct crypt_s390_query_status status = { - .high = 0, - .low = 0 - }; - switch (func & CRYPT_S390_OP_MASK){ - case CRYPT_S390_KM: - ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); - break; - case CRYPT_S390_KMC: - ret = crypt_s390_kmc(KMC_QUERY, &status, NULL, NULL, 0); - break; - case CRYPT_S390_KIMD: - ret = crypt_s390_kimd(KIMD_QUERY, &status, NULL, 0); - break; - case CRYPT_S390_KLMD: - ret = crypt_s390_klmd(KLMD_QUERY, &status, NULL, 0); - break; - case CRYPT_S390_KMAC: - ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); - break; - default: - ret = 0; - return ret; - } - if (ret >= 0){ - func &= CRYPT_S390_FUNC_MASK; - func &= 0x7f; //mask modifier bit - if (func < 64){ - ret = (status.high >> (64 - func - 1)) & 0x1; - } else { - ret = (status.low >> (128 - func - 1)) & 0x1; - } - } else { - ret = 0; + if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) + return 0; + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) + return 0; + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) + return 0; + + switch (func & CRYPT_S390_OP_MASK) { + case CRYPT_S390_KM: + ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); + break; + case CRYPT_S390_KMC: + ret = crypt_s390_kmc(KMC_QUERY, &status, NULL, NULL, 0); + break; + case CRYPT_S390_KIMD: + ret = crypt_s390_kimd(KIMD_QUERY, &status, NULL, 0); + break; + case CRYPT_S390_KLMD: + ret = crypt_s390_klmd(KLMD_QUERY, &status, NULL, 0); + break; + case CRYPT_S390_KMAC: + ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); + break; + case CRYPT_S390_KMCTR: + ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0, + NULL); + break; + default: + return 0; } + if (ret < 0) + return 0; + func &= CRYPT_S390_FUNC_MASK; + func &= 0x7f; /* mask modifier bit */ + return (status[func >> 3] & (0x80 >> (func & 7))) != 0; +} + +/** + * crypt_s390_pcc: + * @func: the function code passed to KM; see crypt_s390_km_func + * @param: address of parameter block; see POP for details on each func + * + * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU. + * + * Returns -1 for failure, 0 for success. + */ +static inline int crypt_s390_pcc(long func, void *param) +{ + register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */ + register void *__param asm("1") = param; + int ret = -1; + + asm volatile( + "0: .insn rre,0xb92c0000,0,0 \n" /* PCC opcode */ + "1: brc 1,0b \n" /* handle partial completion */ + " la %0,0\n" + "2:\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "+d" (ret) + : "d" (__func), "a" (__param) : "cc", "memory"); return ret; } -#endif // _CRYPTO_ARCH_S390_CRYPT_S390_H + +#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/crypt_s390_query.c b/arch/s390/crypto/crypt_s390_query.c deleted file mode 100644 index def02bdc44a..00000000000 --- a/arch/s390/crypto/crypt_s390_query.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Cryptographic API. - * - * Support for s390 cryptographic instructions. - * Testing module for querying processor crypto capabilities. - * - * Copyright (c) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Thomas Spatzier (tspat@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <asm/errno.h> -#include "crypt_s390.h" - -static void query_available_functions(void) -{ - printk(KERN_INFO "#####################\n"); - - /* query available KM functions */ - printk(KERN_INFO "KM_QUERY: %d\n", - crypt_s390_func_available(KM_QUERY)); - printk(KERN_INFO "KM_DEA: %d\n", - crypt_s390_func_available(KM_DEA_ENCRYPT)); - printk(KERN_INFO "KM_TDEA_128: %d\n", - crypt_s390_func_available(KM_TDEA_128_ENCRYPT)); - printk(KERN_INFO "KM_TDEA_192: %d\n", - crypt_s390_func_available(KM_TDEA_192_ENCRYPT)); - printk(KERN_INFO "KM_AES_128: %d\n", - crypt_s390_func_available(KM_AES_128_ENCRYPT)); - printk(KERN_INFO "KM_AES_192: %d\n", - crypt_s390_func_available(KM_AES_192_ENCRYPT)); - printk(KERN_INFO "KM_AES_256: %d\n", - crypt_s390_func_available(KM_AES_256_ENCRYPT)); - - /* query available KMC functions */ - printk(KERN_INFO "KMC_QUERY: %d\n", - crypt_s390_func_available(KMC_QUERY)); - printk(KERN_INFO "KMC_DEA: %d\n", - crypt_s390_func_available(KMC_DEA_ENCRYPT)); - printk(KERN_INFO "KMC_TDEA_128: %d\n", - crypt_s390_func_available(KMC_TDEA_128_ENCRYPT)); - printk(KERN_INFO "KMC_TDEA_192: %d\n", - crypt_s390_func_available(KMC_TDEA_192_ENCRYPT)); - printk(KERN_INFO "KMC_AES_128: %d\n", - crypt_s390_func_available(KMC_AES_128_ENCRYPT)); - printk(KERN_INFO "KMC_AES_192: %d\n", - crypt_s390_func_available(KMC_AES_192_ENCRYPT)); - printk(KERN_INFO "KMC_AES_256: %d\n", - crypt_s390_func_available(KMC_AES_256_ENCRYPT)); - - /* query available KIMD fucntions */ - printk(KERN_INFO "KIMD_QUERY: %d\n", - crypt_s390_func_available(KIMD_QUERY)); - printk(KERN_INFO "KIMD_SHA_1: %d\n", - crypt_s390_func_available(KIMD_SHA_1)); - printk(KERN_INFO "KIMD_SHA_256: %d\n", - crypt_s390_func_available(KIMD_SHA_256)); - - /* query available KLMD functions */ - printk(KERN_INFO "KLMD_QUERY: %d\n", - crypt_s390_func_available(KLMD_QUERY)); - printk(KERN_INFO "KLMD_SHA_1: %d\n", - crypt_s390_func_available(KLMD_SHA_1)); - printk(KERN_INFO "KLMD_SHA_256: %d\n", - crypt_s390_func_available(KLMD_SHA_256)); - - /* query available KMAC functions */ - printk(KERN_INFO "KMAC_QUERY: %d\n", - crypt_s390_func_available(KMAC_QUERY)); - printk(KERN_INFO "KMAC_DEA: %d\n", - crypt_s390_func_available(KMAC_DEA)); - printk(KERN_INFO "KMAC_TDEA_128: %d\n", - crypt_s390_func_available(KMAC_TDEA_128)); - printk(KERN_INFO "KMAC_TDEA_192: %d\n", - crypt_s390_func_available(KMAC_TDEA_192)); -} - -static int init(void) -{ - struct crypt_s390_query_status status = { - .high = 0, - .low = 0 - }; - - printk(KERN_INFO "crypt_s390: querying available crypto functions\n"); - crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); - printk(KERN_INFO "KM:\t%016llx %016llx\n", - (unsigned long long) status.high, - (unsigned long long) status.low); - status.high = status.low = 0; - crypt_s390_kmc(KMC_QUERY, &status, NULL, NULL, 0); - printk(KERN_INFO "KMC:\t%016llx %016llx\n", - (unsigned long long) status.high, - (unsigned long long) status.low); - status.high = status.low = 0; - crypt_s390_kimd(KIMD_QUERY, &status, NULL, 0); - printk(KERN_INFO "KIMD:\t%016llx %016llx\n", - (unsigned long long) status.high, - (unsigned long long) status.low); - status.high = status.low = 0; - crypt_s390_klmd(KLMD_QUERY, &status, NULL, 0); - printk(KERN_INFO "KLMD:\t%016llx %016llx\n", - (unsigned long long) status.high, - (unsigned long long) status.low); - status.high = status.low = 0; - crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); - printk(KERN_INFO "KMAC:\t%016llx %016llx\n", - (unsigned long long) status.high, - (unsigned long long) status.low); - - query_available_functions(); - return -ECANCELED; -} - -static void __exit cleanup(void) -{ -} - -module_init(init); -module_exit(cleanup); - -MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/crypto_des.h b/arch/s390/crypto/crypto_des.h deleted file mode 100644 index c964b64111d..00000000000 --- a/arch/s390/crypto/crypto_des.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#ifndef __CRYPTO_DES_H__ -#define __CRYPTO_DES_H__ - -extern int crypto_des_check_key(const u8*, unsigned int, u32*); - -#endif //__CRYPTO_DES_H__ diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c deleted file mode 100644 index e3f5c5f238f..00000000000 --- a/arch/s390/crypto/des_check_key.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Cryptographic API. - * - * Function for checking keys for the DES and Tripple DES Encryption - * algorithms. - * - * Originally released as descore by Dana L. How <how@isl.stanford.edu>. - * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel. - * Derived from Cryptoapi and Nettle implementations, adapted for in-place - * scatterlist interface. Changed LGPL to GPL per section 3 of the LGPL. - * - * s390 Version: - * Copyright (C) 2003 IBM Deutschland GmbH, IBM Corporation - * Author(s): Thomas Spatzier (tspat@de.ibm.com) - * - * Derived from "crypto/des.c" - * Copyright (c) 1992 Dana L. How. - * Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de> - * Copyright (c) Gisle Sflensminde <gisle@ii.uib.no> - * Copyright (C) 2001 Niels Mvller. - * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/crypto.h> - -#define ROR(d,c,o) ((d) = (d) >> (c) | (d) << (o)) - -static const u8 parity[] = { - 8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0, - 4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8, -}; - -/* - * RFC2451: Weak key checks SHOULD be performed. - */ -int -crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags) -{ - u32 n, w; - - n = parity[key[0]]; n <<= 4; - n |= parity[key[1]]; n <<= 4; - n |= parity[key[2]]; n <<= 4; - n |= parity[key[3]]; n <<= 4; - n |= parity[key[4]]; n <<= 4; - n |= parity[key[5]]; n <<= 4; - n |= parity[key[6]]; n <<= 4; - n |= parity[key[7]]; - w = 0x88888888L; - - if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY) - && !((n - (w >> 3)) & w)) { /* 1 in 10^10 keys passes this test */ - if (n < 0x41415151) { - if (n < 0x31312121) { - if (n < 0x14141515) { - /* 01 01 01 01 01 01 01 01 */ - if (n == 0x11111111) goto weak; - /* 01 1F 01 1F 01 0E 01 0E */ - if (n == 0x13131212) goto weak; - } else { - /* 01 E0 01 E0 01 F1 01 F1 */ - if (n == 0x14141515) goto weak; - /* 01 FE 01 FE 01 FE 01 FE */ - if (n == 0x16161616) goto weak; - } - } else { - if (n < 0x34342525) { - /* 1F 01 1F 01 0E 01 0E 01 */ - if (n == 0x31312121) goto weak; - /* 1F 1F 1F 1F 0E 0E 0E 0E (?) */ - if (n == 0x33332222) goto weak; - } else { - /* 1F E0 1F E0 0E F1 0E F1 */ - if (n == 0x34342525) goto weak; - /* 1F FE 1F FE 0E FE 0E FE */ - if (n == 0x36362626) goto weak; - } - } - } else { - if (n < 0x61616161) { - if (n < 0x44445555) { - /* E0 01 E0 01 F1 01 F1 01 */ - if (n == 0x41415151) goto weak; - /* E0 1F E0 1F F1 0E F1 0E */ - if (n == 0x43435252) goto weak; - } else { - /* E0 E0 E0 E0 F1 F1 F1 F1 (?) */ - if (n == 0x44445555) goto weak; - /* E0 FE E0 FE F1 FE F1 FE */ - if (n == 0x46465656) goto weak; - } - } else { - if (n < 0x64646565) { - /* FE 01 FE 01 FE 01 FE 01 */ - if (n == 0x61616161) goto weak; - /* FE 1F FE 1F FE 0E FE 0E */ - if (n == 0x63636262) goto weak; - } else { - /* FE E0 FE E0 FE F1 FE F1 */ - if (n == 0x64646565) goto weak; - /* FE FE FE FE FE FE FE FE */ - if (n == 0x66666666) goto weak; - } - } - } - } - return 0; -weak: - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; -} - -EXPORT_SYMBOL(crypto_des_check_key); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Key Check function for DES & DES3 Cipher Algorithms"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index a38bb2a3eef..7acb77f7ef1 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -3,9 +3,9 @@ * * s390 implementation of the DES Cipher Algorithm. * - * Copyright (c) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Thomas Spatzier (tspat@de.ibm.com) - * + * Copyright IBM Corp. 2003, 2011 + * Author(s): Thomas Spatzier + * Jan Glauber (jan.glauber@de.ibm.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -13,155 +13,214 @@ * (at your option) any later version. * */ + #include <linux/init.h> #include <linux/module.h> -#include <linux/mm.h> -#include <linux/errno.h> -#include <asm/scatterlist.h> #include <linux/crypto.h> -#include "crypt_s390.h" -#include "crypto_des.h" +#include <crypto/algapi.h> +#include <crypto/des.h> -#define DES_BLOCK_SIZE 8 -#define DES_KEY_SIZE 8 +#include "crypt_s390.h" -#define DES3_128_KEY_SIZE (2 * DES_KEY_SIZE) -#define DES3_128_BLOCK_SIZE DES_BLOCK_SIZE +#define DES3_KEY_SIZE (3 * DES_KEY_SIZE) -#define DES3_192_KEY_SIZE (3 * DES_KEY_SIZE) -#define DES3_192_BLOCK_SIZE DES_BLOCK_SIZE +static u8 *ctrblk; +static DEFINE_SPINLOCK(ctrblk_lock); -struct crypt_s390_des_ctx { +struct s390_des_ctx { u8 iv[DES_BLOCK_SIZE]; - u8 key[DES_KEY_SIZE]; + u8 key[DES3_KEY_SIZE]; }; -struct crypt_s390_des3_128_ctx { - u8 iv[DES_BLOCK_SIZE]; - u8 key[DES3_128_KEY_SIZE]; -}; - -struct crypt_s390_des3_192_ctx { - u8 iv[DES_BLOCK_SIZE]; - u8 key[DES3_192_KEY_SIZE]; -}; - -static int -des_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) +static int des_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) { - struct crypt_s390_des_ctx *dctx; - int ret; + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + u32 tmp[DES_EXPKEY_WORDS]; - dctx = ctx; - //test if key is valid (not a weak key) - ret = crypto_des_check_key(key, keylen, flags); - if (ret == 0){ - memcpy(dctx->key, key, keylen); + /* check for weak keys */ + if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; } - return ret; -} + memcpy(ctx->key, key, key_len); + return 0; +} -static void -des_encrypt(void *ctx, u8 *dst, const u8 *src) +static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx; + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - dctx = ctx; - crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, dst, src, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } -static void -des_decrypt(void *ctx, u8 *dst, const u8 *src) +static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - struct crypt_s390_des_ctx *dctx; + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - dctx = ctx; - crypt_s390_km(KM_DEA_DECRYPT, dctx->key, dst, src, DES_BLOCK_SIZE); + crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE); } static struct crypto_alg des_alg = { .cra_name = "des", + .cra_driver_name = "des-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des_ctx), + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des_alg.cra_list), - .cra_u = { .cipher = { - .cia_min_keysize = DES_KEY_SIZE, - .cia_max_keysize = DES_KEY_SIZE, - .cia_setkey = des_setkey, - .cia_encrypt = des_encrypt, - .cia_decrypt = des_decrypt } } + .cra_u = { + .cipher = { + .cia_min_keysize = DES_KEY_SIZE, + .cia_max_keysize = DES_KEY_SIZE, + .cia_setkey = des_setkey, + .cia_encrypt = des_encrypt, + .cia_decrypt = des_decrypt, + } + } }; -/* - * RFC2451: - * - * For DES-EDE3, there is no known need to reject weak or - * complementation keys. Any weakness is obviated by the use of - * multiple keys. - * - * However, if the two independent 64-bit keys are equal, - * then the DES3 operation is simply the same as DES. - * Implementers MUST reject keys that exhibit this property. - * - */ -static int -des3_128_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) +static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, + u8 *key, struct blkcipher_walk *walk) { - int i, ret; - struct crypt_s390_des3_128_ctx *dctx; - const u8* temp_key = key; + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes; - dctx = ctx; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) { + while ((nbytes = walk->nbytes)) { + /* only use complete blocks */ + unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; - *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; - return -EINVAL; - } - for (i = 0; i < 2; i++, temp_key += DES_KEY_SIZE) { - ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags); - if (ret < 0) - return ret; + ret = crypt_s390_km(func, key, out, in, n); + if (ret < 0 || ret != n) + return -EIO; + + nbytes &= DES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); } - memcpy(dctx->key, key, keylen); - return 0; + + return ret; } -static void -des3_128_encrypt(void *ctx, u8 *dst, const u8 *src) +static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, + struct blkcipher_walk *walk) { - struct crypt_s390_des3_128_ctx *dctx; + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int ret = blkcipher_walk_virt(desc, walk); + unsigned int nbytes = walk->nbytes; + struct { + u8 iv[DES_BLOCK_SIZE]; + u8 key[DES3_KEY_SIZE]; + } param; + + if (!nbytes) + goto out; + + memcpy(param.iv, walk->iv, DES_BLOCK_SIZE); + memcpy(param.key, ctx->key, DES3_KEY_SIZE); + do { + /* only use complete blocks */ + unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1); + u8 *out = walk->dst.virt.addr; + u8 *in = walk->src.virt.addr; + + ret = crypt_s390_kmc(func, ¶m, out, in, n); + if (ret < 0 || ret != n) + return -EIO; + + nbytes &= DES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, walk, nbytes); + } while ((nbytes = walk->nbytes)); + memcpy(walk->iv, param.iv, DES_BLOCK_SIZE); + +out: + return ret; +} - dctx = ctx; - crypt_s390_km(KM_TDEA_128_ENCRYPT, dctx->key, dst, (void*)src, - DES3_128_BLOCK_SIZE); +static int ecb_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk); } -static void -des3_128_decrypt(void *ctx, u8 *dst, const u8 *src) +static int ecb_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - struct crypt_s390_des3_128_ctx *dctx; + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; - dctx = ctx; - crypt_s390_km(KM_TDEA_128_DECRYPT, dctx->key, dst, (void*)src, - DES3_128_BLOCK_SIZE); + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk); } -static struct crypto_alg des3_128_alg = { - .cra_name = "des3_ede128", - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = DES3_128_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_128_ctx), +static struct crypto_alg ecb_des_alg = { + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des3_128_alg.cra_list), - .cra_u = { .cipher = { - .cia_min_keysize = DES3_128_KEY_SIZE, - .cia_max_keysize = DES3_128_KEY_SIZE, - .cia_setkey = des3_128_setkey, - .cia_encrypt = des3_128_encrypt, - .cia_decrypt = des3_128_decrypt } } + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = des_setkey, + .encrypt = ecb_des_encrypt, + .decrypt = ecb_des_decrypt, + } + } +}; + +static int cbc_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk); +} + +static int cbc_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk); +} + +static struct crypto_alg cbc_des_alg = { + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = cbc_des_encrypt, + .decrypt = cbc_des_decrypt, + } + } }; /* @@ -177,105 +236,388 @@ static struct crypto_alg des3_128_alg = { * property. * */ -static int -des3_192_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) +static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int key_len) { - int i, ret; - struct crypt_s390_des3_192_ctx *dctx; - const u8* temp_key; - - dctx = ctx; - temp_key = key; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && - memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE))) { - - *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + + if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && + crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], + DES_KEY_SIZE)) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } - for (i = 0; i < 3; i++, temp_key += DES_KEY_SIZE) { - ret = crypto_des_check_key(temp_key, DES_KEY_SIZE, flags); - if (ret < 0){ - return ret; - } - } - memcpy(dctx->key, key, keylen); + memcpy(ctx->key, key, key_len); return 0; } -static void -des3_192_encrypt(void *ctx, u8 *dst, const u8 *src) +static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx; + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - dctx = ctx; - crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src, - DES3_192_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static void -des3_192_decrypt(void *ctx, u8 *dst, const u8 *src) +static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct crypt_s390_des3_192_ctx *dctx; + struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - dctx = ctx; - crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src, - DES3_192_BLOCK_SIZE); + crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); } -static struct crypto_alg des3_192_alg = { +static struct crypto_alg des3_alg = { .cra_name = "des3_ede", + .cra_driver_name = "des3_ede-s390", + .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = DES3_192_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_des3_192_ctx), + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(des3_192_alg.cra_list), - .cra_u = { .cipher = { - .cia_min_keysize = DES3_192_KEY_SIZE, - .cia_max_keysize = DES3_192_KEY_SIZE, - .cia_setkey = des3_192_setkey, - .cia_encrypt = des3_192_encrypt, - .cia_decrypt = des3_192_decrypt } } + .cra_u = { + .cipher = { + .cia_min_keysize = DES3_KEY_SIZE, + .cia_max_keysize = DES3_KEY_SIZE, + .cia_setkey = des3_setkey, + .cia_encrypt = des3_encrypt, + .cia_decrypt = des3_decrypt, + } + } }; +static int ecb_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk); +} -static int -init(void) +static int ecb_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - int ret; + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk); +} - if (!crypt_s390_func_available(KM_DEA_ENCRYPT) || - !crypt_s390_func_available(KM_TDEA_128_ENCRYPT) || - !crypt_s390_func_available(KM_TDEA_192_ENCRYPT)){ - return -ENOSYS; +static struct crypto_alg ecb_des3_alg = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .setkey = des3_setkey, + .encrypt = ecb_des3_encrypt, + .decrypt = ecb_des3_decrypt, + } } +}; + +static int cbc_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk); +} - ret = 0; - ret |= (crypto_register_alg(&des_alg) == 0)? 0:1; - ret |= (crypto_register_alg(&des3_128_alg) == 0)? 0:2; - ret |= (crypto_register_alg(&des3_192_alg) == 0)? 0:4; - if (ret){ - crypto_unregister_alg(&des3_192_alg); - crypto_unregister_alg(&des3_128_alg); - crypto_unregister_alg(&des_alg); - return -EEXIST; +static int cbc_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk); +} + +static struct crypto_alg cbc_des3_alg = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey, + .encrypt = cbc_des3_encrypt, + .decrypt = cbc_des3_decrypt, + } } +}; - printk(KERN_INFO "crypt_s390: des_s390 loaded.\n"); - return 0; +static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes) +{ + unsigned int i, n; + + /* align to block size, max. PAGE_SIZE */ + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1); + for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) { + memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE); + crypto_inc(ctrptr + i, DES_BLOCK_SIZE); + } + return n; +} + +static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, + struct s390_des_ctx *ctx, + struct blkcipher_walk *walk) +{ + int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); + unsigned int n, nbytes; + u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE]; + u8 *out, *in, *ctrptr = ctrbuf; + + if (!walk->nbytes) + return ret; + + if (spin_trylock(&ctrblk_lock)) + ctrptr = ctrblk; + + memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE); + while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + while (nbytes >= DES_BLOCK_SIZE) { + if (ctrptr == ctrblk) + n = __ctrblk_init(ctrptr, nbytes); + else + n = DES_BLOCK_SIZE; + ret = crypt_s390_kmctr(func, ctx->key, out, in, + n, ctrptr); + if (ret < 0 || ret != n) { + if (ctrptr == ctrblk) + spin_unlock(&ctrblk_lock); + return -EIO; + } + if (n > DES_BLOCK_SIZE) + memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE, + DES_BLOCK_SIZE); + crypto_inc(ctrptr, DES_BLOCK_SIZE); + out += n; + in += n; + nbytes -= n; + } + ret = blkcipher_walk_done(desc, walk, nbytes); + } + if (ctrptr == ctrblk) { + if (nbytes) + memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE); + else + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + spin_unlock(&ctrblk_lock); + } else { + if (!nbytes) + memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE); + } + /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + out = walk->dst.virt.addr; + in = walk->src.virt.addr; + ret = crypt_s390_kmctr(func, ctx->key, buf, in, + DES_BLOCK_SIZE, ctrbuf); + if (ret < 0 || ret != DES_BLOCK_SIZE) + return -EIO; + memcpy(out, buf, nbytes); + crypto_inc(ctrbuf, DES_BLOCK_SIZE); + ret = blkcipher_walk_done(desc, walk, 0); + memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE); + } + return ret; +} + +static int ctr_des_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk); } -static void __exit -fini(void) +static int ctr_des_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) { - crypto_unregister_alg(&des3_192_alg); - crypto_unregister_alg(&des3_128_alg); + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des_alg = { + .cra_name = "ctr(des)", + .cra_driver_name = "ctr-des-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des_setkey, + .encrypt = ctr_des_encrypt, + .decrypt = ctr_des_decrypt, + } + } +}; + +static int ctr_des3_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk); +} + +static int ctr_des3_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk); +} + +static struct crypto_alg ctr_des3_alg = { + .cra_name = "ctr(des3_ede)", + .cra_driver_name = "ctr-des3_ede-s390", + .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct s390_des_ctx), + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = DES3_KEY_SIZE, + .max_keysize = DES3_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = des3_setkey, + .encrypt = ctr_des3_encrypt, + .decrypt = ctr_des3_decrypt, + } + } +}; + +static int __init des_s390_init(void) +{ + int ret; + + if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) || + !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + + ret = crypto_register_alg(&des_alg); + if (ret) + goto des_err; + ret = crypto_register_alg(&ecb_des_alg); + if (ret) + goto ecb_des_err; + ret = crypto_register_alg(&cbc_des_alg); + if (ret) + goto cbc_des_err; + ret = crypto_register_alg(&des3_alg); + if (ret) + goto des3_err; + ret = crypto_register_alg(&ecb_des3_alg); + if (ret) + goto ecb_des3_err; + ret = crypto_register_alg(&cbc_des3_alg); + if (ret) + goto cbc_des3_err; + + if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4) && + crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + ret = crypto_register_alg(&ctr_des_alg); + if (ret) + goto ctr_des_err; + ret = crypto_register_alg(&ctr_des3_alg); + if (ret) + goto ctr_des3_err; + ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + if (!ctrblk) { + ret = -ENOMEM; + goto ctr_mem_err; + } + } +out: + return ret; + +ctr_mem_err: + crypto_unregister_alg(&ctr_des3_alg); +ctr_des3_err: + crypto_unregister_alg(&ctr_des_alg); +ctr_des_err: + crypto_unregister_alg(&cbc_des3_alg); +cbc_des3_err: + crypto_unregister_alg(&ecb_des3_alg); +ecb_des3_err: + crypto_unregister_alg(&des3_alg); +des3_err: + crypto_unregister_alg(&cbc_des_alg); +cbc_des_err: + crypto_unregister_alg(&ecb_des_alg); +ecb_des_err: + crypto_unregister_alg(&des_alg); +des_err: + goto out; +} + +static void __exit des_s390_exit(void) +{ + if (ctrblk) { + crypto_unregister_alg(&ctr_des_alg); + crypto_unregister_alg(&ctr_des3_alg); + free_page((unsigned long) ctrblk); + } + crypto_unregister_alg(&cbc_des3_alg); + crypto_unregister_alg(&ecb_des3_alg); + crypto_unregister_alg(&des3_alg); + crypto_unregister_alg(&cbc_des_alg); + crypto_unregister_alg(&ecb_des_alg); crypto_unregister_alg(&des_alg); } -module_init(init); -module_exit(fini); +module_init(des_s390_init); +module_exit(des_s390_exit); MODULE_ALIAS("des"); MODULE_ALIAS("des3_ede"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c new file mode 100644 index 00000000000..d43485d142e --- /dev/null +++ b/arch/s390/crypto/ghash_s390.c @@ -0,0 +1,166 @@ +/* + * Cryptographic API. + * + * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode). + * + * Copyright IBM Corp. 2011 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> + +#include "crypt_s390.h" + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_ctx { + u8 icv[16]; + u8 key[16]; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, GHASH_BLOCK_SIZE); + memset(ctx->icv, 0, GHASH_BLOCK_SIZE); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + unsigned int n; + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + n = min(srclen, dctx->bytes); + dctx->bytes -= n; + srclen -= n; + + memcpy(pos, src, n); + src += n; + + if (!dctx->bytes) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, + GHASH_BLOCK_SIZE); + if (ret != GHASH_BLOCK_SIZE) + return -EIO; + } + } + + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + if (n) { + ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n); + if (ret != n) + return -EIO; + src += n; + srclen -= n; + } + + if (srclen) { + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + memcpy(buf, src, srclen); + } + + return 0; +} + +static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *buf = dctx->buffer; + int ret; + + if (dctx->bytes) { + u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); + + memset(pos, 0, dctx->bytes); + + ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE); + if (ret != GHASH_BLOCK_SIZE) + return -EIO; + } + + dctx->bytes = 0; + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + int ret; + + ret = ghash_flush(ctx, dctx); + if (!ret) + memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE); + return ret; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + }, +}; + +static int __init ghash_mod_init(void) +{ + if (!crypt_s390_func_available(KIMD_GHASH, + CRYPT_S390_MSA | CRYPT_S390_MSA4)) + return -EOPNOTSUPP; + + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_mod_init); +module_exit(ghash_mod_exit); + +MODULE_ALIAS("ghash"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c new file mode 100644 index 00000000000..94a35a4c1b4 --- /dev/null +++ b/arch/s390/crypto/prng.c @@ -0,0 +1,211 @@ +/* + * Copyright IBM Corp. 2006, 2007 + * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + * Driver for the s390 pseudo random number generator + */ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <asm/debug.h> +#include <asm/uaccess.h> + +#include "crypt_s390.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>"); +MODULE_DESCRIPTION("s390 PRNG interface"); + +static int prng_chunk_size = 256; +module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH); +MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes"); + +static int prng_entropy_limit = 4096; +module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR); +MODULE_PARM_DESC(prng_entropy_limit, + "PRNG add entropy after that much bytes were produced"); + +/* + * Any one who considers arithmetical methods of producing random digits is, + * of course, in a state of sin. -- John von Neumann + */ + +struct s390_prng_data { + unsigned long count; /* how many bytes were produced */ + char *buf; +}; + +static struct s390_prng_data *p; + +/* copied from libica, use a non-zero initial parameter block */ +static unsigned char parm_block[32] = { +0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4, +0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0, +}; + +static int prng_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + +static void prng_add_entropy(void) +{ + __u64 entropy[4]; + unsigned int i; + int ret; + + for (i = 0; i < 16; i++) { + ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy, + (char *)entropy, sizeof(entropy)); + BUG_ON(ret < 0 || ret != sizeof(entropy)); + memcpy(parm_block, entropy, sizeof(entropy)); + } +} + +static void prng_seed(int nbytes) +{ + char buf[16]; + int i = 0; + + BUG_ON(nbytes > 16); + get_random_bytes(buf, nbytes); + + /* Add the entropy */ + while (nbytes >= 8) { + *((__u64 *)parm_block) ^= *((__u64 *)(buf+i)); + prng_add_entropy(); + i += 8; + nbytes -= 8; + } + prng_add_entropy(); +} + +static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes, + loff_t *ppos) +{ + int chunk, n; + int ret = 0; + int tmp; + + /* nbytes can be arbitrary length, we split it into chunks */ + while (nbytes) { + /* same as in extract_entropy_user in random.c */ + if (need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + /* + * we lose some random bytes if an attacker issues + * reads < 8 bytes, but we don't care + */ + chunk = min_t(int, nbytes, prng_chunk_size); + + /* PRNG only likes multiples of 8 bytes */ + n = (chunk + 7) & -8; + + if (p->count > prng_entropy_limit) + prng_seed(8); + + /* if the CPU supports PRNG stckf is present too */ + asm volatile(".insn s,0xb27c0000,%0" + : "=m" (*((unsigned long long *)p->buf)) : : "cc"); + + /* + * Beside the STCKF the input for the TDES-EDE is the output + * of the last operation. We differ here from X9.17 since we + * only store one timestamp into the buffer. Padding the whole + * buffer with timestamps does not improve security, since + * successive stckf have nearly constant offsets. + * If an attacker knows the first timestamp it would be + * trivial to guess the additional values. One timestamp + * is therefore enough and still guarantees unique input values. + * + * Note: you can still get strict X9.17 conformity by setting + * prng_chunk_size to 8 bytes. + */ + tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n); + BUG_ON((tmp < 0) || (tmp != n)); + + p->count += n; + + if (copy_to_user(ubuf, p->buf, chunk)) + return -EFAULT; + + nbytes -= chunk; + ret += chunk; + ubuf += chunk; + } + return ret; +} + +static const struct file_operations prng_fops = { + .owner = THIS_MODULE, + .open = &prng_open, + .release = NULL, + .read = &prng_read, + .llseek = noop_llseek, +}; + +static struct miscdevice prng_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .fops = &prng_fops, +}; + +static int __init prng_init(void) +{ + int ret; + + /* check if the CPU has a PRNG */ + if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + + if (prng_chunk_size < 8) + return -EINVAL; + + p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL); + if (!p) + return -ENOMEM; + p->count = 0; + + p->buf = kmalloc(prng_chunk_size, GFP_KERNEL); + if (!p->buf) { + ret = -ENOMEM; + goto out_free; + } + + /* initialize the PRNG, add 128 bits of entropy */ + prng_seed(16); + + ret = misc_register(&prng_dev); + if (ret) + goto out_buf; + return 0; + +out_buf: + kfree(p->buf); +out_free: + kfree(p); + return ret; +} + +static void __exit prng_exit(void) +{ + /* wipe me */ + kzfree(p->buf); + kfree(p); + + misc_deregister(&prng_dev); +} + +module_init(prng_init); +module_exit(prng_exit); diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h new file mode 100644 index 00000000000..f4e9dc71675 --- /dev/null +++ b/arch/s390/crypto/sha.h @@ -0,0 +1,37 @@ +/* + * Cryptographic API. + * + * s390 generic implementation of the SHA Secure Hash Algorithms. + * + * Copyright IBM Corp. 2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ARCH_S390_SHA_H +#define _CRYPTO_ARCH_S390_SHA_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +/* must be big enough for the largest SHA variant */ +#define SHA_MAX_STATE_SIZE 16 +#define SHA_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE + +struct s390_sha_ctx { + u64 count; /* message length in bytes */ + u32 state[SHA_MAX_STATE_SIZE]; + u8 buf[2 * SHA_MAX_BLOCK_SIZE]; + int func; /* KIMD function to use */ +}; + +struct shash_desc; + +int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len); +int s390_sha_final(struct shash_desc *desc, u8 *out); + +#endif diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 98c896b86dc..a1b3a9dc9d8 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -8,10 +8,11 @@ * implementation written by Steve Reid. * * s390 Version: - * Copyright (C) 2003 IBM Deutschland GmbH, IBM Corporation - * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Copyright IBM Corp. 2003, 2007 + * Author(s): Thomas Spatzier + * Jan Glauber (jan.glauber@de.ibm.com) * - * Derived from "crypto/sha1.c" + * Derived from "crypto/sha1_generic.c" * Copyright (c) Alan Smithee. * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> @@ -22,146 +23,86 @@ * any later version. * */ +#include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/mm.h> -#include <linux/crypto.h> -#include <asm/scatterlist.h> -#include <asm/byteorder.h> -#include "crypt_s390.h" - -#define SHA1_DIGEST_SIZE 20 -#define SHA1_BLOCK_SIZE 64 +#include <crypto/sha.h> -struct crypt_s390_sha1_ctx { - u64 count; - u32 state[5]; - u32 buf_len; - u8 buffer[2 * SHA1_BLOCK_SIZE]; -}; - -static void -sha1_init(void *ctx) -{ - static const struct crypt_s390_sha1_ctx initstate = { - .state = { - 0x67452301, - 0xEFCDAB89, - 0x98BADCFE, - 0x10325476, - 0xC3D2E1F0 - }, - }; - memcpy(ctx, &initstate, sizeof(initstate)); -} +#include "crypt_s390.h" +#include "sha.h" -static void -sha1_update(void *ctx, const u8 *data, unsigned int len) +static int sha1_init(struct shash_desc *desc) { - struct crypt_s390_sha1_ctx *sctx; - long imd_len; + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - sctx = ctx; - sctx->count += len * 8; //message bit length - - //anything in buffer yet? -> must be completed - if (sctx->buf_len && (sctx->buf_len + len) >= SHA1_BLOCK_SIZE) { - //complete full block and hash - memcpy(sctx->buffer + sctx->buf_len, data, - SHA1_BLOCK_SIZE - sctx->buf_len); - crypt_s390_kimd(KIMD_SHA_1, sctx->state, sctx->buffer, - SHA1_BLOCK_SIZE); - data += SHA1_BLOCK_SIZE - sctx->buf_len; - len -= SHA1_BLOCK_SIZE - sctx->buf_len; - sctx->buf_len = 0; - } + sctx->state[0] = SHA1_H0; + sctx->state[1] = SHA1_H1; + sctx->state[2] = SHA1_H2; + sctx->state[3] = SHA1_H3; + sctx->state[4] = SHA1_H4; + sctx->count = 0; + sctx->func = KIMD_SHA_1; - //rest of data contains full blocks? - imd_len = len & ~0x3ful; - if (imd_len){ - crypt_s390_kimd(KIMD_SHA_1, sctx->state, data, imd_len); - data += imd_len; - len -= imd_len; - } - //anything left? store in buffer - if (len){ - memcpy(sctx->buffer + sctx->buf_len , data, len); - sctx->buf_len += len; - } + return 0; } - -static void -pad_message(struct crypt_s390_sha1_ctx* sctx) +static int sha1_export(struct shash_desc *desc, void *out) { - int index; + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *octx = out; - index = sctx->buf_len; - sctx->buf_len = (sctx->buf_len < 56)? - SHA1_BLOCK_SIZE:2 * SHA1_BLOCK_SIZE; - //start pad with 1 - sctx->buffer[index] = 0x80; - //pad with zeros - index++; - memset(sctx->buffer + index, 0x00, sctx->buf_len - index); - //append length - memcpy(sctx->buffer + sctx->buf_len - 8, &sctx->count, - sizeof sctx->count); + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); + return 0; } -/* Add padding and return the message digest. */ -static void -sha1_final(void* ctx, u8 *out) +static int sha1_import(struct shash_desc *desc, const void *in) { - struct crypt_s390_sha1_ctx *sctx = ctx; - - //must perform manual padding - pad_message(sctx); - crypt_s390_kimd(KIMD_SHA_1, sctx->state, sctx->buffer, sctx->buf_len); - //copy digest to out - memcpy(out, sctx->state, SHA1_DIGEST_SIZE); - /* Wipe context */ - memset(sctx, 0, sizeof *sctx); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha1_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); + sctx->func = KIMD_SHA_1; + return 0; } -static struct crypto_alg alg = { - .cra_name = "sha1", - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypt_s390_sha1_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { .digest = { - .dia_digestsize = SHA1_DIGEST_SIZE, - .dia_init = sha1_init, - .dia_update = sha1_update, - .dia_final = sha1_final } } +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } }; -static int -init(void) +static int __init sha1_s390_init(void) { - int ret = -ENOSYS; - - if (crypt_s390_func_available(KIMD_SHA_1)){ - ret = crypto_register_alg(&alg); - if (ret == 0){ - printk(KERN_INFO "crypt_s390: sha1_s390 loaded.\n"); - } - } - return ret; + if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + return crypto_register_shash(&alg); } -static void __exit -fini(void) +static void __exit sha1_s390_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_shash(&alg); } -module_init(init); -module_exit(fini); +module_init(sha1_s390_init); +module_exit(sha1_s390_fini); MODULE_ALIAS("sha1"); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index b75bdbd476c..9b853809a49 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -1,151 +1,149 @@ /* * Cryptographic API. * - * s390 implementation of the SHA256 Secure Hash Algorithm. + * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. * * s390 Version: - * Copyright (C) 2005 IBM Deutschland GmbH, IBM Corporation + * Copyright IBM Corp. 2005, 2011 * Author(s): Jan Glauber (jang@de.ibm.com) * - * Derived from "crypto/sha256.c" - * and "arch/s390/crypto/sha1_s390.c" - * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ +#include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/crypto.h> +#include <crypto/sha.h> #include "crypt_s390.h" +#include "sha.h" -#define SHA256_DIGEST_SIZE 32 -#define SHA256_BLOCK_SIZE 64 - -struct s390_sha256_ctx { - u64 count; - u32 state[8]; - u8 buf[2 * SHA256_BLOCK_SIZE]; -}; - -static void sha256_init(void *ctx) +static int sha256_init(struct shash_desc *desc) { - struct s390_sha256_ctx *sctx = ctx; - - sctx->state[0] = 0x6a09e667; - sctx->state[1] = 0xbb67ae85; - sctx->state[2] = 0x3c6ef372; - sctx->state[3] = 0xa54ff53a; - sctx->state[4] = 0x510e527f; - sctx->state[5] = 0x9b05688c; - sctx->state[6] = 0x1f83d9ab; - sctx->state[7] = 0x5be0cd19; + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; sctx->count = 0; - memset(sctx->buf, 0, sizeof(sctx->buf)); -} + sctx->func = KIMD_SHA_256; -static void sha256_update(void *ctx, const u8 *data, unsigned int len) -{ - struct s390_sha256_ctx *sctx = ctx; - unsigned int index; - - /* how much is already in the buffer? */ - index = sctx->count / 8 & 0x3f; - - /* update message bit length */ - sctx->count += len * 8; - - /* process one block */ - if ((index + len) >= SHA256_BLOCK_SIZE) { - memcpy(sctx->buf + index, data, SHA256_BLOCK_SIZE - index); - crypt_s390_kimd(KIMD_SHA_256, sctx->state, sctx->buf, - SHA256_BLOCK_SIZE); - data += SHA256_BLOCK_SIZE - index; - len -= SHA256_BLOCK_SIZE - index; - } - - /* anything left? */ - if (len) - memcpy(sctx->buf + index , data, len); + return 0; } -static void pad_message(struct s390_sha256_ctx* sctx) +static int sha256_export(struct shash_desc *desc, void *out) { - int index, end; - - index = sctx->count / 8 & 0x3f; - end = index < 56 ? SHA256_BLOCK_SIZE : 2 * SHA256_BLOCK_SIZE; - - /* start pad with 1 */ - sctx->buf[index] = 0x80; - - /* pad with zeros */ - index++; - memset(sctx->buf + index, 0x00, end - index - 8); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *octx = out; - /* append message length */ - memcpy(sctx->buf + end - 8, &sctx->count, sizeof sctx->count); - - sctx->count = end * 8; + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; } -/* Add padding and return the message digest */ -static void sha256_final(void* ctx, u8 *out) +static int sha256_import(struct shash_desc *desc, const void *in) { - struct s390_sha256_ctx *sctx = ctx; - - /* must perform manual padding */ - pad_message(sctx); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha256_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_256; + return 0; +} - crypt_s390_kimd(KIMD_SHA_256, sctx->state, sctx->buf, - sctx->count / 8); +static struct shash_alg sha256_alg = { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name= "sha256-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; - /* copy digest to out */ - memcpy(out, sctx->state, SHA256_DIGEST_SIZE); +static int sha224_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + sctx->func = KIMD_SHA_256; - /* wipe context */ - memset(sctx, 0, sizeof *sctx); + return 0; } -static struct crypto_alg alg = { - .cra_name = "sha256", - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha256_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { .digest = { - .dia_digestsize = SHA256_DIGEST_SIZE, - .dia_init = sha256_init, - .dia_update = sha256_update, - .dia_final = sha256_final } } +static struct shash_alg sha224_alg = { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name= "sha224-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } }; -static int init(void) +static int __init sha256_s390_init(void) { int ret; - if (!crypt_s390_func_available(KIMD_SHA_256)) - return -ENOSYS; - - ret = crypto_register_alg(&alg); - if (ret != 0) - printk(KERN_INFO "crypt_s390: sha256_s390 couldn't be loaded."); + if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + ret = crypto_register_shash(&sha256_alg); + if (ret < 0) + goto out; + ret = crypto_register_shash(&sha224_alg); + if (ret < 0) + crypto_unregister_shash(&sha256_alg); +out: return ret; } -static void __exit fini(void) +static void __exit sha256_s390_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_shash(&sha224_alg); + crypto_unregister_shash(&sha256_alg); } -module_init(init); -module_exit(fini); +module_init(sha256_s390_init); +module_exit(sha256_s390_fini); MODULE_ALIAS("sha256"); - +MODULE_ALIAS("sha224"); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm"); +MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c new file mode 100644 index 00000000000..32a81383b69 --- /dev/null +++ b/arch/s390/crypto/sha512_s390.c @@ -0,0 +1,155 @@ +/* + * Cryptographic API. + * + * s390 implementation of the SHA512 and SHA38 Secure Hash Algorithm. + * + * Copyright IBM Corp. 2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include "sha.h" +#include "crypt_s390.h" + +static int sha512_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + + *(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL; + *(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL; + *(__u64 *)&ctx->state[4] = 0x3c6ef372fe94f82bULL; + *(__u64 *)&ctx->state[6] = 0xa54ff53a5f1d36f1ULL; + *(__u64 *)&ctx->state[8] = 0x510e527fade682d1ULL; + *(__u64 *)&ctx->state[10] = 0x9b05688c2b3e6c1fULL; + *(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL; + *(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL; + ctx->count = 0; + ctx->func = KIMD_SHA_512; + + return 0; +} + +static int sha512_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *octx = out; + + octx->count[0] = sctx->count; + octx->count[1] = 0; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha512_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha512_state *ictx = in; + + if (unlikely(ictx->count[1])) + return -ERANGE; + sctx->count = ictx->count[0]; + + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_512; + return 0; +} + +static struct shash_alg sha512_alg = { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name= "sha512-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +MODULE_ALIAS("sha512"); + +static int sha384_init(struct shash_desc *desc) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + + *(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL; + *(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL; + *(__u64 *)&ctx->state[4] = 0x9159015a3070dd17ULL; + *(__u64 *)&ctx->state[6] = 0x152fecd8f70e5939ULL; + *(__u64 *)&ctx->state[8] = 0x67332667ffc00b31ULL; + *(__u64 *)&ctx->state[10] = 0x8eb44a8768581511ULL; + *(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL; + *(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL; + ctx->count = 0; + ctx->func = KIMD_SHA_512; + + return 0; +} + +static struct shash_alg sha384_alg = { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_init, + .update = s390_sha_update, + .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, + .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name= "sha384-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct s390_sha_ctx), + .cra_module = THIS_MODULE, + } +}; + +MODULE_ALIAS("sha384"); + +static int __init init(void) +{ + int ret; + + if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA)) + return -EOPNOTSUPP; + if ((ret = crypto_register_shash(&sha512_alg)) < 0) + goto out; + if ((ret = crypto_register_shash(&sha384_alg)) < 0) + crypto_unregister_shash(&sha512_alg); +out: + return ret; +} + +static void __exit fini(void) +{ + crypto_unregister_shash(&sha512_alg); + crypto_unregister_shash(&sha384_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c new file mode 100644 index 00000000000..8620b0ec9c4 --- /dev/null +++ b/arch/s390/crypto/sha_common.c @@ -0,0 +1,106 @@ +/* + * Cryptographic API. + * + * s390 generic implementation of the SHA Secure Hash Algorithms. + * + * Copyright IBM Corp. 2007 + * Author(s): Jan Glauber (jang@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/module.h> +#include "sha.h" +#include "crypt_s390.h" + +int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int bsize = crypto_shash_blocksize(desc->tfm); + unsigned int index; + int ret; + + /* how much is already in the buffer? */ + index = ctx->count & (bsize - 1); + ctx->count += len; + + if ((index + len) < bsize) + goto store; + + /* process one stored block */ + if (index) { + memcpy(ctx->buf + index, data, bsize - index); + ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize); + if (ret != bsize) + return -EIO; + data += bsize - index; + len -= bsize - index; + index = 0; + } + + /* process as many blocks as possible */ + if (len >= bsize) { + ret = crypt_s390_kimd(ctx->func, ctx->state, data, + len & ~(bsize - 1)); + if (ret != (len & ~(bsize - 1))) + return -EIO; + data += ret; + len -= ret; + } +store: + if (len) + memcpy(ctx->buf + index , data, len); + + return 0; +} +EXPORT_SYMBOL_GPL(s390_sha_update); + +int s390_sha_final(struct shash_desc *desc, u8 *out) +{ + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int bsize = crypto_shash_blocksize(desc->tfm); + u64 bits; + unsigned int index, end, plen; + int ret; + + /* SHA-512 uses 128 bit padding length */ + plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8; + + /* must perform manual padding */ + index = ctx->count & (bsize - 1); + end = (index < bsize - plen) ? bsize : (2 * bsize); + + /* start pad with 1 */ + ctx->buf[index] = 0x80; + index++; + + /* pad with zeros */ + memset(ctx->buf + index, 0x00, end - index - 8); + + /* + * Append message length. Well, SHA-512 wants a 128 bit length value, + * nevertheless we use u64, should be enough for now... + */ + bits = ctx->count * 8; + memcpy(ctx->buf + end - 8, &bits, sizeof(bits)); + + ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end); + if (ret != end) + return -EIO; + + /* copy digest to out */ + memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); + /* wipe context */ + memset(ctx, 0, sizeof *ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(s390_sha_final); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("s390 SHA cipher common functions"); diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 7d23edc6fac..2e56498a40d 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,225 +1,67 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.15-rc2 -# Mon Nov 21 13:51:30 2005 -# -CONFIG_MMU=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_S390=y -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y -CONFIG_LOCK_KERNEL=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y -# CONFIG_AUDITSYSCALL is not set -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -# CONFIG_CPUSETS is not set -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_EMBEDDED is not set -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_ALL is not set -# CONFIG_KALLSYMS_EXTRA_PASS is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 - -# -# Loadable module support -# +CONFIG_CGROUPS=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_RD_XZ=y +CONFIG_RD_LZO=y +CONFIG_RD_LZ4=y +CONFIG_EXPERT=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y CONFIG_MODVERSIONS=y -# CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y -CONFIG_STOP_MACHINE=y - -# -# Block layer -# -# CONFIG_LBD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -CONFIG_DEFAULT_AS=y -# CONFIG_DEFAULT_DEADLINE is not set -# CONFIG_DEFAULT_CFQ is not set -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" - -# -# Base setup -# - -# -# Processor type and features -# -# CONFIG_64BIT is not set -CONFIG_SMP=y -CONFIG_NR_CPUS=32 -CONFIG_HOTPLUG_CPU=y -CONFIG_MATHEMU=y - -# -# Code generation options -# -CONFIG_MARCH_G5=y -# CONFIG_MARCH_Z900 is not set -# CONFIG_MARCH_Z990 is not set -CONFIG_PACK_STACK=y -# CONFIG_SMALL_STACK is not set -CONFIG_CHECK_STACK=y -CONFIG_STACK_GUARD=256 -# CONFIG_WARN_STACK is not set -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -CONFIG_SPLIT_PTLOCK_CPUS=4 - -# -# I/O subsystem configuration -# -CONFIG_MACHCHK_WARNING=y -CONFIG_QDIO=y -# CONFIG_QDIO_PERF_STATS is not set -# CONFIG_QDIO_DEBUG is not set - -# -# Misc -# -CONFIG_PREEMPT=y -CONFIG_IPL=y -# CONFIG_IPL_TAPE is not set -CONFIG_IPL_VM=y -CONFIG_BINFMT_ELF=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IBM_PARTITION=y +CONFIG_DEFAULT_DEADLINE=y +CONFIG_MARCH_Z196=y +CONFIG_NR_CPUS=256 +CONFIG_HZ_100=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CMA=y +CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m -# CONFIG_PROCESS_DEBUG is not set -CONFIG_PFAULT=y -# CONFIG_SHARED_KERNEL is not set -# CONFIG_CMM is not set -CONFIG_VIRT_TIMER=y -CONFIG_VIRT_CPU_ACCOUNTING=y -# CONFIG_APPLDATA_BASE is not set -CONFIG_NO_IDLE_HZ=y -CONFIG_NO_IDLE_HZ_INIT=y -# CONFIG_KEXEC is not set - -# -# Networking -# -CONFIG_NET=y - -# -# Networking options -# +CONFIG_HIBERNATION=y CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y -CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set CONFIG_NET_KEY=y CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y +# CONFIG_INET_LRO is not set CONFIG_IPV6=y -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set -# CONFIG_INET6_IPCOMP is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_IPV6_TUNNEL is not set -# CONFIG_NETFILTER is not set - -# -# DCCP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_DCCP is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_VLAN_8021Q=y CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CLK_JIFFIES=y -# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set -# CONFIG_NET_SCH_CLK_CPU is not set - -# -# Queueing/Scheduling -# CONFIG_NET_SCH_CBQ=m -# CONFIG_NET_SCH_HTB is not set -# CONFIG_NET_SCH_HFSC is not set CONFIG_NET_SCH_PRIO=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_SFQ=m @@ -227,441 +69,140 @@ CONFIG_NET_SCH_TEQL=m CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_DSMARK=m -# CONFIG_NET_SCH_NETEM is not set -# CONFIG_NET_SCH_INGRESS is not set - -# -# Classification -# -CONFIG_NET_CLS=y -# CONFIG_NET_CLS_BASIC is not set CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m -# CONFIG_CLS_U32_PERF is not set +CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m -# CONFIG_NET_EMATCH is not set -# CONFIG_NET_CLS_ACT is not set -CONFIG_NET_CLS_POLICE=y -# CONFIG_NET_CLS_IND is not set -CONFIG_NET_ESTIMATOR=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_IEEE80211 is not set -# CONFIG_PCMCIA is not set - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set -# CONFIG_DEBUG_DRIVER is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_BPF_JIT=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_VIRTIO_BLK=y CONFIG_SCSI=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y -# CONFIG_CHR_DEV_OSST is not set CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -# CONFIG_CHR_DEV_SCH is not set - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y - -# -# SCSI Transport Attributes -# -# CONFIG_SCSI_SPI_ATTRS is not set -CONFIG_SCSI_FC_ATTRS=y -# CONFIG_SCSI_ISCSI_ATTRS is not set -# CONFIG_SCSI_SAS_ATTRS is not set - -# -# SCSI low-level drivers -# -# CONFIG_ISCSI_TCP is not set -# CONFIG_SCSI_SATA is not set -# CONFIG_SCSI_DEBUG is not set +CONFIG_SCSI_SCAN_ASYNC=y CONFIG_ZFCP=y -CONFIG_CCW=y - -# -# Block devices -# -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=m -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_CDROM_PKTCDVD is not set - -# -# S/390 block device drivers -# -CONFIG_BLK_DEV_XPRAM=m -# CONFIG_DCSSBLK is not set -CONFIG_DASD=y -CONFIG_DASD_PROFILE=y -CONFIG_DASD_ECKD=y -CONFIG_DASD_FBA=y -CONFIG_DASD_DIAG=y -CONFIG_DASD_EER=m -# CONFIG_DASD_CMB is not set -# CONFIG_ATA_OVER_ETH is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -# CONFIG_MD_RAID10 is not set -CONFIG_MD_RAID5=m -# CONFIG_MD_RAID6 is not set -CONFIG_MD_MULTIPATH=m -# CONFIG_MD_FAULTY is not set -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_SNAPSHOT=y -CONFIG_DM_MIRROR=y -CONFIG_DM_ZERO=y -CONFIG_DM_MULTIPATH=y -# CONFIG_DM_MULTIPATH_EMC is not set - -# -# Character device drivers -# -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set - -# -# S/390 character device drivers -# -CONFIG_TN3270=y -CONFIG_TN3270_TTY=y -CONFIG_TN3270_FS=m -CONFIG_TN3270_CONSOLE=y -CONFIG_TN3215=y -CONFIG_TN3215_CONSOLE=y -CONFIG_CCW_CONSOLE=y -CONFIG_SCLP=y -CONFIG_SCLP_TTY=y -CONFIG_SCLP_CONSOLE=y -CONFIG_SCLP_VT220_TTY=y -CONFIG_SCLP_VT220_CONSOLE=y -CONFIG_SCLP_CPI=m -CONFIG_S390_TAPE=m - -# -# S/390 tape interface support -# -CONFIG_S390_TAPE_BLOCK=y - -# -# S/390 tape hardware support -# -CONFIG_S390_TAPE_34XX=m -# CONFIG_VMLOGRDR is not set -# CONFIG_VMCP is not set -# CONFIG_MONREADER is not set - -# -# Cryptographic devices -# -CONFIG_Z90CRYPT=m - -# -# Network device support -# +CONFIG_SCSI_VIRTIO=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_EQUALIZER=m CONFIG_TUN=m - -# -# PHY device support -# - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# -# CONFIG_TR is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# S/390 network device drivers -# -CONFIG_LCS=m -CONFIG_CTC=m -CONFIG_IUCV=m -# CONFIG_NETIUCV is not set -# CONFIG_SMSGIUCV is not set -# CONFIG_CLAW is not set -# CONFIG_MPC is not set -CONFIG_QETH=y - -# -# Gigabit Ethernet default settings -# -# CONFIG_QETH_IPV6 is not set -# CONFIG_QETH_PERF_STATS is not set -CONFIG_CCWGROUP=y -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XIP is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -# CONFIG_FS_POSIX_ACL is not set -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_INOTIFY=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_FUSE_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y +CONFIG_VIRTIO_NET=y +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +CONFIG_RAW_DRIVER=m +CONFIG_VIRTIO_BALLOON=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_BTRFS_FS=y +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_FANOTIFY=y +CONFIG_FUSE_FS=y CONFIG_PROC_KCORE=y -CONFIG_SYSFS=y CONFIG_TMPFS=y -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -# CONFIG_NFS_V3_ACL is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -CONFIG_NFSD=y -CONFIG_NFSD_V3=y -# CONFIG_NFSD_V3_ACL is not set -# CONFIG_NFSD_V4 is not set -CONFIG_NFSD_TCP=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_EXPORTFS=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_IBM_PARTITION=y -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -CONFIG_DEBUG_KERNEL=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_MAGIC_SYSRQ=y -CONFIG_LOG_BUF_SHIFT=17 -CONFIG_DETECT_SOFTLOCKUP=y -# CONFIG_SCHEDSTATS is not set -# CONFIG_DEBUG_SLAB is not set -CONFIG_DEBUG_PREEMPT=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set -# CONFIG_DEBUG_KOBJECT is not set -# CONFIG_DEBUG_INFO is not set -CONFIG_DEBUG_FS=y -# CONFIG_DEBUG_VM is not set -# CONFIG_RCU_TORTURE_TEST is not set - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -# CONFIG_CRYPTO_MD5 is not set -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA1_S390 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA256_S390 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_DES is not set -# CONFIG_CRYPTO_DES_S390 is not set -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_AES_S390 is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -CONFIG_CRC32=m -# CONFIG_LIBCRC32C is not set +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_PROVE_RCU=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RCU_TRACE=y +CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_KPROBES_SANITY_TEST=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_AUTHENC=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_CMAC=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_CRCT10DIF=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRC7=m +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_POWERPC is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +CONFIG_CMM=m diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile new file mode 100644 index 00000000000..06f8d95a16c --- /dev/null +++ b/arch/s390/hypfs/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux hypfs filesystem routines. +# + +obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o + +s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h new file mode 100644 index 00000000000..b34b5ab90a3 --- /dev/null +++ b/arch/s390/hypfs/hypfs.h @@ -0,0 +1,75 @@ +/* + * Hypervisor filesystem for Linux on s390. + * + * Copyright IBM Corp. 2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#ifndef _HYPFS_H_ +#define _HYPFS_H_ + +#include <linux/fs.h> +#include <linux/types.h> +#include <linux/debugfs.h> +#include <linux/workqueue.h> +#include <linux/kref.h> +#include <asm/hypfs.h> + +#define REG_FILE_MODE 0440 +#define UPDATE_FILE_MODE 0220 +#define DIR_MODE 0550 + +extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name); + +extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, + __u64 value); + +extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name, + char *string); + +/* LPAR Hypervisor */ +extern int hypfs_diag_init(void); +extern void hypfs_diag_exit(void); +extern int hypfs_diag_create_files(struct dentry *root); + +/* VM Hypervisor */ +extern int hypfs_vm_init(void); +extern void hypfs_vm_exit(void); +extern int hypfs_vm_create_files(struct dentry *root); + +/* Set Partition-Resource Parameter */ +int hypfs_sprp_init(void); +void hypfs_sprp_exit(void); + +/* debugfs interface */ +struct hypfs_dbfs_file; + +struct hypfs_dbfs_data { + void *buf; + void *buf_free_ptr; + size_t size; + struct hypfs_dbfs_file *dbfs_file; + struct kref kref; +}; + +struct hypfs_dbfs_file { + const char *name; + int (*data_create)(void **data, void **data_free_ptr, + size_t *size); + void (*data_free)(const void *buf_free_ptr); + long (*unlocked_ioctl) (struct file *, unsigned int, + unsigned long); + + /* Private data for hypfs_dbfs.c */ + struct hypfs_dbfs_data *data; + struct delayed_work data_free_work; + struct mutex lock; + struct dentry *dentry; +}; + +extern int hypfs_dbfs_init(void); +extern void hypfs_dbfs_exit(void); +extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); +extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df); + +#endif /* _HYPFS_H_ */ diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c new file mode 100644 index 00000000000..2badf2bf9cd --- /dev/null +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -0,0 +1,130 @@ +/* + * Hypervisor filesystem for Linux on s390 - debugfs interface + * + * Copyright IBM Corp. 2010 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/slab.h> +#include "hypfs.h" + +static struct dentry *dbfs_dir; + +static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f) +{ + struct hypfs_dbfs_data *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + kref_init(&data->kref); + data->dbfs_file = f; + return data; +} + +static void hypfs_dbfs_data_free(struct kref *kref) +{ + struct hypfs_dbfs_data *data; + + data = container_of(kref, struct hypfs_dbfs_data, kref); + data->dbfs_file->data_free(data->buf_free_ptr); + kfree(data); +} + +static void data_free_delayed(struct work_struct *work) +{ + struct hypfs_dbfs_data *data; + struct hypfs_dbfs_file *df; + + df = container_of(work, struct hypfs_dbfs_file, data_free_work.work); + mutex_lock(&df->lock); + data = df->data; + df->data = NULL; + mutex_unlock(&df->lock); + kref_put(&data->kref, hypfs_dbfs_data_free); +} + +static ssize_t dbfs_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct hypfs_dbfs_data *data; + struct hypfs_dbfs_file *df; + ssize_t rc; + + if (*ppos != 0) + return 0; + + df = file_inode(file)->i_private; + mutex_lock(&df->lock); + if (!df->data) { + data = hypfs_dbfs_data_alloc(df); + if (!data) { + mutex_unlock(&df->lock); + return -ENOMEM; + } + rc = df->data_create(&data->buf, &data->buf_free_ptr, + &data->size); + if (rc) { + mutex_unlock(&df->lock); + kfree(data); + return rc; + } + df->data = data; + schedule_delayed_work(&df->data_free_work, HZ); + } + data = df->data; + kref_get(&data->kref); + mutex_unlock(&df->lock); + + rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size); + kref_put(&data->kref, hypfs_dbfs_data_free); + return rc; +} + +static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct hypfs_dbfs_file *df; + long rc; + + df = file->f_path.dentry->d_inode->i_private; + mutex_lock(&df->lock); + if (df->unlocked_ioctl) + rc = df->unlocked_ioctl(file, cmd, arg); + else + rc = -ENOTTY; + mutex_unlock(&df->lock); + return rc; +} + +static const struct file_operations dbfs_ops = { + .read = dbfs_read, + .llseek = no_llseek, + .unlocked_ioctl = dbfs_ioctl, +}; + +int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) +{ + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, + &dbfs_ops); + if (IS_ERR(df->dentry)) + return PTR_ERR(df->dentry); + mutex_init(&df->lock); + INIT_DELAYED_WORK(&df->data_free_work, data_free_delayed); + return 0; +} + +void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) +{ + debugfs_remove(df->dentry); +} + +int hypfs_dbfs_init(void) +{ + dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); + return PTR_ERR_OR_ZERO(dbfs_dir); +} + +void hypfs_dbfs_exit(void) +{ + debugfs_remove(dbfs_dir); +} diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c new file mode 100644 index 00000000000..5eeffeefae0 --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag.c @@ -0,0 +1,769 @@ +/* + * Hypervisor filesystem for Linux on s390. Diag 204 and 224 + * implementation. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <asm/ebcdic.h> +#include "hypfs.h" + +#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ +#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ +#define TMP_SIZE 64 /* size of temporary buffers */ + +#define DBFS_D204_HDR_VERSION 0 + +/* diag 204 subcodes */ +enum diag204_sc { + SUBC_STIB4 = 4, + SUBC_RSI = 5, + SUBC_STIB6 = 6, + SUBC_STIB7 = 7 +}; + +/* The two available diag 204 data formats */ +enum diag204_format { + INFO_SIMPLE = 0, + INFO_EXT = 0x00010000 +}; + +/* bit is set in flags, when physical cpu info is included in diag 204 data */ +#define LPAR_PHYS_FLG 0x80 + +static char *diag224_cpu_names; /* diag 224 name table */ +static enum diag204_sc diag204_store_sc; /* used subcode for store */ +static enum diag204_format diag204_info_type; /* used diag 204 data format */ + +static void *diag204_buf; /* 4K aligned buffer for diag204 data */ +static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */ +static int diag204_buf_pages; /* number of pages for diag204 data */ + +static struct dentry *dbfs_d204_file; + +/* + * DIAG 204 data structures and member access functions. + * + * Since we have two different diag 204 data formats for old and new s390 + * machines, we do not access the structs directly, but use getter functions for + * each struct member instead. This should make the code more readable. + */ + +/* Time information block */ + +struct info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod; +} __attribute__ ((packed)); + +struct x_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod1; + __u64 curtod2; + char reserved[40]; +} __attribute__ ((packed)); + +static inline int info_blk_hdr__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct info_blk_hdr); + else /* INFO_EXT */ + return sizeof(struct x_info_blk_hdr); +} + +static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct info_blk_hdr *)hdr)->npar; + else /* INFO_EXT */ + return ((struct x_info_blk_hdr *)hdr)->npar; +} + +static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct info_blk_hdr *)hdr)->flags; + else /* INFO_EXT */ + return ((struct x_info_blk_hdr *)hdr)->flags; +} + +static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct info_blk_hdr *)hdr)->phys_cpus; + else /* INFO_EXT */ + return ((struct x_info_blk_hdr *)hdr)->phys_cpus; +} + +/* Partition header */ + +struct part_hdr { + __u8 pn; + __u8 cpus; + char reserved[6]; + char part_name[LPAR_NAME_LEN]; +} __attribute__ ((packed)); + +struct x_part_hdr { + __u8 pn; + __u8 cpus; + __u8 rcpus; + __u8 pflag; + __u32 mlu; + char part_name[LPAR_NAME_LEN]; + char lpc_name[8]; + char os_name[8]; + __u64 online_cs; + __u64 online_es; + __u8 upid; + char reserved1[3]; + __u32 group_mlu; + char group_name[8]; + char reserved2[32]; +} __attribute__ ((packed)); + +static inline int part_hdr__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct part_hdr); + else /* INFO_EXT */ + return sizeof(struct x_part_hdr); +} + +static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct part_hdr *)hdr)->cpus; + else /* INFO_EXT */ + return ((struct x_part_hdr *)hdr)->rcpus; +} + +static inline void part_hdr__part_name(enum diag204_format type, void *hdr, + char *name) +{ + if (type == INFO_SIMPLE) + memcpy(name, ((struct part_hdr *)hdr)->part_name, + LPAR_NAME_LEN); + else /* INFO_EXT */ + memcpy(name, ((struct x_part_hdr *)hdr)->part_name, + LPAR_NAME_LEN); + EBCASC(name, LPAR_NAME_LEN); + name[LPAR_NAME_LEN] = 0; + strim(name); +} + +struct cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; +} __attribute__ ((packed)); + +struct x_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; + __u16 min_weight; + __u16 cur_weight; + __u16 max_weight; + char reseved2[2]; + __u64 online_time; + __u64 wait_time; + __u32 pma_weight; + __u32 polar_weight; + char reserved3[40]; +} __attribute__ ((packed)); + +/* CPU info block */ + +static inline int cpu_info__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct cpu_info); + else /* INFO_EXT */ + return sizeof(struct x_cpu_info); +} + +static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->ctidx; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->ctidx; +} + +static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->cpu_addr; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->cpu_addr; +} + +static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->acc_time; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->acc_time; +} + +static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct cpu_info *)hdr)->lp_time; + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->lp_time; +} + +static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return 0; /* online_time not available in simple info */ + else /* INFO_EXT */ + return ((struct x_cpu_info *)hdr)->online_time; +} + +/* Physical header */ + +struct phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; +} __attribute__ ((packed)); + +struct x_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; + char reserved3[80]; +} __attribute__ ((packed)); + +static inline int phys_hdr__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct phys_hdr); + else /* INFO_EXT */ + return sizeof(struct x_phys_hdr); +} + +static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_hdr *)hdr)->cpus; + else /* INFO_EXT */ + return ((struct x_phys_hdr *)hdr)->cpus; +} + +/* Physical CPU info block */ + +struct phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[8]; +} __attribute__ ((packed)); + +struct x_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[80]; +} __attribute__ ((packed)); + +static inline int phys_cpu__size(enum diag204_format type) +{ + if (type == INFO_SIMPLE) + return sizeof(struct phys_cpu); + else /* INFO_EXT */ + return sizeof(struct x_phys_cpu); +} + +static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_cpu *)hdr)->cpu_addr; + else /* INFO_EXT */ + return ((struct x_phys_cpu *)hdr)->cpu_addr; +} + +static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_cpu *)hdr)->mgm_time; + else /* INFO_EXT */ + return ((struct x_phys_cpu *)hdr)->mgm_time; +} + +static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) +{ + if (type == INFO_SIMPLE) + return ((struct phys_cpu *)hdr)->ctidx; + else /* INFO_EXT */ + return ((struct x_phys_cpu *)hdr)->ctidx; +} + +/* Diagnose 204 functions */ + +static int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + register unsigned long _subcode asm("0") = subcode; + register unsigned long _size asm("1") = size; + + asm volatile( + " diag %2,%0,0x204\n" + "0:\n" + EX_TABLE(0b,0b) + : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); + if (_subcode) + return -1; + return _size; +} + +/* + * For the old diag subcode 4 with simple data format we have to use real + * memory. If we use subcode 6 or 7 with extended data format, we can (and + * should) use vmalloc, since we need a lot of memory in that case. Currently + * up to 93 pages! + */ + +static void diag204_free_buffer(void) +{ + if (!diag204_buf) + return; + if (diag204_buf_vmalloc) { + vfree(diag204_buf_vmalloc); + diag204_buf_vmalloc = NULL; + } else { + free_pages((unsigned long) diag204_buf, 0); + } + diag204_buf = NULL; +} + +static void *page_align_ptr(void *ptr) +{ + return (void *) PAGE_ALIGN((unsigned long) ptr); +} + +static void *diag204_alloc_vbuf(int pages) +{ + /* The buffer has to be page aligned! */ + diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1)); + if (!diag204_buf_vmalloc) + return ERR_PTR(-ENOMEM); + diag204_buf = page_align_ptr(diag204_buf_vmalloc); + diag204_buf_pages = pages; + return diag204_buf; +} + +static void *diag204_alloc_rbuf(void) +{ + diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0); + if (!diag204_buf) + return ERR_PTR(-ENOMEM); + diag204_buf_pages = 1; + return diag204_buf; +} + +static void *diag204_get_buffer(enum diag204_format fmt, int *pages) +{ + if (diag204_buf) { + *pages = diag204_buf_pages; + return diag204_buf; + } + if (fmt == INFO_SIMPLE) { + *pages = 1; + return diag204_alloc_rbuf(); + } else {/* INFO_EXT */ + *pages = diag204((unsigned long)SUBC_RSI | + (unsigned long)INFO_EXT, 0, NULL); + if (*pages <= 0) + return ERR_PTR(-ENOSYS); + else + return diag204_alloc_vbuf(*pages); + } +} + +/* + * diag204_probe() has to find out, which type of diagnose 204 implementation + * we have on our machine. Currently there are three possible scanarios: + * - subcode 4 + simple data format (only one page) + * - subcode 4-6 + extended data format + * - subcode 4-7 + extended data format + * + * Subcode 5 is used to retrieve the size of the data, provided by subcodes + * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition + * to subcode 6 it provides also information about secondary cpus. + * In order to get as much information as possible, we first try + * subcode 7, then 6 and if both fail, we use subcode 4. + */ + +static int diag204_probe(void) +{ + void *buf; + int pages, rc; + + buf = diag204_get_buffer(INFO_EXT, &pages); + if (!IS_ERR(buf)) { + if (diag204((unsigned long)SUBC_STIB7 | + (unsigned long)INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = SUBC_STIB7; + diag204_info_type = INFO_EXT; + goto out; + } + if (diag204((unsigned long)SUBC_STIB6 | + (unsigned long)INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = SUBC_STIB6; + diag204_info_type = INFO_EXT; + goto out; + } + diag204_free_buffer(); + } + + /* subcodes 6 and 7 failed, now try subcode 4 */ + + buf = diag204_get_buffer(INFO_SIMPLE, &pages); + if (IS_ERR(buf)) { + rc = PTR_ERR(buf); + goto fail_alloc; + } + if (diag204((unsigned long)SUBC_STIB4 | + (unsigned long)INFO_SIMPLE, pages, buf) >= 0) { + diag204_store_sc = SUBC_STIB4; + diag204_info_type = INFO_SIMPLE; + goto out; + } else { + rc = -ENOSYS; + goto fail_store; + } +out: + rc = 0; +fail_store: + diag204_free_buffer(); +fail_alloc: + return rc; +} + +static int diag204_do_store(void *buf, int pages) +{ + int rc; + + rc = diag204((unsigned long) diag204_store_sc | + (unsigned long) diag204_info_type, pages, buf); + return rc < 0 ? -ENOSYS : 0; +} + +static void *diag204_store(void) +{ + void *buf; + int pages, rc; + + buf = diag204_get_buffer(diag204_info_type, &pages); + if (IS_ERR(buf)) + goto out; + rc = diag204_do_store(buf, pages); + if (rc) + return ERR_PTR(rc); +out: + return buf; +} + +/* Diagnose 224 functions */ + +static int diag224(void *ptr) +{ + int rc = -EOPNOTSUPP; + + asm volatile( + " diag %1,%2,0x224\n" + "0: lhi %0,0x0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + return rc; +} + +static int diag224_get_name_table(void) +{ + /* memory must be below 2GB */ + diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + if (!diag224_cpu_names) + return -ENOMEM; + if (diag224(diag224_cpu_names)) { + kfree(diag224_cpu_names); + return -EOPNOTSUPP; + } + EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); + return 0; +} + +static void diag224_delete_name_table(void) +{ + kfree(diag224_cpu_names); +} + +static int diag224_idx2name(int index, char *name) +{ + memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN), + CPU_NAME_LEN); + name[CPU_NAME_LEN] = 0; + strim(name); + return 0; +} + +struct dbfs_d204_hdr { + u64 len; /* Length of d204 buffer without header */ + u16 version; /* Version of header */ + u8 sc; /* Used subcode */ + char reserved[53]; +} __attribute__ ((packed)); + +struct dbfs_d204 { + struct dbfs_d204_hdr hdr; /* 64 byte header */ + char buf[]; /* d204 buffer */ +} __attribute__ ((packed)); + +static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) +{ + struct dbfs_d204 *d204; + int rc, buf_size; + void *base; + + buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); + base = vzalloc(buf_size); + if (!base) + return -ENOMEM; + d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); + rc = diag204_do_store(d204->buf, diag204_buf_pages); + if (rc) { + vfree(base); + return rc; + } + d204->hdr.version = DBFS_D204_HDR_VERSION; + d204->hdr.len = PAGE_SIZE * diag204_buf_pages; + d204->hdr.sc = diag204_store_sc; + *data = d204; + *data_free_ptr = base; + *size = d204->hdr.len + sizeof(struct dbfs_d204_hdr); + return 0; +} + +static struct hypfs_dbfs_file dbfs_file_d204 = { + .name = "diag_204", + .data_create = dbfs_d204_create, + .data_free = vfree, +}; + +__init int hypfs_diag_init(void) +{ + int rc; + + if (diag204_probe()) { + pr_err("The hardware system does not support hypfs\n"); + return -ENODATA; + } + if (diag204_info_type == INFO_EXT) { + rc = hypfs_dbfs_create_file(&dbfs_file_d204); + if (rc) + return rc; + } + if (MACHINE_IS_LPAR) { + rc = diag224_get_name_table(); + if (rc) { + pr_err("The hardware system does not provide all " + "functions required by hypfs\n"); + debugfs_remove(dbfs_d204_file); + return rc; + } + } + return 0; +} + +void hypfs_diag_exit(void) +{ + debugfs_remove(dbfs_d204_file); + diag224_delete_name_table(); + diag204_free_buffer(); + hypfs_dbfs_remove_file(&dbfs_file_d204); +} + +/* + * Functions to create the directory structure + * ******************************************* + */ + +static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, + cpu_info)); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + rc = hypfs_create_u64(cpu_dir, "mgmtime", + cpu_info__acc_time(diag204_info_type, cpu_info) - + cpu_info__lp_time(diag204_info_type, cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + rc = hypfs_create_u64(cpu_dir, "cputime", + cpu_info__lp_time(diag204_info_type, cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + if (diag204_info_type == INFO_EXT) { + rc = hypfs_create_u64(cpu_dir, "onlinetime", + cpu_info__online_time(diag204_info_type, + cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + } + diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_RET(rc); +} + +static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) +{ + struct dentry *cpus_dir; + struct dentry *lpar_dir; + char lpar_name[LPAR_NAME_LEN + 1]; + void *cpu_info; + int i; + + part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); + lpar_name[LPAR_NAME_LEN] = 0; + lpar_dir = hypfs_mkdir(systems_dir, lpar_name); + if (IS_ERR(lpar_dir)) + return lpar_dir; + cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = part_hdr + part_hdr__size(diag204_info_type); + for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { + int rc; + rc = hypfs_create_cpu_files(cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += cpu_info__size(diag204_info_type); + } + return cpu_info; +} + +static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, + cpu_info)); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); + rc = hypfs_create_u64(cpu_dir, "mgmtime", + phys_cpu__mgm_time(diag204_info_type, cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_RET(rc); +} + +static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) +{ + int i; + void *cpu_info; + struct dentry *cpus_dir; + + cpus_dir = hypfs_mkdir(parent_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); + for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { + int rc; + rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += phys_cpu__size(diag204_info_type); + } + return cpu_info; +} + +int hypfs_diag_create_files(struct dentry *root) +{ + struct dentry *systems_dir, *hyp_dir; + void *time_hdr, *part_hdr; + int i, rc; + void *buffer, *ptr; + + buffer = diag204_store(); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + + systems_dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(systems_dir)) { + rc = PTR_ERR(systems_dir); + goto err_out; + } + time_hdr = (struct x_info_blk_hdr *)buffer; + part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); + for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { + part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); + if (IS_ERR(part_hdr)) { + rc = PTR_ERR(part_hdr); + goto err_out; + } + } + if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { + ptr = hypfs_create_phys_files(root, part_hdr); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + } + hyp_dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(hyp_dir)) { + rc = PTR_ERR(hyp_dir); + goto err_out; + } + ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + rc = 0; + +err_out: + return rc; +} diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c new file mode 100644 index 00000000000..f043c3c7e73 --- /dev/null +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -0,0 +1,141 @@ +/* + * Hypervisor filesystem for Linux on s390. + * Set Partition-Resource Parameter interface. + * + * Copyright IBM Corp. 2013 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/compat.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <asm/compat.h> +#include <asm/sclp.h> +#include "hypfs.h" + +#define DIAG304_SET_WEIGHTS 0 +#define DIAG304_QUERY_PRP 1 +#define DIAG304_SET_CAPPING 2 + +#define DIAG304_CMD_MAX 2 + +static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) +{ + register unsigned long _data asm("2") = (unsigned long) data; + register unsigned long _rc asm("3"); + register unsigned long _cmd asm("4") = cmd; + + asm volatile("diag %1,%2,0x304\n" + : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory"); + + return _rc; +} + +static void hypfs_sprp_free(const void *data) +{ + free_page((unsigned long) data); +} + +static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size) +{ + unsigned long rc; + void *data; + + data = (void *) get_zeroed_page(GFP_KERNEL); + if (!data) + return -ENOMEM; + rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP); + if (rc != 1) { + *data_ptr = *free_ptr = NULL; + *size = 0; + free_page((unsigned long) data); + return -EIO; + } + *data_ptr = *free_ptr = data; + *size = PAGE_SIZE; + return 0; +} + +static int __hypfs_sprp_ioctl(void __user *user_area) +{ + struct hypfs_diag304 diag304; + unsigned long cmd; + void __user *udata; + void *data; + int rc; + + if (copy_from_user(&diag304, user_area, sizeof(diag304))) + return -EFAULT; + if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX) + return -EINVAL; + + data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!data) + return -ENOMEM; + + udata = (void __user *)(unsigned long) diag304.data; + if (diag304.args[1] == DIAG304_SET_WEIGHTS || + diag304.args[1] == DIAG304_SET_CAPPING) + if (copy_from_user(data, udata, PAGE_SIZE)) { + rc = -EFAULT; + goto out; + } + + cmd = *(unsigned long *) &diag304.args[0]; + diag304.rc = hypfs_sprp_diag304(data, cmd); + + if (diag304.args[1] == DIAG304_QUERY_PRP) + if (copy_to_user(udata, data, PAGE_SIZE)) { + rc = -EFAULT; + goto out; + } + + rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0; +out: + free_page((unsigned long) data); + return rc; +} + +static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + void __user *argp; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (is_compat_task()) + argp = compat_ptr(arg); + else + argp = (void __user *) arg; + switch (cmd) { + case HYPFS_DIAG304: + return __hypfs_sprp_ioctl(argp); + default: /* unknown ioctl number */ + return -ENOTTY; + } + return 0; +} + +static struct hypfs_dbfs_file hypfs_sprp_file = { + .name = "diag_304", + .data_create = hypfs_sprp_create, + .data_free = hypfs_sprp_free, + .unlocked_ioctl = hypfs_sprp_ioctl, +}; + +int hypfs_sprp_init(void) +{ + if (!sclp_has_sprp()) + return 0; + return hypfs_dbfs_create_file(&hypfs_sprp_file); +} + +void hypfs_sprp_exit(void) +{ + if (!sclp_has_sprp()) + return; + hypfs_dbfs_remove_file(&hypfs_sprp_file); +} diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c new file mode 100644 index 00000000000..32040ace00e --- /dev/null +++ b/arch/s390/hypfs/hypfs_vm.c @@ -0,0 +1,287 @@ +/* + * Hypervisor filesystem for Linux on s390. z/VM implementation. + * + * Copyright IBM Corp. 2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <asm/ebcdic.h> +#include <asm/timex.h> +#include "hypfs.h" + +#define NAME_LEN 8 +#define DBFS_D2FC_HDR_VERSION 0 + +static char local_guest[] = " "; +static char all_guests[] = "* "; +static char *guest_query; + +struct diag2fc_data { + __u32 version; + __u32 flags; + __u64 used_cpu; + __u64 el_time; + __u64 mem_min_kb; + __u64 mem_max_kb; + __u64 mem_share_kb; + __u64 mem_used_kb; + __u32 pcpus; + __u32 lcpus; + __u32 vcpus; + __u32 ocpus; + __u32 cpu_max; + __u32 cpu_shares; + __u32 cpu_use_samp; + __u32 cpu_delay_samp; + __u32 page_wait_samp; + __u32 idle_samp; + __u32 other_samp; + __u32 total_samp; + char guest_name[NAME_LEN]; +}; + +struct diag2fc_parm_list { + char userid[NAME_LEN]; + char aci_grp[NAME_LEN]; + __u64 addr; + __u32 size; + __u32 fmt; +}; + +static int diag2fc(int size, char* query, void *addr) +{ + unsigned long residual_cnt; + unsigned long rc; + struct diag2fc_parm_list parm_list; + + memcpy(parm_list.userid, query, NAME_LEN); + ASCEBC(parm_list.userid, NAME_LEN); + parm_list.addr = (unsigned long) addr ; + parm_list.size = size; + parm_list.fmt = 0x02; + memset(parm_list.aci_grp, 0x40, NAME_LEN); + rc = -1; + + asm volatile( + " diag %0,%1,0x2fc\n" + "0:\n" + EX_TABLE(0b,0b) + : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory"); + + if ((rc != 0 ) && (rc != -2)) + return rc; + else + return -residual_cnt; +} + +/* + * Allocate buffer for "query" and store diag 2fc at "offset" + */ +static void *diag2fc_store(char *query, unsigned int *count, int offset) +{ + void *data; + int size; + + do { + size = diag2fc(0, query, NULL); + if (size < 0) + return ERR_PTR(-EACCES); + data = vmalloc(size + offset); + if (!data) + return ERR_PTR(-ENOMEM); + if (diag2fc(size, query, data + offset) == 0) + break; + vfree(data); + } while (1); + *count = (size / sizeof(struct diag2fc_data)); + + return data; +} + +static void diag2fc_free(const void *data) +{ + vfree(data); +} + +#define ATTRIBUTE(dir, name, member) \ +do { \ + void *rc; \ + rc = hypfs_create_u64(dir, name, member); \ + if (IS_ERR(rc)) \ + return PTR_ERR(rc); \ +} while(0) + +static int hpyfs_vm_create_guest(struct dentry *systems_dir, + struct diag2fc_data *data) +{ + char guest_name[NAME_LEN + 1] = {}; + struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; + int dedicated_flag, capped_value; + + capped_value = (data->flags & 0x00000006) >> 1; + dedicated_flag = (data->flags & 0x00000008) >> 3; + + /* guest dir */ + memcpy(guest_name, data->guest_name, NAME_LEN); + EBCASC(guest_name, NAME_LEN); + strim(guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); + if (IS_ERR(guest_dir)) + return PTR_ERR(guest_dir); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); + + /* logical cpu information */ + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return PTR_ERR(cpus_dir); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + /* + * Note: The "weight_min" attribute got the wrong name. + * The value represents the number of non-stopped (operating) + * CPUS. + */ + ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); + + /* memory information */ + mem_dir = hypfs_mkdir(guest_dir, "mem"); + if (IS_ERR(mem_dir)) + return PTR_ERR(mem_dir); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); + + /* samples */ + samples_dir = hypfs_mkdir(guest_dir, "samples"); + if (IS_ERR(samples_dir)) + return PTR_ERR(samples_dir); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); + return 0; +} + +int hypfs_vm_create_files(struct dentry *root) +{ + struct dentry *dir, *file; + struct diag2fc_data *data; + unsigned int count = 0; + int rc, i; + + data = diag2fc_store(guest_query, &count, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + /* Hpervisor Info */ + dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* physical cpus */ + dir = hypfs_mkdir(root, "cpus"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_u64(dir, "count", data->lcpus); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* guests */ + dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + + for (i = 0; i < count; i++) { + rc = hpyfs_vm_create_guest(dir, &(data[i])); + if (rc) + goto failed; + } + diag2fc_free(data); + return 0; + +failed: + diag2fc_free(data); + return rc; +} + +struct dbfs_d2fc_hdr { + u64 len; /* Length of d2fc buffer without header */ + u16 version; /* Version of header */ + char tod_ext[16]; /* TOD clock for d2fc */ + u64 count; /* Number of VM guests in d2fc buffer */ + char reserved[30]; +} __attribute__ ((packed)); + +struct dbfs_d2fc { + struct dbfs_d2fc_hdr hdr; /* 64 byte header */ + char buf[]; /* d2fc buffer */ +} __attribute__ ((packed)); + +static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) +{ + struct dbfs_d2fc *d2fc; + unsigned int count; + + d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); + if (IS_ERR(d2fc)) + return PTR_ERR(d2fc); + get_tod_clock_ext(d2fc->hdr.tod_ext); + d2fc->hdr.len = count * sizeof(struct diag2fc_data); + d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; + d2fc->hdr.count = count; + memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved)); + *data = d2fc; + *data_free_ptr = d2fc; + *size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr); + return 0; +} + +static struct hypfs_dbfs_file dbfs_file_2fc = { + .name = "diag_2fc", + .data_create = dbfs_diag2fc_create, + .data_free = diag2fc_free, +}; + +int hypfs_vm_init(void) +{ + if (!MACHINE_IS_VM) + return 0; + if (diag2fc(0, all_guests, NULL) > 0) + guest_query = all_guests; + else if (diag2fc(0, local_guest, NULL) > 0) + guest_query = local_guest; + else + return -EACCES; + return hypfs_dbfs_create_file(&dbfs_file_2fc); +} + +void hypfs_vm_exit(void) +{ + if (!MACHINE_IS_VM) + return; + hypfs_dbfs_remove_file(&dbfs_file_2fc); +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c new file mode 100644 index 00000000000..c952b981e4f --- /dev/null +++ b/arch/s390/hypfs/inode.c @@ -0,0 +1,524 @@ +/* + * Hypervisor filesystem for Linux on s390. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/time.h> +#include <linux/parser.h> +#include <linux/sysfs.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/mount.h> +#include <linux/aio.h> +#include <asm/ebcdic.h> +#include "hypfs.h" + +#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */ +#define TMP_SIZE 64 /* size of temporary buffers */ + +static struct dentry *hypfs_create_update_file(struct dentry *dir); + +struct hypfs_sb_info { + kuid_t uid; /* uid used for files and dirs */ + kgid_t gid; /* gid used for files and dirs */ + struct dentry *update_file; /* file to trigger update */ + time_t last_update; /* last update time in secs since 1970 */ + struct mutex lock; /* lock to protect update process */ +}; + +static const struct file_operations hypfs_file_ops; +static struct file_system_type hypfs_type; +static const struct super_operations hypfs_s_ops; + +/* start of list of all dentries, which have to be deleted on update */ +static struct dentry *hypfs_last_dentry; + +static void hypfs_update_update(struct super_block *sb) +{ + struct hypfs_sb_info *sb_info = sb->s_fs_info; + struct inode *inode = sb_info->update_file->d_inode; + + sb_info->last_update = get_seconds(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +} + +/* directory tree removal functions */ + +static void hypfs_add_dentry(struct dentry *dentry) +{ + dentry->d_fsdata = hypfs_last_dentry; + hypfs_last_dentry = dentry; +} + +static inline int hypfs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static void hypfs_remove(struct dentry *dentry) +{ + struct dentry *parent; + + parent = dentry->d_parent; + mutex_lock(&parent->d_inode->i_mutex); + if (hypfs_positive(dentry)) { + if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(parent->d_inode, dentry); + else + simple_unlink(parent->d_inode, dentry); + } + d_delete(dentry); + dput(dentry); + mutex_unlock(&parent->d_inode->i_mutex); +} + +static void hypfs_delete_tree(struct dentry *root) +{ + while (hypfs_last_dentry) { + struct dentry *next_dentry; + next_dentry = hypfs_last_dentry->d_fsdata; + hypfs_remove(hypfs_last_dentry); + hypfs_last_dentry = next_dentry; + } +} + +static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + ret->i_ino = get_next_ino(); + ret->i_mode = mode; + ret->i_uid = hypfs_info->uid; + ret->i_gid = hypfs_info->gid; + ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; + if (S_ISDIR(mode)) + set_nlink(ret, 2); + } + return ret; +} + +static void hypfs_evict_inode(struct inode *inode) +{ + clear_inode(inode); + kfree(inode->i_private); +} + +static int hypfs_open(struct inode *inode, struct file *filp) +{ + char *data = file_inode(filp)->i_private; + struct hypfs_sb_info *fs_info; + + if (filp->f_mode & FMODE_WRITE) { + if (!(inode->i_mode & S_IWUGO)) + return -EACCES; + } + if (filp->f_mode & FMODE_READ) { + if (!(inode->i_mode & S_IRUGO)) + return -EACCES; + } + + fs_info = inode->i_sb->s_fs_info; + if(data) { + mutex_lock(&fs_info->lock); + filp->private_data = kstrdup(data, GFP_KERNEL); + if (!filp->private_data) { + mutex_unlock(&fs_info->lock); + return -ENOMEM; + } + mutex_unlock(&fs_info->lock); + } + return nonseekable_open(inode, filp); +} + +static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) +{ + char *data; + ssize_t ret; + struct file *filp = iocb->ki_filp; + /* XXX: temporary */ + char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; + + if (nr_segs != 1) + return -EINVAL; + + data = filp->private_data; + ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data)); + if (ret <= 0) + return ret; + + iocb->ki_pos += ret; + file_accessed(filp); + + return ret; +} +static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) +{ + int rc; + struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; + struct hypfs_sb_info *fs_info = sb->s_fs_info; + size_t count = iov_length(iov, nr_segs); + + /* + * Currently we only allow one update per second for two reasons: + * 1. diag 204 is VERY expensive + * 2. If several processes do updates in parallel and then read the + * hypfs data, the likelihood of collisions is reduced, if we restrict + * the minimum update interval. A collision occurs, if during the + * data gathering of one process another process triggers an update + * If the first process wants to ensure consistent data, it has + * to restart data collection in this case. + */ + mutex_lock(&fs_info->lock); + if (fs_info->last_update == get_seconds()) { + rc = -EBUSY; + goto out; + } + hypfs_delete_tree(sb->s_root); + if (MACHINE_IS_VM) + rc = hypfs_vm_create_files(sb->s_root); + else + rc = hypfs_diag_create_files(sb->s_root); + if (rc) { + pr_err("Updating the hypfs tree failed\n"); + hypfs_delete_tree(sb->s_root); + goto out; + } + hypfs_update_update(sb); + rc = count; +out: + mutex_unlock(&fs_info->lock); + return rc; +} + +static int hypfs_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +enum { opt_uid, opt_gid, opt_err }; + +static const match_table_t hypfs_tokens = { + {opt_uid, "uid=%u"}, + {opt_gid, "gid=%u"}, + {opt_err, NULL} +}; + +static int hypfs_parse_options(char *options, struct super_block *sb) +{ + char *str; + substring_t args[MAX_OPT_ARGS]; + kuid_t uid; + kgid_t gid; + + if (!options) + return 0; + while ((str = strsep(&options, ",")) != NULL) { + int token, option; + struct hypfs_sb_info *hypfs_info = sb->s_fs_info; + + if (!*str) + continue; + token = match_token(str, hypfs_tokens, args); + switch (token) { + case opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) + return -EINVAL; + hypfs_info->uid = uid; + break; + case opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) + return -EINVAL; + hypfs_info->gid = gid; + break; + case opt_err: + default: + pr_err("%s is not a valid mount option\n", str); + return -EINVAL; + } + } + return 0; +} + +static int hypfs_show_options(struct seq_file *s, struct dentry *root) +{ + struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; + + seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid)); + seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid)); + return 0; +} + +static int hypfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + int rc = 0; + struct hypfs_sb_info *sbi; + + sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + mutex_init(&sbi->lock); + sbi->uid = current_uid(); + sbi->gid = current_gid(); + sb->s_fs_info = sbi; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = HYPFS_MAGIC; + sb->s_op = &hypfs_s_ops; + if (hypfs_parse_options(data, sb)) + return -EINVAL; + root_inode = hypfs_make_inode(sb, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_op = &simple_dir_inode_operations; + root_inode->i_fop = &simple_dir_operations; + sb->s_root = root_dentry = d_make_root(root_inode); + if (!root_dentry) + return -ENOMEM; + if (MACHINE_IS_VM) + rc = hypfs_vm_create_files(root_dentry); + else + rc = hypfs_diag_create_files(root_dentry); + if (rc) + return rc; + sbi->update_file = hypfs_create_update_file(root_dentry); + if (IS_ERR(sbi->update_file)) + return PTR_ERR(sbi->update_file); + hypfs_update_update(sb); + pr_info("Hypervisor filesystem mounted\n"); + return 0; +} + +static struct dentry *hypfs_mount(struct file_system_type *fst, int flags, + const char *devname, void *data) +{ + return mount_single(fst, flags, data, hypfs_fill_super); +} + +static void hypfs_kill_super(struct super_block *sb) +{ + struct hypfs_sb_info *sb_info = sb->s_fs_info; + + if (sb->s_root) + hypfs_delete_tree(sb->s_root); + if (sb_info->update_file) + hypfs_remove(sb_info->update_file); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + kill_litter_super(sb); +} + +static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, + char *data, umode_t mode) +{ + struct dentry *dentry; + struct inode *inode; + + mutex_lock(&parent->d_inode->i_mutex); + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) { + dentry = ERR_PTR(-ENOMEM); + goto fail; + } + inode = hypfs_make_inode(parent->d_sb, mode); + if (!inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto fail; + } + if (S_ISREG(mode)) { + inode->i_fop = &hypfs_file_ops; + if (data) + inode->i_size = strlen(data); + else + inode->i_size = 0; + } else if (S_ISDIR(mode)) { + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inc_nlink(parent->d_inode); + } else + BUG(); + inode->i_private = data; + d_instantiate(dentry, inode); + dget(dentry); +fail: + mutex_unlock(&parent->d_inode->i_mutex); + return dentry; +} + +struct dentry *hypfs_mkdir(struct dentry *parent, const char *name) +{ + struct dentry *dentry; + + dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE); + if (IS_ERR(dentry)) + return dentry; + hypfs_add_dentry(dentry); + return dentry; +} + +static struct dentry *hypfs_create_update_file(struct dentry *dir) +{ + struct dentry *dentry; + + dentry = hypfs_create_file(dir, "update", NULL, + S_IFREG | UPDATE_FILE_MODE); + /* + * We do not put the update file on the 'delete' list with + * hypfs_add_dentry(), since it should not be removed when the tree + * is updated. + */ + return dentry; +} + +struct dentry *hypfs_create_u64(struct dentry *dir, + const char *name, __u64 value) +{ + char *buffer; + char tmp[TMP_SIZE]; + struct dentry *dentry; + + snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value); + buffer = kstrdup(tmp, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + dentry = + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); + if (IS_ERR(dentry)) { + kfree(buffer); + return ERR_PTR(-ENOMEM); + } + hypfs_add_dentry(dentry); + return dentry; +} + +struct dentry *hypfs_create_str(struct dentry *dir, + const char *name, char *string) +{ + char *buffer; + struct dentry *dentry; + + buffer = kmalloc(strlen(string) + 2, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + sprintf(buffer, "%s\n", string); + dentry = + hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); + if (IS_ERR(dentry)) { + kfree(buffer); + return ERR_PTR(-ENOMEM); + } + hypfs_add_dentry(dentry); + return dentry; +} + +static const struct file_operations hypfs_file_ops = { + .open = hypfs_open, + .release = hypfs_release, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = hypfs_aio_read, + .aio_write = hypfs_aio_write, + .llseek = no_llseek, +}; + +static struct file_system_type hypfs_type = { + .owner = THIS_MODULE, + .name = "s390_hypfs", + .mount = hypfs_mount, + .kill_sb = hypfs_kill_super +}; +MODULE_ALIAS_FS("s390_hypfs"); + +static const struct super_operations hypfs_s_ops = { + .statfs = simple_statfs, + .evict_inode = hypfs_evict_inode, + .show_options = hypfs_show_options, +}; + +static struct kobject *s390_kobj; + +static int __init hypfs_init(void) +{ + int rc; + + rc = hypfs_dbfs_init(); + if (rc) + return rc; + if (hypfs_diag_init()) { + rc = -ENODATA; + goto fail_dbfs_exit; + } + if (hypfs_vm_init()) { + rc = -ENODATA; + goto fail_hypfs_diag_exit; + } + if (hypfs_sprp_init()) { + rc = -ENODATA; + goto fail_hypfs_vm_exit; + } + s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); + if (!s390_kobj) { + rc = -ENOMEM; + goto fail_hypfs_sprp_exit; + } + rc = register_filesystem(&hypfs_type); + if (rc) + goto fail_filesystem; + return 0; + +fail_filesystem: + kobject_put(s390_kobj); +fail_hypfs_sprp_exit: + hypfs_sprp_exit(); +fail_hypfs_vm_exit: + hypfs_vm_exit(); +fail_hypfs_diag_exit: + hypfs_diag_exit(); +fail_dbfs_exit: + hypfs_dbfs_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); + return rc; +} + +static void __exit hypfs_exit(void) +{ + unregister_filesystem(&hypfs_type); + kobject_put(s390_kobj); + hypfs_sprp_exit(); + hypfs_vm_exit(); + hypfs_diag_exit(); + hypfs_dbfs_exit(); +} + +module_init(hypfs_init) +module_exit(hypfs_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Holzheu <holzheu@de.ibm.com>"); +MODULE_DESCRIPTION("s390 Hypervisor Filesystem"); diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild new file mode 100644 index 00000000000..57892a8a905 --- /dev/null +++ b/arch/s390/include/asm/Kbuild @@ -0,0 +1,7 @@ + + +generic-y += clkdev.h +generic-y += hash.h +generic-y += mcs_spinlock.h +generic-y += preempt.h +generic-y += trace_clock.h diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h new file mode 100644 index 00000000000..bd93ff6661b --- /dev/null +++ b/arch/s390/include/asm/airq.h @@ -0,0 +1,103 @@ +/* + * Copyright IBM Corp. 2002, 2007 + * Author(s): Ingo Adlung <adlung@de.ibm.com> + * Cornelia Huck <cornelia.huck@de.ibm.com> + * Arnd Bergmann <arndb@de.ibm.com> + * Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_AIRQ_H +#define _ASM_S390_AIRQ_H + +#include <linux/bit_spinlock.h> + +struct airq_struct { + struct hlist_node list; /* Handler queueing. */ + void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ + u8 lsi_mask; /* Local-Summary-Indicator mask */ + u8 isc; /* Interrupt-subclass */ + u8 flags; +}; + +#define AIRQ_PTR_ALLOCATED 0x01 + +int register_adapter_interrupt(struct airq_struct *airq); +void unregister_adapter_interrupt(struct airq_struct *airq); + +/* Adapter interrupt bit vector */ +struct airq_iv { + unsigned long *vector; /* Adapter interrupt bit vector */ + unsigned long *avail; /* Allocation bit mask for the bit vector */ + unsigned long *bitlock; /* Lock bit mask for the bit vector */ + unsigned long *ptr; /* Pointer associated with each bit */ + unsigned int *data; /* 32 bit value associated with each bit */ + unsigned long bits; /* Number of bits in the vector */ + unsigned long end; /* Number of highest allocated bit + 1 */ + spinlock_t lock; /* Lock to protect alloc & free */ +}; + +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ + +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); +void airq_iv_release(struct airq_iv *iv); +unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num); +void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num); +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end); + +static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +{ + return airq_iv_alloc(iv, 1); +} + +static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +{ + airq_iv_free(iv, bit, 1); +} + +static inline unsigned long airq_iv_end(struct airq_iv *iv) +{ + return iv->end; +} + +static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_lock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_unlock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit, + unsigned int data) +{ + iv->data[bit] = data; +} + +static inline unsigned int airq_iv_get_data(struct airq_iv *iv, + unsigned long bit) +{ + return iv->data[bit]; +} + +static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit, + unsigned long ptr) +{ + iv->ptr[bit] = ptr; +} + +static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv, + unsigned long bit) +{ + return iv->ptr[bit]; +} + +#endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h new file mode 100644 index 00000000000..32a70598715 --- /dev/null +++ b/arch/s390/include/asm/appldata.h @@ -0,0 +1,88 @@ +/* + * Copyright IBM Corp. 2006 + * + * Author(s): Melissa Howland <melissah@us.ibm.com> + */ + +#ifndef _ASM_S390_APPLDATA_H +#define _ASM_S390_APPLDATA_H + +#include <asm/io.h> + +#ifndef CONFIG_64BIT + +#define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */ +#define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */ +#define APPLDATA_GEN_EVENT_REC 0x02 +#define APPLDATA_START_CONFIG_REC 0x03 + +/* + * Parameter list for DIAGNOSE X'DC' + */ +struct appldata_parameter_list { + u16 diag; /* The DIAGNOSE code X'00DC' */ + u8 function; /* The function code for the DIAGNOSE */ + u8 parlist_length; /* Length of the parameter list */ + u32 product_id_addr; /* Address of the 16-byte product ID */ + u16 reserved; + u16 buffer_length; /* Length of the application data buffer */ + u32 buffer_addr; /* Address of the application data buffer */ +} __attribute__ ((packed)); + +#else /* CONFIG_64BIT */ + +#define APPLDATA_START_INTERVAL_REC 0x80 +#define APPLDATA_STOP_REC 0x81 +#define APPLDATA_GEN_EVENT_REC 0x82 +#define APPLDATA_START_CONFIG_REC 0x83 + +/* + * Parameter list for DIAGNOSE X'DC' + */ +struct appldata_parameter_list { + u16 diag; + u8 function; + u8 parlist_length; + u32 unused01; + u16 reserved; + u16 buffer_length; + u32 unused02; + u64 product_id_addr; + u64 buffer_addr; +} __attribute__ ((packed)); + +#endif /* CONFIG_64BIT */ + +struct appldata_product_id { + char prod_nr[7]; /* product number */ + u16 prod_fn; /* product function */ + u8 record_nr; /* record number */ + u16 version_nr; /* version */ + u16 release_nr; /* release */ + u16 mod_lvl; /* modification level */ +} __attribute__ ((packed)); + +static inline int appldata_asm(struct appldata_product_id *id, + unsigned short fn, void *buffer, + unsigned short length) +{ + struct appldata_parameter_list parm_list; + int ry; + + if (!MACHINE_IS_VM) + return -EOPNOTSUPP; + parm_list.diag = 0xdc; + parm_list.function = fn; + parm_list.parlist_length = sizeof(parm_list); + parm_list.buffer_length = length; + parm_list.product_id_addr = (unsigned long) id; + parm_list.buffer_addr = virt_to_phys(buffer); + asm volatile( + " diag %1,%0,0xdc" + : "=d" (ry) + : "d" (&parm_list), "m" (parm_list), "m" (*id) + : "cc"); + return ry; +} + +#endif /* _ASM_S390_APPLDATA_H */ diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h new file mode 100644 index 00000000000..d370ee36a18 --- /dev/null +++ b/arch/s390/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include <generated/asm-offsets.h> diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h new file mode 100644 index 00000000000..fa934fe080c --- /dev/null +++ b/arch/s390/include/asm/atomic.h @@ -0,0 +1,415 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Denis Joseph Barrow, + * Arnd Bergmann <arndb@de.ibm.com>, + * + * Atomic operations that C can't guarantee us. + * Useful for resource counting etc. + * s390 uses 'Compare And Swap' for atomicity in SMP environment. + * + */ + +#ifndef __ARCH_S390_ATOMIC__ +#define __ARCH_S390_ATOMIC__ + +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/barrier.h> +#include <asm/cmpxchg.h> + +#define ATOMIC_INIT(i) { (i) } + +#define __ATOMIC_NO_BARRIER "\n" + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __ATOMIC_OR "lao" +#define __ATOMIC_AND "lan" +#define __ATOMIC_ADD "laa" +#define __ATOMIC_BARRIER "bcr 14,0\n" + +#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ + int old_val; \ + \ + typecheck(atomic_t *, ptr); \ + asm volatile( \ + __barrier \ + op_string " %0,%2,%1\n" \ + __barrier \ + : "=d" (old_val), "+Q" ((ptr)->counter) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define __ATOMIC_OR "or" +#define __ATOMIC_AND "nr" +#define __ATOMIC_ADD "ar" +#define __ATOMIC_BARRIER "\n" + +#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ + int old_val, new_val; \ + \ + typecheck(atomic_t *, ptr); \ + asm volatile( \ + " l %0,%2\n" \ + "0: lr %1,%0\n" \ + op_string " %1,%3\n" \ + " cs %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +static inline int atomic_read(const atomic_t *v) +{ + int c; + + asm volatile( + " l %0,%1\n" + : "=d" (c) : "Q" (v->counter)); + return c; +} + +static inline void atomic_set(atomic_t *v, int i) +{ + asm volatile( + " st %1,%0\n" + : "=Q" (v->counter) : "d" (i)); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; +} + +static inline void atomic_add(int i, atomic_t *v) +{ +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + asm volatile( + "asi %0,%1\n" + : "+Q" (v->counter) + : "i" (i) + : "cc", "memory"); + return; + } +#endif + __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER); +} + +#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0) +#define atomic_inc(_v) atomic_add(1, _v) +#define atomic_inc_return(_v) atomic_add_return(1, _v) +#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) +#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) +#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) +#define atomic_dec(_v) atomic_sub(1, _v) +#define atomic_dec_return(_v) atomic_sub_return(1, _v) +#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) + +static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER); +} + +static inline void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER); +} + +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + asm volatile( + " cs %0,%2,%1" + : "+d" (old), "+Q" (v->counter) + : "d" (new) + : "cc", "memory"); + return old; +} + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == u)) + break; + old = atomic_cmpxchg(v, c, c + a); + if (likely(old == c)) + break; + c = old; + } + return c; +} + + +#undef __ATOMIC_LOOP + +#define ATOMIC64_INIT(i) { (i) } + +#ifdef CONFIG_64BIT + +#define __ATOMIC64_NO_BARRIER "\n" + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __ATOMIC64_OR "laog" +#define __ATOMIC64_AND "lang" +#define __ATOMIC64_ADD "laag" +#define __ATOMIC64_BARRIER "bcr 14,0\n" + +#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ + long long old_val; \ + \ + typecheck(atomic64_t *, ptr); \ + asm volatile( \ + __barrier \ + op_string " %0,%2,%1\n" \ + __barrier \ + : "=d" (old_val), "+Q" ((ptr)->counter) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define __ATOMIC64_OR "ogr" +#define __ATOMIC64_AND "ngr" +#define __ATOMIC64_ADD "agr" +#define __ATOMIC64_BARRIER "\n" + +#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \ +({ \ + long long old_val, new_val; \ + \ + typecheck(atomic64_t *, ptr); \ + asm volatile( \ + " lg %0,%2\n" \ + "0: lgr %1,%0\n" \ + op_string " %1,%3\n" \ + " csg %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +static inline long long atomic64_read(const atomic64_t *v) +{ + long long c; + + asm volatile( + " lg %0,%1\n" + : "=d" (c) : "Q" (v->counter)); + return c; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + asm volatile( + " stg %1,%0\n" + : "=Q" (v->counter) : "d" (i)); +} + +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; +} + +static inline void atomic64_add(long long i, atomic64_t *v) +{ +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + asm volatile( + "agsi %0,%1\n" + : "+Q" (v->counter) + : "i" (i) + : "cc", "memory"); + return; + } +#endif + __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER); +} + +static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v) +{ + __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER); +} + +static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v) +{ + __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER); +} + +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline long long atomic64_cmpxchg(atomic64_t *v, + long long old, long long new) +{ + asm volatile( + " csg %0,%2,%1" + : "+d" (old), "+Q" (v->counter) + : "d" (new) + : "cc", "memory"); + return old; +} + +#undef __ATOMIC64_LOOP + +#else /* CONFIG_64BIT */ + +typedef struct { + long long counter; +} atomic64_t; + +static inline long long atomic64_read(const atomic64_t *v) +{ + register_pair rp; + + asm volatile( + " lm %0,%N0,%1" + : "=&d" (rp) : "Q" (v->counter) ); + return rp.pair; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + register_pair rp = {.pair = i}; + + asm volatile( + " stm %1,%N1,%0" + : "=Q" (v->counter) : "d" (rp) ); +} + +static inline long long atomic64_xchg(atomic64_t *v, long long new) +{ + register_pair rp_new = {.pair = new}; + register_pair rp_old; + + asm volatile( + " lm %0,%N0,%1\n" + "0: cds %0,%2,%1\n" + " jl 0b\n" + : "=&d" (rp_old), "+Q" (v->counter) + : "d" (rp_new) + : "cc"); + return rp_old.pair; +} + +static inline long long atomic64_cmpxchg(atomic64_t *v, + long long old, long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + + asm volatile( + " cds %0,%2,%1" + : "+&d" (rp_old), "+Q" (v->counter) + : "d" (rp_new) + : "cc"); + return rp_old.pair; +} + + +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old + i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old | mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old & mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +static inline void atomic64_add(long long i, atomic64_t *v) +{ + atomic64_add_return(i, v); +} + +#endif /* CONFIG_64BIT */ + +static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) +{ + long long c, old; + + c = atomic64_read(v); + for (;;) { + if (unlikely(c == u)) + break; + old = atomic64_cmpxchg(v, c, c + i); + if (likely(old == c)) + break; + c = old; + } + return c != u; +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long c, old, dec; + + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) +#define atomic64_inc(_v) atomic64_add(1, _v) +#define atomic64_inc_return(_v) atomic64_add_return(1, _v) +#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) +#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) +#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) +#define atomic64_dec(_v) atomic64_sub(1, _v) +#define atomic64_dec_return(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +#endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h new file mode 100644 index 00000000000..19ff956b752 --- /dev/null +++ b/arch/s390/include/asm/barrier.h @@ -0,0 +1,51 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES +/* Fast-BCR without checkpoint synchronization */ +#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#else +#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#endif + +#define rmb() mb() +#define wmb() mb() +#define read_barrier_depends() do { } while(0) +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() + +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() smp_mb() + +#define set_mb(var, value) do { var = value; mb(); } while (0) + +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h new file mode 100644 index 00000000000..52054247767 --- /dev/null +++ b/arch/s390/include/asm/bitops.h @@ -0,0 +1,482 @@ +/* + * Copyright IBM Corp. 1999,2013 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * + * The description below was taken in large parts from the powerpc + * bitops header file: + * Within a word, bits are numbered LSB first. Lot's of places make + * this assumption by directly testing bits with (val & (1<<nr)). + * This can cause confusion for large (> 1 word) bitmaps on a + * big-endian system because, unlike little endian, the number of each + * bit depends on the word size. + * + * The bitop functions are defined to work on unsigned longs, so for an + * s390x system the bits end up numbered: + * |63..............0|127............64|191...........128|255...........192| + * and on s390: + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| + * + * There are a few little-endian macros used mostly for filesystem + * bitmaps, these work on similar bit arrays layouts, but + * byte-oriented: + * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| + * + * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit + * number field needs to be reversed compared to the big-endian bit + * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b). + * + * We also have special functions which work with an MSB0 encoding: + * on an s390x system the bits are numbered: + * |0..............63|64............127|128...........191|192...........255| + * and on s390: + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| + * + * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit + * number field needs to be reversed compared to the LSB0 encoded bit + * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b). + * + */ + +#ifndef _S390_BITOPS_H +#define _S390_BITOPS_H + +#ifndef _LINUX_BITOPS_H +#error only <linux/bitops.h> can be included directly +#endif + +#include <linux/typecheck.h> +#include <linux/compiler.h> +#include <asm/barrier.h> + +#define __BITOPS_NO_BARRIER "\n" + +#ifndef CONFIG_64BIT + +#define __BITOPS_OR "or" +#define __BITOPS_AND "nr" +#define __BITOPS_XOR "xr" +#define __BITOPS_BARRIER "\n" + +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ +({ \ + unsigned long __old, __new; \ + \ + typecheck(unsigned long *, (__addr)); \ + asm volatile( \ + " l %0,%2\n" \ + "0: lr %1,%0\n" \ + __op_string " %1,%3\n" \ + " cs %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\ + : "d" (__val) \ + : "cc", "memory"); \ + __old; \ +}) + +#else /* CONFIG_64BIT */ + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __BITOPS_OR "laog" +#define __BITOPS_AND "lang" +#define __BITOPS_XOR "laxg" +#define __BITOPS_BARRIER "bcr 14,0\n" + +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ +({ \ + unsigned long __old; \ + \ + typecheck(unsigned long *, (__addr)); \ + asm volatile( \ + __barrier \ + __op_string " %0,%2,%1\n" \ + __barrier \ + : "=d" (__old), "+Q" (*(__addr)) \ + : "d" (__val) \ + : "cc", "memory"); \ + __old; \ +}) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define __BITOPS_OR "ogr" +#define __BITOPS_AND "ngr" +#define __BITOPS_XOR "xgr" +#define __BITOPS_BARRIER "\n" + +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ +({ \ + unsigned long __old, __new; \ + \ + typecheck(unsigned long *, (__addr)); \ + asm volatile( \ + " lg %0,%2\n" \ + "0: lgr %1,%0\n" \ + __op_string " %1,%3\n" \ + " csg %0,%1,%2\n" \ + " jl 0b" \ + : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\ + : "d" (__val) \ + : "cc", "memory"); \ + __old; \ +}) + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#endif /* CONFIG_64BIT */ + +#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) + +static inline unsigned long * +__bitops_word(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long addr; + + addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); + return (unsigned long *)addr; +} + +static inline unsigned char * +__bitops_byte(unsigned long nr, volatile unsigned long *ptr) +{ + return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); +} + +static inline void set_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long mask; + +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + if (__builtin_constant_p(nr)) { + unsigned char *caddr = __bitops_byte(nr, ptr); + + asm volatile( + "oi %0,%b1\n" + : "+Q" (*caddr) + : "i" (1 << (nr & 7)) + : "cc", "memory"); + return; + } +#endif + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER); +} + +static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long mask; + +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + if (__builtin_constant_p(nr)) { + unsigned char *caddr = __bitops_byte(nr, ptr); + + asm volatile( + "ni %0,%b1\n" + : "+Q" (*caddr) + : "i" (~(1 << (nr & 7))) + : "cc", "memory"); + return; + } +#endif + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); + __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER); +} + +static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long mask; + +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + if (__builtin_constant_p(nr)) { + unsigned char *caddr = __bitops_byte(nr, ptr); + + asm volatile( + "xi %0,%b1\n" + : "+Q" (*caddr) + : "i" (1 << (nr & 7)) + : "cc", "memory"); + return; + } +#endif + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER); +} + +static inline int +test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long old, mask; + + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER); + return (old & mask) != 0; +} + +static inline int +test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long old, mask; + + mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); + old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER); + return (old & ~mask) != 0; +} + +static inline int +test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned long *addr = __bitops_word(nr, ptr); + unsigned long old, mask; + + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER); + return (old & mask) != 0; +} + +static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned char *addr = __bitops_byte(nr, ptr); + + *addr |= 1 << (nr & 7); +} + +static inline void +__clear_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned char *addr = __bitops_byte(nr, ptr); + + *addr &= ~(1 << (nr & 7)); +} + +static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned char *addr = __bitops_byte(nr, ptr); + + *addr ^= 1 << (nr & 7); +} + +static inline int +__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned char *addr = __bitops_byte(nr, ptr); + unsigned char ch; + + ch = *addr; + *addr |= 1 << (nr & 7); + return (ch >> (nr & 7)) & 1; +} + +static inline int +__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned char *addr = __bitops_byte(nr, ptr); + unsigned char ch; + + ch = *addr; + *addr &= ~(1 << (nr & 7)); + return (ch >> (nr & 7)) & 1; +} + +static inline int +__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) +{ + unsigned char *addr = __bitops_byte(nr, ptr); + unsigned char ch; + + ch = *addr; + *addr ^= 1 << (nr & 7); + return (ch >> (nr & 7)) & 1; +} + +static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr) +{ + const volatile unsigned char *addr; + + addr = ((const volatile unsigned char *)ptr); + addr += (nr ^ (BITS_PER_LONG - 8)) >> 3; + return (*addr >> (nr & 7)) & 1; +} + +/* + * Functions which use MSB0 bit numbering. + * On an s390x system the bits are numbered: + * |0..............63|64............127|128...........191|192...........255| + * and on s390: + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| + */ +unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size); +unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, + unsigned long offset); + +static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr) +{ + return set_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) +{ + return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr) +{ + return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) +{ + return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline int test_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + +/** + * __flogr - find leftmost one + * @word - The word to search + * + * Returns the bit number of the most significant bit set, + * where the most significant bit has bit number 0. + * If no bit is set this function returns 64. + */ +static inline unsigned char __flogr(unsigned long word) +{ + if (__builtin_constant_p(word)) { + unsigned long bit = 0; + + if (!word) + return 64; + if (!(word & 0xffffffff00000000UL)) { + word <<= 32; + bit += 32; + } + if (!(word & 0xffff000000000000UL)) { + word <<= 16; + bit += 16; + } + if (!(word & 0xff00000000000000UL)) { + word <<= 8; + bit += 8; + } + if (!(word & 0xf000000000000000UL)) { + word <<= 4; + bit += 4; + } + if (!(word & 0xc000000000000000UL)) { + word <<= 2; + bit += 2; + } + if (!(word & 0x8000000000000000UL)) { + word <<= 1; + bit += 1; + } + return bit; + } else { + register unsigned long bit asm("4") = word; + register unsigned long out asm("5"); + + asm volatile( + " flogr %[bit],%[bit]\n" + : [bit] "+d" (bit), [out] "=d" (out) : : "cc"); + return bit; + } +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + return __flogr(-word & word) ^ (BITS_PER_LONG - 1); +} + +/** + * ffs - find first bit set + * @word: the word to search + * + * This is defined the same way as the libc and + * compiler builtin ffs routines (man ffs). + */ +static inline int ffs(int word) +{ + unsigned long mask = 2 * BITS_PER_LONG - 1; + unsigned int val = (unsigned int)word; + + return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask; +} + +/** + * __fls - find last (most-significant) set bit in a long word + * @word: the word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static inline unsigned long __fls(unsigned long word) +{ + return __flogr(word) ^ (BITS_PER_LONG - 1); +} + +/** + * fls64 - find last set bit in a 64-bit word + * @word: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. + */ +static inline int fls64(unsigned long word) +{ + unsigned long mask = 2 * BITS_PER_LONG - 1; + + return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask; +} + +/** + * fls - find last (most-significant) bit set + * @word: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. + */ +static inline int fls(int word) +{ + return fls64((unsigned int)word); +} + +#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ + +#include <asm-generic/bitops/__ffs.h> +#include <asm-generic/bitops/ffs.h> +#include <asm-generic/bitops/__fls.h> +#include <asm-generic/bitops/fls.h> +#include <asm-generic/bitops/fls64.h> + +#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ + +#include <asm-generic/bitops/ffz.h> +#include <asm-generic/bitops/find.h> +#include <asm-generic/bitops/hweight.h> +#include <asm-generic/bitops/lock.h> +#include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/le.h> +#include <asm-generic/bitops/ext2-atomic-setbit.h> + +#endif /* _S390_BITOPS_H */ diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h new file mode 100644 index 00000000000..bf90d1fd97a --- /dev/null +++ b/arch/s390/include/asm/bug.h @@ -0,0 +1,71 @@ +#ifndef _ASM_S390_BUG_H +#define _ASM_S390_BUG_H + +#include <linux/kernel.h> + +#ifdef CONFIG_BUG + +#ifdef CONFIG_DEBUG_BUGVERBOSE + +#define __EMIT_BUG(x) do { \ + asm volatile( \ + "0: j 0b+2\n" \ + "1:\n" \ + ".section .rodata.str,\"aMS\",@progbits,1\n" \ + "2: .asciz \""__FILE__"\"\n" \ + ".previous\n" \ + ".section __bug_table,\"a\"\n" \ + "3: .long 1b-3b,2b-3b\n" \ + " .short %0,%1\n" \ + " .org 3b+%2\n" \ + ".previous\n" \ + : : "i" (__LINE__), \ + "i" (x), \ + "i" (sizeof(struct bug_entry))); \ +} while (0) + +#else /* CONFIG_DEBUG_BUGVERBOSE */ + +#define __EMIT_BUG(x) do { \ + asm volatile( \ + "0: j 0b+2\n" \ + "1:\n" \ + ".section __bug_table,\"a\"\n" \ + "2: .long 1b-2b\n" \ + " .short %0\n" \ + " .org 2b+%1\n" \ + ".previous\n" \ + : : "i" (x), \ + "i" (sizeof(struct bug_entry))); \ +} while (0) + +#endif /* CONFIG_DEBUG_BUGVERBOSE */ + +#define BUG() do { \ + __EMIT_BUG(0); \ + unreachable(); \ +} while (0) + +#define __WARN_TAINT(taint) do { \ + __EMIT_BUG(BUGFLAG_TAINT(taint)); \ +} while (0) + +#define WARN_ON(x) ({ \ + int __ret_warn_on = !!(x); \ + if (__builtin_constant_p(__ret_warn_on)) { \ + if (__ret_warn_on) \ + __WARN(); \ + } else { \ + if (unlikely(__ret_warn_on)) \ + __WARN(); \ + } \ + unlikely(__ret_warn_on); \ +}) + +#define HAVE_ARCH_BUG +#define HAVE_ARCH_WARN_ON +#endif /* CONFIG_BUG */ + +#include <asm-generic/bug.h> + +#endif /* _ASM_S390_BUG_H */ diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h new file mode 100644 index 00000000000..0f5bd894f4d --- /dev/null +++ b/arch/s390/include/asm/bugs.h @@ -0,0 +1,20 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/bugs.h" + * Copyright (C) 1994 Linus Torvalds + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +static inline void check_bugs(void) +{ + /* s390 has no bugs ... */ +} diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h new file mode 100644 index 00000000000..4d7ccac5fd1 --- /dev/null +++ b/arch/s390/include/asm/cache.h @@ -0,0 +1,18 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * + * Derived from "include/asm-i386/cache.h" + * Copyright (C) 1992, Linus Torvalds + */ + +#ifndef __ARCH_S390_CACHE_H +#define __ARCH_S390_CACHE_H + +#define L1_CACHE_BYTES 256 +#define L1_CACHE_SHIFT 8 +#define NET_SKB_PAD 32 + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#endif diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h new file mode 100644 index 00000000000..3e20383d092 --- /dev/null +++ b/arch/s390/include/asm/cacheflush.h @@ -0,0 +1,16 @@ +#ifndef _S390_CACHEFLUSH_H +#define _S390_CACHEFLUSH_H + +/* Caches aren't brain-dead on the s390. */ +#include <asm-generic/cacheflush.h> + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable); +#endif + +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); + +#endif /* _S390_CACHEFLUSH_H */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h new file mode 100644 index 00000000000..b80e456d642 --- /dev/null +++ b/arch/s390/include/asm/ccwdev.h @@ -0,0 +1,233 @@ +/* + * Copyright IBM Corp. 2002, 2009 + * + * Author(s): Arnd Bergmann <arndb@de.ibm.com> + * + * Interface for CCW device drivers + */ +#ifndef _S390_CCWDEV_H_ +#define _S390_CCWDEV_H_ + +#include <linux/device.h> +#include <linux/mod_devicetable.h> +#include <asm/fcx.h> +#include <asm/irq.h> +#include <asm/schid.h> + +/* structs from asm/cio.h */ +struct irb; +struct ccw1; +struct ccw_dev_id; + +/* simplified initializers for struct ccw_device: + * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one + * entry in your MODULE_DEVICE_TABLE and set the match_flag correctly */ +#define CCW_DEVICE(cu, cum) \ + .cu_type=(cu), .cu_model=(cum), \ + .match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE \ + | (cum ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0)) + +#define CCW_DEVICE_DEVTYPE(cu, cum, dev, devm) \ + .cu_type=(cu), .cu_model=(cum), .dev_type=(dev), .dev_model=(devm),\ + .match_flags=CCW_DEVICE_ID_MATCH_CU_TYPE \ + | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0) \ + | CCW_DEVICE_ID_MATCH_DEVICE_TYPE \ + | ((devm) ? CCW_DEVICE_ID_MATCH_DEVICE_MODEL : 0) + +/* scan through an array of device ids and return the first + * entry that matches the device. + * + * the array must end with an entry containing zero match_flags + */ +static inline const struct ccw_device_id * +ccw_device_id_match(const struct ccw_device_id *array, + const struct ccw_device_id *match) +{ + const struct ccw_device_id *id = array; + + for (id = array; id->match_flags; id++) { + if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_TYPE) + && (id->cu_type != match->cu_type)) + continue; + + if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_MODEL) + && (id->cu_model != match->cu_model)) + continue; + + if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_TYPE) + && (id->dev_type != match->dev_type)) + continue; + + if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_MODEL) + && (id->dev_model != match->dev_model)) + continue; + + return id; + } + + return NULL; +} + +/** + * struct ccw_device - channel attached device + * @ccwlock: pointer to device lock + * @id: id of this device + * @drv: ccw driver for this device + * @dev: embedded device structure + * @online: online status of device + * @handler: interrupt handler + * + * @handler is a member of the device rather than the driver since a driver + * can have different interrupt handlers for different ccw devices + * (multi-subchannel drivers). + */ +struct ccw_device { + spinlock_t *ccwlock; +/* private: */ + struct ccw_device_private *private; /* cio private information */ +/* public: */ + struct ccw_device_id id; + struct ccw_driver *drv; + struct device dev; + int online; + void (*handler) (struct ccw_device *, unsigned long, struct irb *); +}; + +/* + * Possible events used by the path_event notifier. + */ +#define PE_NONE 0x0 +#define PE_PATH_GONE 0x1 /* A path is no longer available. */ +#define PE_PATH_AVAILABLE 0x2 /* A path has become available and + was successfully verified. */ +#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had + to be established again. */ + +/* + * Possible CIO actions triggered by the unit check handler. + */ +enum uc_todo { + UC_TODO_RETRY, + UC_TODO_RETRY_ON_NEW_PATH, + UC_TODO_STOP +}; + +/** + * struct ccw driver - device driver for channel attached devices + * @ids: ids supported by this driver + * @probe: function called on probe + * @remove: function called on remove + * @set_online: called when setting device online + * @set_offline: called when setting device offline + * @notify: notify driver of device state changes + * @path_event: notify driver of channel path events + * @shutdown: called at device shutdown + * @prepare: prepare for pm state transition + * @complete: undo work done in @prepare + * @freeze: callback for freezing during hibernation snapshotting + * @thaw: undo work done in @freeze + * @restore: callback for restoring after hibernation + * @uc_handler: callback for unit check handler + * @driver: embedded device driver structure + * @int_class: interruption class to use for accounting interrupts + */ +struct ccw_driver { + struct ccw_device_id *ids; + int (*probe) (struct ccw_device *); + void (*remove) (struct ccw_device *); + int (*set_online) (struct ccw_device *); + int (*set_offline) (struct ccw_device *); + int (*notify) (struct ccw_device *, int); + void (*path_event) (struct ccw_device *, int *); + void (*shutdown) (struct ccw_device *); + int (*prepare) (struct ccw_device *); + void (*complete) (struct ccw_device *); + int (*freeze)(struct ccw_device *); + int (*thaw) (struct ccw_device *); + int (*restore)(struct ccw_device *); + enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); + struct device_driver driver; + enum interruption_class int_class; +}; + +extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, + const char *bus_id); + +/* devices drivers call these during module load and unload. + * When a driver is registered, its probe method is called + * when new devices for its type pop up */ +extern int ccw_driver_register (struct ccw_driver *driver); +extern void ccw_driver_unregister (struct ccw_driver *driver); + +struct ccw1; + +extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long); +extern int ccw_device_set_options(struct ccw_device *, unsigned long); +extern void ccw_device_clear_options(struct ccw_device *, unsigned long); +int ccw_device_is_pathgroup(struct ccw_device *cdev); +int ccw_device_is_multipath(struct ccw_device *cdev); + +/* Allow for i/o completion notification after primary interrupt status. */ +#define CCWDEV_EARLY_NOTIFICATION 0x0001 +/* Report all interrupt conditions. */ +#define CCWDEV_REPORT_ALL 0x0002 +/* Try to perform path grouping. */ +#define CCWDEV_DO_PATHGROUP 0x0004 +/* Allow forced onlining of boxed devices. */ +#define CCWDEV_ALLOW_FORCE 0x0008 +/* Try to use multipath mode. */ +#define CCWDEV_DO_MULTIPATH 0x0010 + +extern int ccw_device_start(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, unsigned long); +extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, unsigned long, int); +extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, __u8, unsigned long); +extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *, + unsigned long, __u8, __u8, + unsigned long, int); + + +extern int ccw_device_resume(struct ccw_device *); +extern int ccw_device_halt(struct ccw_device *, unsigned long); +extern int ccw_device_clear(struct ccw_device *, unsigned long); +int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, + unsigned long intparm, u8 lpm, u8 key); +int ccw_device_tm_start_key(struct ccw_device *, struct tcw *, + unsigned long, u8, u8); +int ccw_device_tm_start_timeout_key(struct ccw_device *, struct tcw *, + unsigned long, u8, u8, int); +int ccw_device_tm_start(struct ccw_device *, struct tcw *, + unsigned long, u8); +int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *, + unsigned long, u8, int); +int ccw_device_tm_intrg(struct ccw_device *cdev); + +int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask); + +extern int ccw_device_set_online(struct ccw_device *cdev); +extern int ccw_device_set_offline(struct ccw_device *cdev); + + +extern struct ciw *ccw_device_get_ciw(struct ccw_device *, __u32 cmd); +extern __u8 ccw_device_get_path_mask(struct ccw_device *); +extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); + +#define get_ccwdev_lock(x) (x)->ccwlock + +#define to_ccwdev(n) container_of(n, struct ccw_device, dev) +#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) + +extern struct ccw_device *ccw_device_create_console(struct ccw_driver *); +extern void ccw_device_destroy_console(struct ccw_device *); +extern int ccw_device_enable_console(struct ccw_device *); +extern void ccw_device_wait_idle(struct ccw_device *); +extern int ccw_device_force_console(struct ccw_device *); + +int ccw_device_siosl(struct ccw_device *); + +extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); + +struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int); +#endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h new file mode 100644 index 00000000000..057ce0ca637 --- /dev/null +++ b/arch/s390/include/asm/ccwgroup.h @@ -0,0 +1,73 @@ +#ifndef S390_CCWGROUP_H +#define S390_CCWGROUP_H + +struct ccw_device; +struct ccw_driver; + +/** + * struct ccwgroup_device - ccw group device + * @state: online/offline state + * @count: number of attached slave devices + * @dev: embedded device structure + * @cdev: variable number of slave devices, allocated as needed + * @ungroup_work: work to be done when a ccwgroup notifier has action + * type %BUS_NOTIFY_UNBIND_DRIVER + */ +struct ccwgroup_device { + enum { + CCWGROUP_OFFLINE, + CCWGROUP_ONLINE, + } state; +/* private: */ + atomic_t onoff; + struct mutex reg_mutex; +/* public: */ + unsigned int count; + struct device dev; + struct work_struct ungroup_work; + struct ccw_device *cdev[0]; +}; + +/** + * struct ccwgroup_driver - driver for ccw group devices + * @setup: function called during device creation to setup the device + * @remove: function called on remove + * @set_online: function called when device is set online + * @set_offline: function called when device is set offline + * @shutdown: function called when device is shut down + * @prepare: prepare for pm state transition + * @complete: undo work done in @prepare + * @freeze: callback for freezing during hibernation snapshotting + * @thaw: undo work done in @freeze + * @restore: callback for restoring after hibernation + * @driver: embedded driver structure + */ +struct ccwgroup_driver { + int (*setup) (struct ccwgroup_device *); + void (*remove) (struct ccwgroup_device *); + int (*set_online) (struct ccwgroup_device *); + int (*set_offline) (struct ccwgroup_device *); + void (*shutdown)(struct ccwgroup_device *); + int (*prepare) (struct ccwgroup_device *); + void (*complete) (struct ccwgroup_device *); + int (*freeze)(struct ccwgroup_device *); + int (*thaw) (struct ccwgroup_device *); + int (*restore)(struct ccwgroup_device *); + + struct device_driver driver; +}; + +extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); +extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver); +int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv, + int num_devices, const char *buf); + +extern int ccwgroup_set_online(struct ccwgroup_device *gdev); +extern int ccwgroup_set_offline(struct ccwgroup_device *gdev); + +extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev); +extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); + +#define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev) +#define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver) +#endif diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h new file mode 100644 index 00000000000..74036485635 --- /dev/null +++ b/arch/s390/include/asm/checksum.h @@ -0,0 +1,140 @@ +/* + * S390 fast network checksum routines + * + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Ulrich Hild (first version) + * Martin Schwidefsky (heavily optimized CKSM version) + * D.J. Barrow (third attempt) + */ + +#ifndef _S390_CHECKSUM_H +#define _S390_CHECKSUM_H + +#include <asm/uaccess.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +static inline __wsum +csum_partial(const void *buff, int len, __wsum sum) +{ + register unsigned long reg2 asm("2") = (unsigned long) buff; + register unsigned long reg3 asm("3") = (unsigned long) len; + + asm volatile( + "0: cksm %0,%1\n" /* do checksum on longs */ + " jo 0b\n" + : "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory"); + return sum; +} + +/* + * the same as csum_partial_copy, but copies from user space. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + * + * Copy from userspace and compute checksum. + */ +static inline __wsum +csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, + int *err_ptr) +{ + if (unlikely(copy_from_user(dst, src, len))) + *err_ptr = -EFAULT; + return csum_partial(dst, len, sum); +} + + +static inline __wsum +csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) +{ + memcpy(dst,src,len); + return csum_partial(dst, len, sum); +} + +/* + * Fold a partial checksum without adding pseudo headers + */ +static inline __sum16 csum_fold(__wsum sum) +{ + u32 csum = (__force u32) sum; + + csum += (csum >> 16) + (csum << 16); + csum >>= 16; + return (__force __sum16) ~csum; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return csum_fold(csum_partial(iph, ihl*4, 0)); +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 32-bit checksum + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, unsigned short proto, + __wsum sum) +{ + __u32 csum = (__force __u32)sum; + + csum += (__force __u32)saddr; + if (csum < (__force __u32)saddr) + csum++; + + csum += (__force __u32)daddr; + if (csum < (__force __u32)daddr) + csum++; + + csum += len + proto; + if (csum < len + proto) + csum++; + + return (__force __wsum)csum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ + +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#endif /* _S390_CHECKSUM_H */ + + diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h new file mode 100644 index 00000000000..7298eec9854 --- /dev/null +++ b/arch/s390/include/asm/chpid.h @@ -0,0 +1,50 @@ +/* + * Copyright IBM Corp. 2007, 2012 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ +#ifndef _ASM_S390_CHPID_H +#define _ASM_S390_CHPID_H + +#include <uapi/asm/chpid.h> +#include <asm/cio.h> + +struct channel_path_desc { + u8 flags; + u8 lsn; + u8 desc; + u8 chpid; + u8 swla; + u8 zeroes; + u8 chla; + u8 chpp; +} __packed; + +static inline void chp_id_init(struct chp_id *chpid) +{ + memset(chpid, 0, sizeof(struct chp_id)); +} + +static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b) +{ + return (a->id == b->id) && (a->cssid == b->cssid); +} + +static inline void chp_id_next(struct chp_id *chpid) +{ + if (chpid->id < __MAX_CHPID) + chpid->id++; + else { + chpid->id = 0; + chpid->cssid++; + } +} + +static inline int chp_id_is_valid(struct chp_id *chpid) +{ + return (chpid->cssid <= __MAX_CSSID); +} + + +#define chp_id_for_each(c) \ + for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c)) +#endif /* _ASM_S390_CHPID_H */ diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h new file mode 100644 index 00000000000..09633920776 --- /dev/null +++ b/arch/s390/include/asm/cio.h @@ -0,0 +1,315 @@ +/* + * Common interface for I/O on S/390 + */ +#ifndef _ASM_S390_CIO_H_ +#define _ASM_S390_CIO_H_ + +#include <linux/spinlock.h> +#include <asm/types.h> + +#define LPM_ANYPATH 0xff +#define __MAX_CSSID 0 +#define __MAX_SUBCHANNEL 65535 +#define __MAX_SSID 3 + +#include <asm/scsw.h> + +/** + * struct ccw1 - channel command word + * @cmd_code: command code + * @flags: flags, like IDA addressing, etc. + * @count: byte count + * @cda: data address + * + * The ccw is the basic structure to build channel programs that perform + * operations with the device or the control unit. Only Format-1 channel + * command words are supported. + */ +struct ccw1 { + __u8 cmd_code; + __u8 flags; + __u16 count; + __u32 cda; +} __attribute__ ((packed,aligned(8))); + +#define CCW_FLAG_DC 0x80 +#define CCW_FLAG_CC 0x40 +#define CCW_FLAG_SLI 0x20 +#define CCW_FLAG_SKIP 0x10 +#define CCW_FLAG_PCI 0x08 +#define CCW_FLAG_IDA 0x04 +#define CCW_FLAG_SUSPEND 0x02 + +#define CCW_CMD_READ_IPL 0x02 +#define CCW_CMD_NOOP 0x03 +#define CCW_CMD_BASIC_SENSE 0x04 +#define CCW_CMD_TIC 0x08 +#define CCW_CMD_STLCK 0x14 +#define CCW_CMD_SENSE_PGID 0x34 +#define CCW_CMD_SUSPEND_RECONN 0x5B +#define CCW_CMD_RDC 0x64 +#define CCW_CMD_RELEASE 0x94 +#define CCW_CMD_SET_PGID 0xAF +#define CCW_CMD_SENSE_ID 0xE4 +#define CCW_CMD_DCTL 0xF3 + +#define SENSE_MAX_COUNT 0x20 + +/** + * struct erw - extended report word + * @res0: reserved + * @auth: authorization check + * @pvrf: path-verification-required flag + * @cpt: channel-path timeout + * @fsavf: failing storage address validity flag + * @cons: concurrent sense + * @scavf: secondary ccw address validity flag + * @fsaf: failing storage address format + * @scnt: sense count, if @cons == %1 + * @res16: reserved + */ +struct erw { + __u32 res0 : 3; + __u32 auth : 1; + __u32 pvrf : 1; + __u32 cpt : 1; + __u32 fsavf : 1; + __u32 cons : 1; + __u32 scavf : 1; + __u32 fsaf : 1; + __u32 scnt : 6; + __u32 res16 : 16; +} __attribute__ ((packed)); + +/** + * struct erw_eadm - EADM Subchannel extended report word + * @b: aob error + * @r: arsb error + */ +struct erw_eadm { + __u32 : 16; + __u32 b : 1; + __u32 r : 1; + __u32 : 14; +} __packed; + +/** + * struct sublog - subchannel logout area + * @res0: reserved + * @esf: extended status flags + * @lpum: last path used mask + * @arep: ancillary report + * @fvf: field-validity flags + * @sacc: storage access code + * @termc: termination code + * @devsc: device-status check + * @serr: secondary error + * @ioerr: i/o-error alert + * @seqc: sequence code + */ +struct sublog { + __u32 res0 : 1; + __u32 esf : 7; + __u32 lpum : 8; + __u32 arep : 1; + __u32 fvf : 5; + __u32 sacc : 2; + __u32 termc : 2; + __u32 devsc : 1; + __u32 serr : 1; + __u32 ioerr : 1; + __u32 seqc : 3; +} __attribute__ ((packed)); + +/** + * struct esw0 - Format 0 Extended Status Word (ESW) + * @sublog: subchannel logout + * @erw: extended report word + * @faddr: failing storage address + * @saddr: secondary ccw address + */ +struct esw0 { + struct sublog sublog; + struct erw erw; + __u32 faddr[2]; + __u32 saddr; +} __attribute__ ((packed)); + +/** + * struct esw1 - Format 1 Extended Status Word (ESW) + * @zero0: reserved zeros + * @lpum: last path used mask + * @zero16: reserved zeros + * @erw: extended report word + * @zeros: three fullwords of zeros + */ +struct esw1 { + __u8 zero0; + __u8 lpum; + __u16 zero16; + struct erw erw; + __u32 zeros[3]; +} __attribute__ ((packed)); + +/** + * struct esw2 - Format 2 Extended Status Word (ESW) + * @zero0: reserved zeros + * @lpum: last path used mask + * @dcti: device-connect-time interval + * @erw: extended report word + * @zeros: three fullwords of zeros + */ +struct esw2 { + __u8 zero0; + __u8 lpum; + __u16 dcti; + struct erw erw; + __u32 zeros[3]; +} __attribute__ ((packed)); + +/** + * struct esw3 - Format 3 Extended Status Word (ESW) + * @zero0: reserved zeros + * @lpum: last path used mask + * @res: reserved + * @erw: extended report word + * @zeros: three fullwords of zeros + */ +struct esw3 { + __u8 zero0; + __u8 lpum; + __u16 res; + struct erw erw; + __u32 zeros[3]; +} __attribute__ ((packed)); + +/** + * struct esw_eadm - EADM Subchannel Extended Status Word (ESW) + * @sublog: subchannel logout + * @erw: extended report word + */ +struct esw_eadm { + __u32 sublog; + struct erw_eadm erw; + __u32 : 32; + __u32 : 32; + __u32 : 32; +} __packed; + +/** + * struct irb - interruption response block + * @scsw: subchannel status word + * @esw: extended status word + * @ecw: extended control word + * + * The irb that is handed to the device driver when an interrupt occurs. For + * solicited interrupts, the common I/O layer already performs checks whether + * a field is valid; a field not being valid is always passed as %0. + * If a unit check occurred, @ecw may contain sense data; this is retrieved + * by the common I/O layer itself if the device doesn't support concurrent + * sense (so that the device driver never needs to perform basic sene itself). + * For unsolicited interrupts, the irb is passed as-is (expect for sense data, + * if applicable). + */ +struct irb { + union scsw scsw; + union { + struct esw0 esw0; + struct esw1 esw1; + struct esw2 esw2; + struct esw3 esw3; + struct esw_eadm eadm; + } esw; + __u8 ecw[32]; +} __attribute__ ((packed,aligned(4))); + +/** + * struct ciw - command information word (CIW) layout + * @et: entry type + * @reserved: reserved bits + * @ct: command type + * @cmd: command code + * @count: command count + */ +struct ciw { + __u32 et : 2; + __u32 reserved : 2; + __u32 ct : 4; + __u32 cmd : 8; + __u32 count : 16; +} __attribute__ ((packed)); + +#define CIW_TYPE_RCD 0x0 /* read configuration data */ +#define CIW_TYPE_SII 0x1 /* set interface identifier */ +#define CIW_TYPE_RNI 0x2 /* read node identifier */ + +/* + * Flags used as input parameters for do_IO() + */ +#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */ +#define DOIO_DENY_PREFETCH 0x0002 /* don't allow for CCW prefetch */ +#define DOIO_SUPPRESS_INTER 0x0004 /* suppress intermediate inter. */ + /* ... for suspended CCWs */ +/* Device or subchannel gone. */ +#define CIO_GONE 0x0001 +/* No path to device. */ +#define CIO_NO_PATH 0x0002 +/* Device has appeared. */ +#define CIO_OPER 0x0004 +/* Sick revalidation of device. */ +#define CIO_REVALIDATE 0x0008 +/* Device did not respond in time. */ +#define CIO_BOXED 0x0010 + +/** + * struct ccw_dev_id - unique identifier for ccw devices + * @ssid: subchannel set id + * @devno: device number + * + * This structure is not directly based on any hardware structure. The + * hardware identifies a device by its device number and its subchannel, + * which is in turn identified by its id. In order to get a unique identifier + * for ccw devices across subchannel sets, @struct ccw_dev_id has been + * introduced. + */ +struct ccw_dev_id { + u8 ssid; + u16 devno; +}; + +/** + * ccw_device_id_is_equal() - compare two ccw_dev_ids + * @dev_id1: a ccw_dev_id + * @dev_id2: another ccw_dev_id + * Returns: + * %1 if the two structures are equal field-by-field, + * %0 if not. + * Context: + * any + */ +static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, + struct ccw_dev_id *dev_id2) +{ + if ((dev_id1->ssid == dev_id2->ssid) && + (dev_id1->devno == dev_id2->devno)) + return 1; + return 0; +} + +void channel_subsystem_reinit(void); +extern void css_schedule_reprobe(void); + +extern void reipl_ccw_dev(struct ccw_dev_id *id); + +struct cio_iplinfo { + u16 devno; + int is_qdio; +}; + +extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo); + +/* Function from drivers/s390/cio/chsc.c */ +int chsc_sstpc(void *page, unsigned int op, u16 ctrl); +int chsc_sstpi(void *page, void *result, size_t size); + +#endif diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h new file mode 100644 index 00000000000..a0e71a501f7 --- /dev/null +++ b/arch/s390/include/asm/clp.h @@ -0,0 +1,28 @@ +#ifndef _ASM_S390_CLP_H +#define _ASM_S390_CLP_H + +/* CLP common request & response block size */ +#define CLP_BLK_SIZE PAGE_SIZE + +struct clp_req_hdr { + u16 len; + u16 cmd; +} __packed; + +struct clp_rsp_hdr { + u16 len; + u16 rsp; +} __packed; + +/* CLP Response Codes */ +#define CLP_RC_OK 0x0010 /* Command request successfully */ +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ +#define CLP_RC_PERM 0x0030 /* Command not authorized */ +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ +#define CLP_RC_NODATA 0x0080 /* No data available */ +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ + +#endif diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h new file mode 100644 index 00000000000..806eac12e3b --- /dev/null +++ b/arch/s390/include/asm/cmb.h @@ -0,0 +1,12 @@ +#ifndef S390_CMB_H +#define S390_CMB_H + +#include <uapi/asm/cmb.h> + +struct ccw_device; +extern int enable_cmf(struct ccw_device *cdev); +extern int disable_cmf(struct ccw_device *cdev); +extern u64 cmf_read(struct ccw_device *cdev, int index); +extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data); + +#endif /* S390_CMB_H */ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h new file mode 100644 index 00000000000..4236408070e --- /dev/null +++ b/arch/s390/include/asm/cmpxchg.h @@ -0,0 +1,304 @@ +/* + * Copyright IBM Corp. 1999, 2011 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include <linux/mmdebug.h> +#include <linux/types.h> +#include <linux/bug.h> + +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long x, void *ptr, int size) +{ + unsigned long addr, old; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" ((x & 0xff) << shift), "d" (~(0xff << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%4\n" + "0: lr 0,%0\n" + " nr 0,%3\n" + " or 0,%2\n" + " cs %0,0,%4\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) addr) + : "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)), + "Q" (*(int *) addr) : "memory", "cc", "0"); + return old >> shift; + case 4: + asm volatile( + " l %0,%3\n" + "0: cs %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=Q" (*(int *) ptr) + : "d" (x), "Q" (*(int *) ptr) + : "memory", "cc"); + return old; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " lg %0,%3\n" + "0: csg %0,%2,%3\n" + " jl 0b\n" + : "=&d" (old), "=m" (*(long *) ptr) + : "d" (x), "Q" (*(long *) ptr) + : "memory", "cc"); + return old; +#endif /* CONFIG_64BIT */ + } + __xchg_called_with_bad_pointer(); + return x; +} + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\ + __ret; \ +}) + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#define __HAVE_ARCH_CMPXCHG + +extern void __cmpxchg_called_with_bad_pointer(void); + +static inline unsigned long __cmpxchg(void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long addr, prev, tmp; + int shift; + + switch (size) { + case 1: + addr = (unsigned long) ptr; + shift = (3 ^ (addr & 3)) << 3; + addr ^= addr & 3; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr) + : "d" ((old & 0xff) << shift), + "d" ((new & 0xff) << shift), + "d" (~(0xff << shift)) + : "memory", "cc"); + return prev >> shift; + case 2: + addr = (unsigned long) ptr; + shift = (2 ^ (addr & 2)) << 3; + addr ^= addr & 2; + asm volatile( + " l %0,%2\n" + "0: nr %0,%5\n" + " lr %1,%0\n" + " or %0,%3\n" + " or %1,%4\n" + " cs %0,%1,%2\n" + " jnl 1f\n" + " xr %1,%0\n" + " nr %1,%5\n" + " jnz 0b\n" + "1:" + : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr) + : "d" ((old & 0xffff) << shift), + "d" ((new & 0xffff) << shift), + "d" (~(0xffff << shift)) + : "memory", "cc"); + return prev >> shift; + case 4: + asm volatile( + " cs %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(int *) ptr) + : "0" (old), "d" (new), "Q" (*(int *) ptr) + : "memory", "cc"); + return prev; +#ifdef CONFIG_64BIT + case 8: + asm volatile( + " csg %0,%3,%1\n" + : "=&d" (prev), "=Q" (*(long *) ptr) + : "0" (old), "d" (new), "Q" (*(long *) ptr) + : "memory", "cc"); + return prev; +#endif /* CONFIG_64BIT */ + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \ + sizeof(*(ptr))); \ + __ret; \ +}) + +#ifdef CONFIG_64BIT +#define cmpxchg64(ptr, o, n) \ +({ \ + cmpxchg((ptr), (o), (n)); \ +}) +#else /* CONFIG_64BIT */ +static inline unsigned long long __cmpxchg64(void *ptr, + unsigned long long old, + unsigned long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + unsigned long long *ullptr = ptr; + + asm volatile( + " cds %0,%2,%1" + : "+d" (rp_old), "+Q" (*ullptr) + : "d" (rp_new) + : "memory", "cc"); + return rp_old.pair; +} + +#define cmpxchg64(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n)); \ + __ret; \ +}) +#endif /* CONFIG_64BIT */ + +#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ +({ \ + register __typeof__(*(p1)) __old1 asm("2") = (o1); \ + register __typeof__(*(p2)) __old2 asm("3") = (o2); \ + register __typeof__(*(p1)) __new1 asm("4") = (n1); \ + register __typeof__(*(p2)) __new2 asm("5") = (n2); \ + int cc; \ + asm volatile( \ + insn " %[old],%[new],%[ptr]\n" \ + " ipm %[cc]\n" \ + " srl %[cc],28" \ + : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \ + : [new] "d" (__new1), "d" (__new2), \ + [ptr] "Q" (*(p1)), "Q" (*(p2)) \ + : "memory", "cc"); \ + !cc; \ +}) + +#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds") + +#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg") + +extern void __cmpxchg_double_called_with_bad_pointer(void); + +#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + switch (sizeof(*(p1))) { \ + case 4: \ + __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \ + break; \ + case 8: \ + __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \ + break; \ + default: \ + __cmpxchg_double_called_with_bad_pointer(); \ + } \ + __ret; \ +}) + +#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +({ \ + __typeof__(p1) __p1 = (p1); \ + __typeof__(p2) __p2 = (p2); \ + int __ret; \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ + if (sizeof(long) == 4) \ + __ret = __cmpxchg_double_4(__p1, __p2, o1, o2, n1, n2); \ + else \ + __ret = __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \ + __ret; \ +}) + +#define system_has_cmpxchg_double() 1 + +#include <asm-generic/cmpxchg-local.h> + +static inline unsigned long __cmpxchg_local(void *ptr, + unsigned long old, + unsigned long new, int size) +{ + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + return __cmpxchg(ptr, old, new, size); + default: + return __cmpxchg_local_generic(ptr, old, new, size); + } + + return old; +} + +/* + * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make + * them available. + */ +#define cmpxchg_local(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ +}) + +#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n)) + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h new file mode 100644 index 00000000000..d350ed9d0fb --- /dev/null +++ b/arch/s390/include/asm/compat.h @@ -0,0 +1,359 @@ +#ifndef _ASM_S390X_COMPAT_H +#define _ASM_S390X_COMPAT_H +/* + * Architecture specific compatibility types + */ +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/thread_info.h> + +#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) + +#define __SC_DELOUSE(t,v) ({ \ + BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \ + (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ +}) + +#define PSW32_MASK_PER 0x40000000UL +#define PSW32_MASK_DAT 0x04000000UL +#define PSW32_MASK_IO 0x02000000UL +#define PSW32_MASK_EXT 0x01000000UL +#define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ +#define PSW32_MASK_MCHECK 0x00040000UL +#define PSW32_MASK_WAIT 0x00020000UL +#define PSW32_MASK_PSTATE 0x00010000UL +#define PSW32_MASK_ASC 0x0000C000UL +#define PSW32_MASK_CC 0x00003000UL +#define PSW32_MASK_PM 0x00000f00UL +#define PSW32_MASK_RI 0x00000080UL + +#define PSW32_MASK_USER 0x0000FF00UL + +#define PSW32_ADDR_AMODE 0x80000000UL +#define PSW32_ADDR_INSN 0x7FFFFFFFUL + +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) + +#define PSW32_ASC_PRIMARY 0x00000000UL +#define PSW32_ASC_ACCREG 0x00004000UL +#define PSW32_ASC_SECONDARY 0x00008000UL +#define PSW32_ASC_HOME 0x0000C000UL + +#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \ + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \ + PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \ + PSW32_ASC_PRIMARY) + +#define COMPAT_USER_HZ 100 +#define COMPAT_UTS_MACHINE "s390\0\0\0\0" + +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef s32 compat_pid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +typedef u16 compat_mode_t; +typedef u32 compat_ino_t; +typedef u16 compat_dev_t; +typedef s32 compat_off_t; +typedef s64 compat_loff_t; +typedef u16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef __kernel_fsid_t compat_fsid_t; +typedef s32 compat_key_t; +typedef s32 compat_timer_t; + +typedef s32 compat_int_t; +typedef s32 compat_long_t; +typedef s64 compat_s64; +typedef u32 compat_uint_t; +typedef u32 compat_ulong_t; +typedef u64 compat_u64; +typedef u32 compat_uptr_t; + +typedef struct { + u32 mask; + u32 addr; +} __aligned(8) psw_compat_t; + +typedef struct { + psw_compat_t psw; + u32 gprs[NUM_GPRS]; + u32 acrs[NUM_ACRS]; + u32 orig_gpr2; +} s390_compat_regs; + +typedef struct { + u32 gprs_high[NUM_GPRS]; +} s390_compat_regs_high; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +struct compat_stat { + compat_dev_t st_dev; + u16 __pad1; + compat_ino_t st_ino; + compat_mode_t st_mode; + compat_nlink_t st_nlink; + __compat_uid_t st_uid; + __compat_gid_t st_gid; + compat_dev_t st_rdev; + u16 __pad2; + u32 st_size; + u32 st_blksize; + u32 st_blocks; + u32 st_atime; + u32 st_atime_nsec; + u32 st_mtime; + u32 st_mtime_nsec; + u32 st_ctime; + u32 st_ctime_nsec; + u32 __unused4; + u32 __unused5; +}; + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +#define F_GETLK64 12 +#define F_SETLK64 13 +#define F_SETLKW64 14 + +struct compat_flock64 { + short l_type; + short l_whence; + compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +}; + +struct compat_statfs { + u32 f_type; + u32 f_bsize; + u32 f_blocks; + u32 f_bfree; + u32 f_bavail; + u32 f_files; + u32 f_ffree; + compat_fsid_t f_fsid; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; +}; + +struct compat_statfs64 { + u32 f_type; + u32 f_bsize; + u64 f_blocks; + u64 f_bfree; + u64 f_bavail; + u64 f_files; + u64 f_ffree; + compat_fsid_t f_fsid; + u32 f_namelen; + u32 f_frsize; + u32 f_flags; + u32 f_spare[4]; +}; + +#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff +#define COMPAT_RLIM_INFINITY 0xffffffff + +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[128/sizeof(int) - 3]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + uid_t _uid; /* sender's uid */ + int _status;/* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + __u32 _addr; /* faulting insn/memory ref. - pointer */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd +#define si_tid _sifields._timer._tid +#define si_overrun _sifields._timer._overrun + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)(uptr & 0x7fffffffUL); +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +#ifdef CONFIG_COMPAT + +static inline int is_compat_task(void) +{ + return is_32bit_task(); +} + +static inline void __user *arch_compat_alloc_user_space(long len) +{ + unsigned long stack; + + stack = KSTK_ESP(current); + if (is_compat_task()) + stack &= 0x7fffffffUL; + return (void __user *) (stack - len); +} + +#endif + +struct compat_ipc64_perm { + compat_key_t key; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; + compat_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + unsigned int __unused1; + unsigned int __unused2; +}; + +struct compat_semid64_ds { + struct compat_ipc64_perm sem_perm; + compat_time_t sem_otime; + compat_ulong_t __pad1; + compat_time_t sem_ctime; + compat_ulong_t __pad2; + compat_ulong_t sem_nsems; + compat_ulong_t __unused1; + compat_ulong_t __unused2; +}; + +struct compat_msqid64_ds { + struct compat_ipc64_perm msg_perm; + compat_time_t msg_stime; + compat_ulong_t __pad1; + compat_time_t msg_rtime; + compat_ulong_t __pad2; + compat_time_t msg_ctime; + compat_ulong_t __pad3; + compat_ulong_t msg_cbytes; + compat_ulong_t msg_qnum; + compat_ulong_t msg_qbytes; + compat_pid_t msg_lspid; + compat_pid_t msg_lrpid; + compat_ulong_t __unused1; + compat_ulong_t __unused2; +}; + +struct compat_shmid64_ds { + struct compat_ipc64_perm shm_perm; + compat_size_t shm_segsz; + compat_time_t shm_atime; + compat_ulong_t __pad1; + compat_time_t shm_dtime; + compat_ulong_t __pad2; + compat_time_t shm_ctime; + compat_ulong_t __pad3; + compat_pid_t shm_cpid; + compat_pid_t shm_lpid; + compat_ulong_t shm_nattch; + compat_ulong_t __unused1; + compat_ulong_t __unused2; +}; +#endif /* _ASM_S390X_COMPAT_H */ diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h new file mode 100644 index 00000000000..3dfadb5d648 --- /dev/null +++ b/arch/s390/include/asm/cpcmd.h @@ -0,0 +1,32 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Christian Borntraeger (cborntra@de.ibm.com), + */ + +#ifndef _ASM_S390_CPCMD_H +#define _ASM_S390_CPCMD_H + +/* + * the lowlevel function for cpcmd + * the caller of __cpcmd has to ensure that the response buffer is below 2 GB + */ +extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); + +/* + * cpcmd is the in-kernel interface for issuing CP commands + * + * cmd: null-terminated command string, max 240 characters + * response: response buffer for VM's textual response + * rlen: size of the response buffer, cpcmd will not exceed this size + * but will cap the output, if its too large. Everything that + * did not fit into the buffer will be silently dropped + * response_code: return pointer for VM's error code + * return value: the size of the response. The caller can check if the buffer + * was large enough by comparing the return value and rlen + * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep + */ +extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code); + +#endif /* _ASM_S390_CPCMD_H */ diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h new file mode 100644 index 00000000000..f5a8e2fcde0 --- /dev/null +++ b/arch/s390/include/asm/cpu.h @@ -0,0 +1,26 @@ +/* + * Copyright IBM Corp. 2000, 2009 + * Author(s): Hartmut Penner <hp@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Christian Ehrhardt <ehrhardt@de.ibm.com>, + */ + +#ifndef _ASM_S390_CPU_H +#define _ASM_S390_CPU_H + +#define MAX_CPU_ADDRESS 255 + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +struct cpuid +{ + unsigned int version : 8; + unsigned int ident : 24; + unsigned int machine : 16; + unsigned int unused : 16; +} __attribute__ ((packed, aligned(8))); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h new file mode 100644 index 00000000000..cb700d54bd8 --- /dev/null +++ b/arch/s390/include/asm/cpu_mf.h @@ -0,0 +1,283 @@ +/* + * CPU-measurement facilities + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * Jan Glauber <jang@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#ifndef _ASM_S390_CPU_MF_H +#define _ASM_S390_CPU_MF_H + +#include <linux/errno.h> +#include <asm/facility.h> + +#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ +#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ +#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ +#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */ +#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ +#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ +#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ +#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */ +#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program + buffer full */ + +#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) +#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ + CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ + CPU_MF_INT_SF_LSDA) +#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL) + +/* CPU measurement facility support */ +static inline int cpum_cf_avail(void) +{ + return MACHINE_HAS_LPP && test_facility(67); +} + +static inline int cpum_sf_avail(void) +{ + return MACHINE_HAS_LPP && test_facility(68); +} + + +struct cpumf_ctr_info { + u16 cfvn; + u16 auth_ctl; + u16 enable_ctl; + u16 act_ctl; + u16 max_cpu; + u16 csvn; + u16 max_cg; + u16 reserved1; + u32 reserved2[12]; +} __packed; + +/* QUERY SAMPLING INFORMATION block */ +struct hws_qsi_info_block { /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: basic-sampling authorization */ + unsigned int ad:1; /* 15: diag-sampling authorization */ + unsigned int b16_21:6; /* 16-21: zeros */ + unsigned int es:1; /* 22: basic-sampling enable control */ + unsigned int ed:1; /* 23: diag-sampling enable control */ + unsigned int b24_29:6; /* 24-29: zeros */ + unsigned int cs:1; /* 30: basic-sampling activation control */ + unsigned int cd:1; /* 31: diag-sampling activation control */ + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ + unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +} __packed; + +/* SET SAMPLING CONTROLS request block */ +struct hws_lsctl_request_block { + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long long b2_53:52;/* 2-53: zeros */ + unsigned int es:1; /* 54: basic-sampling enable control */ + unsigned int ed:1; /* 55: diag-sampling enable control */ + unsigned int b56_61:6; /* 56-61: - zeros */ + unsigned int cs:1; /* 62: basic-sampling activation control */ + unsigned int cd:1; /* 63: diag-sampling activation control */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +} __packed; + +struct hws_basic_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ +} __packed; + +struct hws_diag_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int I:1; /* 31 entry valid or invalid */ + u8 data[]; /* Machine-dependent sample data */ +} __packed; + +struct hws_combined_entry { + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ +} __packed; + +struct hws_trailer_entry { + union { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned long long:61; /* 3 - 63: Reserved */ + }; + unsigned long long flags; /* 0 - 63: All indicators */ + }; + unsigned long long overflow; /* 64 - sample Overflow count */ + unsigned char timestamp[16]; /* 16 - 31 timestamp */ + unsigned long long reserved1; /* 32 -Reserved */ + unsigned long long reserved2; /* */ + unsigned long long progusage1; /* 48 - reserved for programming use */ + unsigned long long progusage2; /* */ +} __packed; + +/* Query counter information */ +static inline int qctri(struct cpumf_ctr_info *info) +{ + int rc = -EINVAL; + + asm volatile ( + "0: .insn s,0xb28e0000,%1\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(1b, 2b) + : "+d" (rc), "=Q" (*info)); + return rc; +} + +/* Load CPU-counter-set controls */ +static inline int lcctl(u64 ctl) +{ + int cc; + + asm volatile ( + " .insn s,0xb2840000,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "m" (ctl) : "cc"); + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + register u64 content asm("4") = 0; + int cc; + + asm volatile ( + " .insn rre,0xb2e40000,%0,%2\n" + " ipm %1\n" + " srl %1,28\n" + : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + if (!cc) + *val = content; + return cc; +} + +/* Query sampling information */ +static inline int qsi(struct hws_qsi_info_block *info) +{ + int cc; + cc = 1; + + asm volatile( + "0: .insn s,0xb2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (info) + : "m" (*info) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Load sampling controls */ +static inline int lsctl(struct hws_lsctl_request_block *req) +{ + int cc; + + cc = 1; + asm volatile( + "0: .insn s,0xb2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (req) + : "m" (*req) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Sampling control helper functions */ + +#include <linux/time.h> + +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + +#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL +#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL + +/* Return TOD timestamp contained in an trailer entry */ +static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* TOD in STCKE format */ + if (te->t) + return *((unsigned long long *) &te->timestamp[1]); + + /* TOD in STCK format */ + return *((unsigned long long *) &te->timestamp[0]); +} + +/* Return pointer to trailer entry of an sample data block */ +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *) v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* Return if the entry in the sample data block table (sdbt) + * is a link to the next sdbt */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} +#endif /* _ASM_S390_CPU_MF_H */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h new file mode 100644 index 00000000000..f65bd363451 --- /dev/null +++ b/arch/s390/include/asm/cputime.h @@ -0,0 +1,192 @@ +/* + * Copyright IBM Corp. 2004 + * + * Author: Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _S390_CPUTIME_H +#define _S390_CPUTIME_H + +#include <linux/types.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> +#include <asm/div64.h> + + +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ + +typedef unsigned long long __nocast cputime_t; +typedef unsigned long long __nocast cputime64_t; + +static inline unsigned long __div(unsigned long long n, unsigned long base) +{ +#ifndef CONFIG_64BIT + register_pair rp; + + rp.pair = n >> 1; + asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1)); + return rp.subreg.odd; +#else /* CONFIG_64BIT */ + return n / base; +#endif /* CONFIG_64BIT */ +} + +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime to jiffies and back. + */ +static inline unsigned long cputime_to_jiffies(const cputime_t cputime) +{ + return __div((__force unsigned long long) cputime, 4096000000ULL / HZ); +} + +static inline cputime_t jiffies_to_cputime(const unsigned int jif) +{ + return (__force cputime_t)(jif * (4096000000ULL / HZ)); +} + +static inline u64 cputime64_to_jiffies64(cputime64_t cputime) +{ + unsigned long long jif = (__force unsigned long long) cputime; + do_div(jif, 4096000000ULL / HZ); + return jif; +} + +static inline cputime64_t jiffies64_to_cputime64(const u64 jif) +{ + return (__force cputime64_t)(jif * (4096000000ULL / HZ)); +} + +/* + * Convert cputime to microseconds and back. + */ +static inline unsigned int cputime_to_usecs(const cputime_t cputime) +{ + return (__force unsigned long long) cputime >> 12; +} + +static inline cputime_t usecs_to_cputime(const unsigned int m) +{ + return (__force cputime_t)(m * 4096ULL); +} + +#define usecs_to_cputime64(m) usecs_to_cputime(m) + +/* + * Convert cputime to milliseconds and back. + */ +static inline unsigned int cputime_to_secs(const cputime_t cputime) +{ + return __div((__force unsigned long long) cputime, 2048000000) >> 1; +} + +static inline cputime_t secs_to_cputime(const unsigned int s) +{ + return (__force cputime_t)(s * 4096000000ULL); +} + +/* + * Convert cputime to timespec and back. + */ +static inline cputime_t timespec_to_cputime(const struct timespec *value) +{ + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000); +} + +static inline void cputime_to_timespec(const cputime_t cputime, + struct timespec *value) +{ + unsigned long long __cputime = (__force unsigned long long) cputime; +#ifndef CONFIG_64BIT + register_pair rp; + + rp.pair = __cputime >> 1; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_nsec = rp.subreg.even * 1000 / 4096; + value->tv_sec = rp.subreg.odd; +#else + value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = __cputime / 4096000000ULL; +#endif +} + +/* + * Convert cputime to timeval and back. + * Since cputime and timeval have the same resolution (microseconds) + * this is easy. + */ +static inline cputime_t timeval_to_cputime(const struct timeval *value) +{ + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_usec * 4096ULL); +} + +static inline void cputime_to_timeval(const cputime_t cputime, + struct timeval *value) +{ + unsigned long long __cputime = (__force unsigned long long) cputime; +#ifndef CONFIG_64BIT + register_pair rp; + + rp.pair = __cputime >> 1; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_usec = rp.subreg.even / 4096; + value->tv_sec = rp.subreg.odd; +#else + value->tv_usec = (__cputime % 4096000000ULL) / 4096; + value->tv_sec = __cputime / 4096000000ULL; +#endif +} + +/* + * Convert cputime to clock and back. + */ +static inline clock_t cputime_to_clock_t(cputime_t cputime) +{ + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; +} + +static inline cputime_t clock_t_to_cputime(unsigned long x) +{ + return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); +} + +/* + * Convert cputime64 to clock. + */ +static inline clock_t cputime64_to_clock_t(cputime64_t cputime) +{ + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; +} + +struct s390_idle_data { + int nohz_delay; + unsigned int sequence; + unsigned long long idle_count; + unsigned long long idle_time; + unsigned long long clock_idle_enter; + unsigned long long clock_idle_exit; + unsigned long long timer_idle_enter; + unsigned long long timer_idle_exit; +}; + +DECLARE_PER_CPU(struct s390_idle_data, s390_idle); + +cputime64_t s390_get_idle_time(int cpu); + +#define arch_idle_time(cpu) s390_get_idle_time(cpu) + +static inline int s390_nohz_delay(int cpu) +{ + return __get_cpu_var(s390_idle).nohz_delay != 0; +} + +#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) + +#endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h new file mode 100644 index 00000000000..7c31d3e25cd --- /dev/null +++ b/arch/s390/include/asm/crw.h @@ -0,0 +1,69 @@ +/* + * Data definitions for channel report processing + * Copyright IBM Corp. 2000, 2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#ifndef _ASM_S390_CRW_H +#define _ASM_S390_CRW_H + +#include <linux/types.h> + +/* + * Channel Report Word + */ +struct crw { + __u32 res1 : 1; /* reserved zero */ + __u32 slct : 1; /* solicited */ + __u32 oflw : 1; /* overflow */ + __u32 chn : 1; /* chained */ + __u32 rsc : 4; /* reporting source code */ + __u32 anc : 1; /* ancillary report */ + __u32 res2 : 1; /* reserved zero */ + __u32 erc : 6; /* error-recovery code */ + __u32 rsid : 16; /* reporting-source ID */ +} __attribute__ ((packed)); + +typedef void (*crw_handler_t)(struct crw *, struct crw *, int); + +extern int crw_register_handler(int rsc, crw_handler_t handler); +extern void crw_unregister_handler(int rsc); +extern void crw_handle_channel_report(void); +void crw_wait_for_channel_report(void); + +#define NR_RSCS 16 + +#define CRW_RSC_MONITOR 0x2 /* monitoring facility */ +#define CRW_RSC_SCH 0x3 /* subchannel */ +#define CRW_RSC_CPATH 0x4 /* channel path */ +#define CRW_RSC_CONFIG 0x9 /* configuration-alert facility */ +#define CRW_RSC_CSS 0xB /* channel subsystem */ + +#define CRW_ERC_EVENT 0x00 /* event information pending */ +#define CRW_ERC_AVAIL 0x01 /* available */ +#define CRW_ERC_INIT 0x02 /* initialized */ +#define CRW_ERC_TERROR 0x03 /* temporary error */ +#define CRW_ERC_IPARM 0x04 /* installed parm initialized */ +#define CRW_ERC_TERM 0x05 /* terminal */ +#define CRW_ERC_PERRN 0x06 /* perm. error, fac. not init */ +#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */ +#define CRW_ERC_PMOD 0x08 /* installed parameters modified */ + +static inline int stcrw(struct crw *pcrw) +{ + int ccode; + + asm volatile( + " stcrw 0(%2)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (ccode), "=m" (*pcrw) + : "a" (pcrw) + : "cc" ); + return ccode; +} + +#endif /* _ASM_S390_CRW_H */ diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h new file mode 100644 index 00000000000..09d1dd46bd5 --- /dev/null +++ b/arch/s390/include/asm/css_chars.h @@ -0,0 +1,38 @@ +#ifndef _ASM_CSS_CHARS_H +#define _ASM_CSS_CHARS_H + +#include <linux/types.h> + +struct css_general_char { + u64 : 12; + u32 dynio : 1; /* bit 12 */ + u32 : 4; + u32 eadm : 1; /* bit 17 */ + u32 : 23; + u32 aif : 1; /* bit 41 */ + u32 : 3; + u32 mcss : 1; /* bit 45 */ + u32 fcs : 1; /* bit 46 */ + u32 : 1; + u32 ext_mb : 1; /* bit 48 */ + u32 : 7; + u32 aif_tdd : 1; /* bit 56 */ + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; + u32 aif_osa : 1; /* bit 67 */ + u32 : 12; + u32 eadm_rf : 1; /* bit 80 */ + u32 : 1; + u32 cib : 1; /* bit 82 */ + u32 : 5; + u32 fcx : 1; /* bit 88 */ + u32 : 19; + u32 alt_ssi : 1; /* bit 108 */ + u32:1; + u32 narf:1; /* bit 110 */ +} __packed; + +extern struct css_general_char css_general_characteristics; + +#endif diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h new file mode 100644 index 00000000000..31ab9f346d7 --- /dev/null +++ b/arch/s390/include/asm/ctl_reg.h @@ -0,0 +1,82 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_CTL_REG_H +#define __ASM_CTL_REG_H + +#include <linux/bug.h> + +#ifdef CONFIG_64BIT +# define __CTL_LOAD "lctlg" +# define __CTL_STORE "stctg" +#else +# define __CTL_LOAD "lctl" +# define __CTL_STORE "stctl" +#endif + +#define __ctl_load(array, low, high) { \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + \ + BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ + asm volatile( \ + __CTL_LOAD " %1,%2,%0\n" \ + : : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\ +} + +#define __ctl_store(array, low, high) { \ + typedef struct { char _[sizeof(array)]; } addrtype; \ + \ + BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ + asm volatile( \ + __CTL_STORE " %1,%2,%0\n" \ + : "=Q" (*(addrtype *)(&array)) \ + : "i" (low), "i" (high)); \ +} + +static inline void __ctl_set_bit(unsigned int cr, unsigned int bit) +{ + unsigned long reg; + + __ctl_store(reg, cr, cr); + reg |= 1UL << bit; + __ctl_load(reg, cr, cr); +} + +static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) +{ + unsigned long reg; + + __ctl_store(reg, cr, cr); + reg &= ~(1UL << bit); + __ctl_load(reg, cr, cr); +} + +void smp_ctl_set_bit(int cr, int bit); +void smp_ctl_clear_bit(int cr, int bit); + +union ctlreg0 { + unsigned long val; + struct { +#ifdef CONFIG_64BIT + unsigned long : 32; +#endif + unsigned long : 3; + unsigned long lap : 1; /* Low-address-protection control */ + unsigned long : 4; + unsigned long edat : 1; /* Enhanced-DAT-enablement control */ + unsigned long : 23; + }; +}; + +#ifdef CONFIG_SMP +# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) +# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) +#else +# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) +# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) +#endif + +#endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h new file mode 100644 index 00000000000..b80941f30df --- /dev/null +++ b/arch/s390/include/asm/current.h @@ -0,0 +1,18 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/current.h" + */ + +#ifndef _S390_CURRENT_H +#define _S390_CURRENT_H + +#include <asm/lowcore.h> + +struct task_struct; + +#define current ((struct task_struct *const)S390_lowcore.current_task) + +#endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h new file mode 100644 index 00000000000..530c15eb01e --- /dev/null +++ b/arch/s390/include/asm/debug.h @@ -0,0 +1,237 @@ +/* + * S/390 debug facility + * + * Copyright IBM Corp. 1999, 2000 + */ +#ifndef DEBUG_H +#define DEBUG_H + +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/time.h> +#include <uapi/asm/debug.h> + +#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ +#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ +#define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */ +#define DEBUG_MAX_VIEWS 10 /* max number of views in proc fs */ +#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */ +#define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */ + +#define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */ + +#define DEBUG_DATA(entry) (char*)(entry + 1) /* data is stored behind */ + /* the entry information */ + +typedef struct __debug_entry debug_entry_t; + +struct debug_view; + +typedef struct debug_info { + struct debug_info* next; + struct debug_info* prev; + atomic_t ref_count; + spinlock_t lock; + int level; + int nr_areas; + int pages_per_area; + int buf_size; + int entry_size; + debug_entry_t*** areas; + int active_area; + int *active_pages; + int *active_entries; + struct dentry* debugfs_root_entry; + struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; + struct debug_view* views[DEBUG_MAX_VIEWS]; + char name[DEBUG_MAX_NAME_LEN]; + umode_t mode; +} debug_info_t; + +typedef int (debug_header_proc_t) (debug_info_t* id, + struct debug_view* view, + int area, + debug_entry_t* entry, + char* out_buf); + +typedef int (debug_format_proc_t) (debug_info_t* id, + struct debug_view* view, char* out_buf, + const char* in_buf); +typedef int (debug_prolog_proc_t) (debug_info_t* id, + struct debug_view* view, + char* out_buf); +typedef int (debug_input_proc_t) (debug_info_t* id, + struct debug_view* view, + struct file* file, + const char __user *user_buf, + size_t in_buf_size, loff_t* offset); + +int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view, + int area, debug_entry_t* entry, char* out_buf); + +struct debug_view { + char name[DEBUG_MAX_NAME_LEN]; + debug_prolog_proc_t* prolog_proc; + debug_header_proc_t* header_proc; + debug_format_proc_t* format_proc; + debug_input_proc_t* input_proc; + void* private_data; +}; + +extern struct debug_view debug_hex_ascii_view; +extern struct debug_view debug_raw_view; +extern struct debug_view debug_sprintf_view; + +/* do NOT use the _common functions */ + +debug_entry_t* debug_event_common(debug_info_t* id, int level, + const void* data, int length); + +debug_entry_t* debug_exception_common(debug_info_t* id, int level, + const void* data, int length); + +/* Debug Feature API: */ + +debug_info_t *debug_register(const char *name, int pages, int nr_areas, + int buf_size); + +debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, + int buf_size, umode_t mode, uid_t uid, + gid_t gid); + +void debug_unregister(debug_info_t* id); + +void debug_set_level(debug_info_t* id, int new_level); + +void debug_set_critical(void); +void debug_stop_all(void); + +static inline bool debug_level_enabled(debug_info_t* id, int level) +{ + return level <= id->level; +} + +static inline debug_entry_t* +debug_event(debug_info_t* id, int level, void* data, int length) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,data,length); +} + +static inline debug_entry_t* +debug_int_event(debug_info_t* id, int level, unsigned int tag) +{ + unsigned int t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,&t,sizeof(unsigned int)); +} + +static inline debug_entry_t * +debug_long_event (debug_info_t* id, int level, unsigned long tag) +{ + unsigned long t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,&t,sizeof(unsigned long)); +} + +static inline debug_entry_t* +debug_text_event(debug_info_t* id, int level, const char* txt) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_event_common(id,level,txt,strlen(txt)); +} + +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ +extern debug_entry_t * +debug_sprintf_event(debug_info_t* id,int level,char *string,...) + __attribute__ ((format(printf, 3, 4))); + + +static inline debug_entry_t* +debug_exception(debug_info_t* id, int level, void* data, int length) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,data,length); +} + +static inline debug_entry_t* +debug_int_exception(debug_info_t* id, int level, unsigned int tag) +{ + unsigned int t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,&t,sizeof(unsigned int)); +} + +static inline debug_entry_t * +debug_long_exception (debug_info_t* id, int level, unsigned long tag) +{ + unsigned long t=tag; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,&t,sizeof(unsigned long)); +} + +static inline debug_entry_t* +debug_text_exception(debug_info_t* id, int level, const char* txt) +{ + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; + return debug_exception_common(id,level,txt,strlen(txt)); +} + +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ +extern debug_entry_t * +debug_sprintf_exception(debug_info_t* id,int level,char *string,...) + __attribute__ ((format(printf, 3, 4))); + +int debug_register_view(debug_info_t* id, struct debug_view* view); +int debug_unregister_view(debug_info_t* id, struct debug_view* view); + +/* + define the debug levels: + - 0 No debugging output to console or syslog + - 1 Log internal errors to syslog, ignore check conditions + - 2 Log internal errors and check conditions to syslog + - 3 Log internal errors to console, log check conditions to syslog + - 4 Log internal errors and check conditions to console + - 5 panic on internal errors, log check conditions to console + - 6 panic on both, internal errors and check conditions + */ + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 4 +#endif + +#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y +#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y +#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y +#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y + +#if DEBUG_LEVEL > 0 +#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_INFO(x...) printk ( KERN_INFO PRINTK_HEADER x ) +#define PRINT_WARN(x...) printk ( KERN_WARNING PRINTK_HEADER x ) +#define PRINT_ERR(x...) printk ( KERN_ERR PRINTK_HEADER x ) +#define PRINT_FATAL(x...) panic ( PRINTK_HEADER x ) +#else +#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_INFO(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_WARN(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_ERR(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#define PRINT_FATAL(x...) printk ( KERN_DEBUG PRINTK_HEADER x ) +#endif /* DASD_DEBUG */ + +#endif /* DEBUG_H */ diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h new file mode 100644 index 00000000000..3f6e4095f47 --- /dev/null +++ b/arch/s390/include/asm/delay.h @@ -0,0 +1,24 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/delay.h" + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/s390/lib/delay.c + */ + +#ifndef _S390_DELAY_H +#define _S390_DELAY_H + +void __ndelay(unsigned long long nsecs); +void __udelay(unsigned long long usecs); +void udelay_simple(unsigned long long usecs); +void __delay(unsigned long loops); + +#define ndelay(n) __ndelay((unsigned long long) (n)) +#define udelay(n) __udelay((unsigned long long) (n)) +#define mdelay(n) __udelay((unsigned long long) (n) * 1000) + +#endif /* defined(_S390_DELAY_H) */ diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h new file mode 100644 index 00000000000..d8f9872b0e2 --- /dev/null +++ b/arch/s390/include/asm/device.h @@ -0,0 +1,7 @@ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + */ +#include <asm-generic/device.h> + diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h new file mode 100644 index 00000000000..7e91c58072e --- /dev/null +++ b/arch/s390/include/asm/diag.h @@ -0,0 +1,52 @@ +/* + * s390 diagnose functions + * + * Copyright IBM Corp. 2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#ifndef _ASM_S390_DIAG_H +#define _ASM_S390_DIAG_H + +/* + * Diagnose 10: Release page range + */ +static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) +{ + unsigned long start_addr, end_addr; + + start_addr = start_pfn << PAGE_SHIFT; + end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; + + asm volatile( + "0: diag %0,%1,0x10\n" + "1:\n" + EX_TABLE(0b, 1b) + EX_TABLE(1b, 1b) + : : "a" (start_addr), "a" (end_addr)); +} + +/* + * Diagnose 14: Input spool file manipulation + */ +extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode); + +/* + * Diagnose 210: Get information about a virtual device + */ +struct diag210 { + u16 vrdcdvno; /* device number (input) */ + u16 vrdclen; /* data block length (input) */ + u8 vrdcvcla; /* virtual device class (output) */ + u8 vrdcvtyp; /* virtual device type (output) */ + u8 vrdcvsta; /* virtual device status (output) */ + u8 vrdcvfla; /* virtual device flags (output) */ + u8 vrdcrccl; /* real device class (output) */ + u8 vrdccrty; /* real device type (output) */ + u8 vrdccrmd; /* real device model (output) */ + u8 vrdccrft; /* real device feature (output) */ +} __attribute__((packed, aligned(4))); + +extern int diag210(struct diag210 *addr); + +#endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h new file mode 100644 index 00000000000..04a83f5773c --- /dev/null +++ b/arch/s390/include/asm/dis.h @@ -0,0 +1,52 @@ +/* + * Disassemble s390 instructions. + * + * Copyright IBM Corp. 2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#ifndef __ASM_S390_DIS_H__ +#define __ASM_S390_DIS_H__ + +/* Type of operand */ +#define OPERAND_GPR 0x1 /* Operand printed as %rx */ +#define OPERAND_FPR 0x2 /* Operand printed as %fx */ +#define OPERAND_AR 0x4 /* Operand printed as %ax */ +#define OPERAND_CR 0x8 /* Operand printed as %cx */ +#define OPERAND_DISP 0x10 /* Operand printed as displacement */ +#define OPERAND_BASE 0x20 /* Operand printed as base register */ +#define OPERAND_INDEX 0x40 /* Operand printed as index register */ +#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ + + +struct s390_operand { + int bits; /* The number of bits in the operand. */ + int shift; /* The number of bits to shift. */ + int flags; /* One bit syntax flags. */ +}; + +struct s390_insn { + const char name[5]; + unsigned char opfrag; + unsigned char format; +}; + + +static inline int insn_length(unsigned char code) +{ + return ((((int) code + 64) >> 7) + 1) << 1; +} + +void show_code(struct pt_regs *regs); +void print_fn_code(unsigned char *code, unsigned long len); +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len); +struct s390_insn *find_insn(unsigned char *code); + +static inline int is_known_insn(unsigned char *code) +{ + return !!find_insn(code); +} + +#endif /* __ASM_S390_DIS_H__ */ diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h new file mode 100644 index 00000000000..6cd978cefb2 --- /dev/null +++ b/arch/s390/include/asm/div64.h @@ -0,0 +1 @@ +#include <asm-generic/div64.h> diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h new file mode 100644 index 00000000000..3fbc67d9e19 --- /dev/null +++ b/arch/s390/include/asm/dma-mapping.h @@ -0,0 +1,79 @@ +#ifndef _ASM_S390_DMA_MAPPING_H +#define _ASM_S390_DMA_MAPPING_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/dma-attrs.h> +#include <linux/dma-debug.h> +#include <linux/io.h> + +#define DMA_ERROR_CODE (~(dma_addr_t) 0x0) + +extern struct dma_map_ops s390_dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return &s390_dma_ops; +} + +extern int dma_set_mask(struct device *dev, u64 mask); + +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +#include <asm-generic/dma-mapping-common.h> + +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (dma_ops->dma_supported == NULL) + return 1; + return dma_ops->dma_supported(dev, mask); +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + return addr + size - 1 <= *dev->dma_mask; +} + +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_mapping_error(dev, dma_addr); + if (dma_ops->mapping_error) + return dma_ops->mapping_error(dev, dma_addr); + return dma_addr == DMA_ERROR_CODE; +} + +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *ret; + + ret = ops->alloc(dev, size, dma_handle, flag, NULL); + debug_dma_alloc_coherent(dev, size, *dma_handle, ret); + return ret; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + dma_ops->free(dev, size, cpu_addr, dma_handle, NULL); +} + +#endif /* _ASM_S390_DMA_MAPPING_H */ diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h new file mode 100644 index 00000000000..bb9bdcd2086 --- /dev/null +++ b/arch/s390/include/asm/dma.h @@ -0,0 +1,19 @@ +#ifndef _ASM_S390_DMA_H +#define _ASM_S390_DMA_H + +#include <asm/io.h> + +/* + * MAX_DMA_ADDRESS is ambiguous because on s390 its completely unrelated + * to DMA. It _is_ used for the s390 memory zone split at 2GB caused + * by the 31 bit heritage. + */ +#define MAX_DMA_ADDRESS 0x80000000 + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + +#endif /* _ASM_S390_DMA_H */ diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h new file mode 100644 index 00000000000..67026300c88 --- /dev/null +++ b/arch/s390/include/asm/eadm.h @@ -0,0 +1,117 @@ +#ifndef _ASM_S390_EADM_H +#define _ASM_S390_EADM_H + +#include <linux/types.h> +#include <linux/device.h> + +struct arqb { + u64 data; + u16 fmt:4; + u16:12; + u16 cmd_code; + u16:16; + u16 msb_count; + u32 reserved[12]; +} __packed; + +#define ARQB_CMD_MOVE 1 + +struct arsb { + u16 fmt:4; + u32:28; + u8 ef; + u8:8; + u8 ecbi; + u8:8; + u8 fvf; + u16:16; + u8 eqc; + u32:32; + u64 fail_msb; + u64 fail_aidaw; + u64 fail_ms; + u64 fail_scm; + u32 reserved[4]; +} __packed; + +#define EQC_WR_PROHIBIT 22 + +struct msb { + u8 fmt:4; + u8 oc:4; + u8 flags; + u16:12; + u16 bs:4; + u32 blk_count; + u64 data_addr; + u64 scm_addr; + u64:64; +} __packed; + +struct aidaw { + u8 flags; + u32 :24; + u32 :32; + u64 data_addr; +} __packed; + +#define MSB_OC_CLEAR 0 +#define MSB_OC_READ 1 +#define MSB_OC_WRITE 2 +#define MSB_OC_RELEASE 3 + +#define MSB_FLAG_BNM 0x80 +#define MSB_FLAG_IDA 0x40 + +#define MSB_BS_4K 0 +#define MSB_BS_1M 1 + +#define AOB_NR_MSB 124 + +struct aob { + struct arqb request; + struct arsb response; + struct msb msb[AOB_NR_MSB]; +} __packed __aligned(PAGE_SIZE); + +struct aob_rq_header { + struct scm_device *scmdev; + char data[0]; +}; + +struct scm_device { + u64 address; + u64 size; + unsigned int nr_max_block; + struct device dev; + struct { + unsigned int persistence:4; + unsigned int oper_state:4; + unsigned int data_state:4; + unsigned int rank:4; + unsigned int release:1; + unsigned int res_id:8; + } __packed attrs; +}; + +#define OP_STATE_GOOD 1 +#define OP_STATE_TEMP_ERR 2 +#define OP_STATE_PERM_ERR 3 + +enum scm_event {SCM_CHANGE, SCM_AVAIL}; + +struct scm_driver { + struct device_driver drv; + int (*probe) (struct scm_device *scmdev); + int (*remove) (struct scm_device *scmdev); + void (*notify) (struct scm_device *scmdev, enum scm_event event); + void (*handler) (struct scm_device *scmdev, void *data, int error); +}; + +int scm_driver_register(struct scm_driver *scmdrv); +void scm_driver_unregister(struct scm_driver *scmdrv); + +int eadm_start_aob(struct aob *aob); +void scm_irq_handler(struct aob *aob, int error); + +#endif /* _ASM_S390_EADM_H */ diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h new file mode 100644 index 00000000000..c5befc5a3bf --- /dev/null +++ b/arch/s390/include/asm/ebcdic.h @@ -0,0 +1,48 @@ +/* + * EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines. + * + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _EBCDIC_H +#define _EBCDIC_H + +#ifndef _S390_TYPES_H +#include <types.h> +#endif + +extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */ +extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */ +extern __u8 _ascebc[256]; /* ASCII -> EBCDIC conversion table */ +extern __u8 _ebcasc[256]; /* EBCDIC -> ASCII conversion table */ +extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */ +extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ + +static inline void +codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr) +{ + if (nr-- <= 0) + return; + asm volatile( + " bras 1,1f\n" + " tr 0(1,%0),0(%2)\n" + "0: tr 0(256,%0),0(%2)\n" + " la %0,256(%0)\n" + "1: ahi %1,-256\n" + " jnm 0b\n" + " ex %1,0(1)" + : "+&a" (addr), "+&a" (nr) + : "a" (codepage) : "cc", "memory", "1"); +} + +#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr) +#define EBCASC(addr,nr) codepage_convert(_ebcasc, addr, nr) +#define ASCEBC_500(addr,nr) codepage_convert(_ascebc_500, addr, nr) +#define EBCASC_500(addr,nr) codepage_convert(_ebcasc_500, addr, nr) +#define EBC_TOLOWER(addr,nr) codepage_convert(_ebc_tolower, addr, nr) +#define EBC_TOUPPER(addr,nr) codepage_convert(_ebc_toupper, addr, nr) + +#endif + diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h new file mode 100644 index 00000000000..78f4f8711d5 --- /dev/null +++ b/arch/s390/include/asm/elf.h @@ -0,0 +1,230 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/elf.h" + */ + +#ifndef __ASMS390_ELF_H +#define __ASMS390_ELF_H + +/* s390 relocations defined by the ABIs */ +#define R_390_NONE 0 /* No reloc. */ +#define R_390_8 1 /* Direct 8 bit. */ +#define R_390_12 2 /* Direct 12 bit. */ +#define R_390_16 3 /* Direct 16 bit. */ +#define R_390_32 4 /* Direct 32 bit. */ +#define R_390_PC32 5 /* PC relative 32 bit. */ +#define R_390_GOT12 6 /* 12 bit GOT offset. */ +#define R_390_GOT32 7 /* 32 bit GOT offset. */ +#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */ +#define R_390_COPY 9 /* Copy symbol at runtime. */ +#define R_390_GLOB_DAT 10 /* Create GOT entry. */ +#define R_390_JMP_SLOT 11 /* Create PLT entry. */ +#define R_390_RELATIVE 12 /* Adjust by program base. */ +#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */ +#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */ +#define R_390_GOT16 15 /* 16 bit GOT offset. */ +#define R_390_PC16 16 /* PC relative 16 bit. */ +#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */ +#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */ +#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */ +#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */ +#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */ +#define R_390_64 22 /* Direct 64 bit. */ +#define R_390_PC64 23 /* PC relative 64 bit. */ +#define R_390_GOT64 24 /* 64 bit GOT offset. */ +#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */ +#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */ +#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */ +#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */ +#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */ +#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */ +#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */ +#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */ +#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */ +#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */ +#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */ +#define R_390_PLTOFF64 36 /* 16 bit offset from GOT to PLT. */ +#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */ +#define R_390_TLS_GDCALL 38 /* Tag for function call in general + dynamic TLS code. */ +#define R_390_TLS_LDCALL 39 /* Tag for function call in local + dynamic TLS code. */ +#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic + thread local data. */ +#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic + thread local data. */ +#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS + block offset. */ +#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic + thread local data in LD code. */ +#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for + negated static TLS block offset. */ +#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to + static TLS block. */ +#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS + block. */ +#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS + block. */ +#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */ +#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */ +#define R_390_TLS_TPOFF 56 /* Negate offset in static TLS + block. */ +#define R_390_20 57 /* Direct 20 bit. */ +#define R_390_GOT20 58 /* 20 bit GOT offset. */ +#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */ +#define R_390_TLS_GOTIE20 60 /* 20 bit GOT offset for static TLS + block offset. */ +/* Keep this the last entry. */ +#define R_390_NUM 61 + +/* Bits present in AT_HWCAP. */ +#define HWCAP_S390_ESAN3 1 +#define HWCAP_S390_ZARCH 2 +#define HWCAP_S390_STFLE 4 +#define HWCAP_S390_MSA 8 +#define HWCAP_S390_LDISP 16 +#define HWCAP_S390_EIMM 32 +#define HWCAP_S390_DFP 64 +#define HWCAP_S390_HPAGE 128 +#define HWCAP_S390_ETF3EH 256 +#define HWCAP_S390_HIGH_GPRS 512 +#define HWCAP_S390_TE 1024 + +/* + * These are used to set parameters in the core dumps. + */ +#ifndef CONFIG_64BIT +#define ELF_CLASS ELFCLASS32 +#else /* CONFIG_64BIT */ +#define ELF_CLASS ELFCLASS64 +#endif /* CONFIG_64BIT */ +#define ELF_DATA ELFDATA2MSB +#define ELF_ARCH EM_S390 + +/* + * ELF register definitions.. + */ + +#include <asm/ptrace.h> +#include <asm/compat.h> +#include <asm/syscall.h> +#include <asm/user.h> + +typedef s390_fp_regs elf_fpregset_t; +typedef s390_regs elf_gregset_t; + +typedef s390_fp_regs compat_elf_fpregset_t; +typedef s390_compat_regs compat_elf_gregset_t; + +#include <linux/sched.h> /* for task_struct */ +#include <asm/mmu_context.h> + +#include <asm/vdso.h> + +extern unsigned int vdso_enabled; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ + && (x)->e_ident[EI_CLASS] == ELF_CLASS) +#define compat_elf_check_arch(x) \ + (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ + && (x)->e_ident[EI_CLASS] == ELF_CLASS) +#define compat_start_thread start_thread31 + +/* For SVR4/S390 the function pointer to be registered with `atexit` is + passed in R14. */ +#define ELF_PLAT_INIT(_r, load_addr) \ + do { \ + _r->gprs[14] = 0; \ + } while (0) + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +extern unsigned long randomize_et_dyn(unsigned long base); +#define ELF_ET_DYN_BASE (randomize_et_dyn(STACK_TOP / 3 * 2)) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. */ + +extern unsigned long elf_hwcap; +#define ELF_HWCAP (elf_hwcap) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define ELF_PLATFORM_SIZE 8 +extern char elf_platform[]; +#define ELF_PLATFORM (elf_platform) + +#ifndef CONFIG_COMPAT +#define SET_PERSONALITY(ex) \ +do { \ + set_personality(PER_LINUX | \ + (current->personality & (~PER_MASK))); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ +} while (0) +#else /* CONFIG_COMPAT */ +#define SET_PERSONALITY(ex) \ +do { \ + if (personality(current->personality) != PER_LINUX32) \ + set_personality(PER_LINUX | \ + (current->personality & ~PER_MASK)); \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ + set_thread_flag(TIF_31BIT); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table_emu; \ + } else { \ + clear_thread_flag(TIF_31BIT); \ + current_thread_info()->sys_call_table = \ + (unsigned long) &sys_call_table; \ + } \ +} while (0) +#endif /* CONFIG_COMPAT */ + +#define STACK_RND_MASK 0x7ffUL + +#define ARCH_DLINFO \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso_base); \ +} while (0) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +int arch_setup_additional_pages(struct linux_binprm *, int); + +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +#endif diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h new file mode 100644 index 00000000000..108d8c48e42 --- /dev/null +++ b/arch/s390/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h new file mode 100644 index 00000000000..629b79a9316 --- /dev/null +++ b/arch/s390/include/asm/etr.h @@ -0,0 +1,256 @@ +/* + * Copyright IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ +#ifndef __S390_ETR_H +#define __S390_ETR_H + +/* ETR attachment control register */ +struct etr_eacr { + unsigned int e0 : 1; /* port 0 stepping control */ + unsigned int e1 : 1; /* port 1 stepping control */ + unsigned int _pad0 : 5; /* must be 00100 */ + unsigned int dp : 1; /* data port control */ + unsigned int p0 : 1; /* port 0 change recognition control */ + unsigned int p1 : 1; /* port 1 change recognition control */ + unsigned int _pad1 : 3; /* must be 000 */ + unsigned int ea : 1; /* ETR alert control */ + unsigned int es : 1; /* ETR sync check control */ + unsigned int sl : 1; /* switch to local control */ +} __attribute__ ((packed)); + +/* Port state returned by steai */ +enum etr_psc { + etr_psc_operational = 0, + etr_psc_semi_operational = 1, + etr_psc_protocol_error = 4, + etr_psc_no_symbols = 8, + etr_psc_no_signal = 12, + etr_psc_pps_mode = 13 +}; + +/* Logical port state returned by stetr */ +enum etr_lpsc { + etr_lpsc_operational_step = 0, + etr_lpsc_operational_alt = 1, + etr_lpsc_semi_operational = 2, + etr_lpsc_protocol_error = 4, + etr_lpsc_no_symbol_sync = 8, + etr_lpsc_no_signal = 12, + etr_lpsc_pps_mode = 13 +}; + +/* ETR status words */ +struct etr_esw { + struct etr_eacr eacr; /* attachment control register */ + unsigned int y : 1; /* stepping mode */ + unsigned int _pad0 : 5; /* must be 00000 */ + unsigned int p : 1; /* stepping port number */ + unsigned int q : 1; /* data port number */ + unsigned int psc0 : 4; /* port 0 state code */ + unsigned int psc1 : 4; /* port 1 state code */ +} __attribute__ ((packed)); + +/* Second level data register status word */ +struct etr_slsw { + unsigned int vv1 : 1; /* copy of validity bit data frame 1 */ + unsigned int vv2 : 1; /* copy of validity bit data frame 2 */ + unsigned int vv3 : 1; /* copy of validity bit data frame 3 */ + unsigned int vv4 : 1; /* copy of validity bit data frame 4 */ + unsigned int _pad0 : 19; /* must by all zeroes */ + unsigned int n : 1; /* EAF port number */ + unsigned int v1 : 1; /* validity bit ETR data frame 1 */ + unsigned int v2 : 1; /* validity bit ETR data frame 2 */ + unsigned int v3 : 1; /* validity bit ETR data frame 3 */ + unsigned int v4 : 1; /* validity bit ETR data frame 4 */ + unsigned int _pad1 : 4; /* must be 0000 */ +} __attribute__ ((packed)); + +/* ETR data frames */ +struct etr_edf1 { + unsigned int u : 1; /* untuned bit */ + unsigned int _pad0 : 1; /* must be 0 */ + unsigned int r : 1; /* service request bit */ + unsigned int _pad1 : 4; /* must be 0000 */ + unsigned int a : 1; /* time adjustment bit */ + unsigned int net_id : 8; /* ETR network id */ + unsigned int etr_id : 8; /* id of ETR which sends data frames */ + unsigned int etr_pn : 8; /* port number of ETR output port */ +} __attribute__ ((packed)); + +struct etr_edf2 { + unsigned int etv : 32; /* Upper 32 bits of TOD. */ +} __attribute__ ((packed)); + +struct etr_edf3 { + unsigned int rc : 8; /* failure reason code */ + unsigned int _pad0 : 3; /* must be 000 */ + unsigned int c : 1; /* ETR coupled bit */ + unsigned int tc : 4; /* ETR type code */ + unsigned int blto : 8; /* biased local time offset */ + /* (blto - 128) * 15 = minutes */ + unsigned int buo : 8; /* biased utc offset */ + /* (buo - 128) = leap seconds */ +} __attribute__ ((packed)); + +struct etr_edf4 { + unsigned int ed : 8; /* ETS device dependent data */ + unsigned int _pad0 : 1; /* must be 0 */ + unsigned int buc : 5; /* biased ut1 correction */ + /* (buc - 16) * 0.1 seconds */ + unsigned int em : 6; /* ETS error magnitude */ + unsigned int dc : 6; /* ETS drift code */ + unsigned int sc : 6; /* ETS steering code */ +} __attribute__ ((packed)); + +/* + * ETR attachment information block, two formats + * format 1 has 4 reserved words with a size of 64 bytes + * format 2 has 16 reserved words with a size of 96 bytes + */ +struct etr_aib { + struct etr_esw esw; + struct etr_slsw slsw; + unsigned long long tsp; + struct etr_edf1 edf1; + struct etr_edf2 edf2; + struct etr_edf3 edf3; + struct etr_edf4 edf4; + unsigned int reserved[16]; +} __attribute__ ((packed,aligned(8))); + +/* ETR interruption parameter */ +struct etr_irq_parm { + unsigned int _pad0 : 8; + unsigned int pc0 : 1; /* port 0 state change */ + unsigned int pc1 : 1; /* port 1 state change */ + unsigned int _pad1 : 3; + unsigned int eai : 1; /* ETR alert indication */ + unsigned int _pad2 : 18; +} __attribute__ ((packed)); + +/* Query TOD offset result */ +struct etr_ptff_qto { + unsigned long long physical_clock; + unsigned long long tod_offset; + unsigned long long logical_tod_offset; + unsigned long long tod_epoch_difference; +} __attribute__ ((packed)); + +/* Inline assembly helper functions */ +static inline int etr_setr(struct etr_eacr *ctrl) +{ + int rc = -EOPNOTSUPP; + + asm volatile( + " .insn s,0xb2160000,%1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "Q" (*ctrl)); + return rc; +} + +/* Stores a format 1 aib with 64 bytes */ +static inline int etr_stetr(struct etr_aib *aib) +{ + int rc = -EOPNOTSUPP; + + asm volatile( + " .insn s,0xb2170000,%1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "Q" (*aib)); + return rc; +} + +/* Stores a format 2 aib with 96 bytes for specified port */ +static inline int etr_steai(struct etr_aib *aib, unsigned int func) +{ + register unsigned int reg0 asm("0") = func; + int rc = -EOPNOTSUPP; + + asm volatile( + " .insn s,0xb2b30000,%1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "Q" (*aib), "d" (reg0)); + return rc; +} + +/* Function codes for the steai instruction. */ +#define ETR_STEAI_STEPPING_PORT 0x10 +#define ETR_STEAI_ALTERNATE_PORT 0x11 +#define ETR_STEAI_PORT_0 0x12 +#define ETR_STEAI_PORT_1 0x13 + +static inline int etr_ptff(void *ptff_block, unsigned int func) +{ + register unsigned int reg0 asm("0") = func; + register unsigned long reg1 asm("1") = (unsigned long) ptff_block; + int rc = -EOPNOTSUPP; + + asm volatile( + " .word 0x0104\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "=m" (ptff_block) + : "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc"); + return rc; +} + +/* Function codes for the ptff instruction. */ +#define ETR_PTFF_QAF 0x00 /* query available functions */ +#define ETR_PTFF_QTO 0x01 /* query tod offset */ +#define ETR_PTFF_QSI 0x02 /* query steering information */ +#define ETR_PTFF_ATO 0x40 /* adjust tod offset */ +#define ETR_PTFF_STO 0x41 /* set tod offset */ +#define ETR_PTFF_SFS 0x42 /* set fine steering rate */ +#define ETR_PTFF_SGS 0x43 /* set gross steering rate */ + +/* Functions needed by the machine check handler */ +void etr_switch_to_local(void); +void etr_sync_check(void); + +/* STP interruption parameter */ +struct stp_irq_parm { + unsigned int _pad0 : 14; + unsigned int tsc : 1; /* Timing status change */ + unsigned int lac : 1; /* Link availability change */ + unsigned int tcpc : 1; /* Time control parameter change */ + unsigned int _pad2 : 15; +} __attribute__ ((packed)); + +#define STP_OP_SYNC 1 +#define STP_OP_CTRL 3 + +struct stp_sstpi { + unsigned int rsvd0; + unsigned int rsvd1 : 8; + unsigned int stratum : 8; + unsigned int vbits : 16; + unsigned int leaps : 16; + unsigned int tmd : 4; + unsigned int ctn : 4; + unsigned int rsvd2 : 3; + unsigned int c : 1; + unsigned int tst : 4; + unsigned int tzo : 16; + unsigned int dsto : 16; + unsigned int ctrl : 16; + unsigned int rsvd3 : 16; + unsigned int tto; + unsigned int rsvd4; + unsigned int ctnid[3]; + unsigned int rsvd5; + unsigned int todoff[4]; + unsigned int rsvd6[48]; +} __attribute__ ((packed)); + +/* Functions needed by the machine check handler */ +void stp_sync_check(void); +void stp_island_check(void); + +#endif /* __S390_ETR_H */ diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h new file mode 100644 index 00000000000..c4a93d6327f --- /dev/null +++ b/arch/s390/include/asm/exec.h @@ -0,0 +1,12 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_EXEC_H +#define __ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* __ASM_EXEC_H */ diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h new file mode 100644 index 00000000000..6276002d76b --- /dev/null +++ b/arch/s390/include/asm/extmem.h @@ -0,0 +1,31 @@ +/* + * definitions for external memory segment support + * Copyright IBM Corp. 2003 + */ + +#ifndef _ASM_S390X_DCSS_H +#define _ASM_S390X_DCSS_H +#ifndef __ASSEMBLY__ + +/* possible values for segment type as returned by segment_info */ +#define SEG_TYPE_SW 0 +#define SEG_TYPE_EW 1 +#define SEG_TYPE_SR 2 +#define SEG_TYPE_ER 3 +#define SEG_TYPE_SN 4 +#define SEG_TYPE_EN 5 +#define SEG_TYPE_SC 6 +#define SEG_TYPE_EWEN 7 + +#define SEGMENT_SHARED 0 +#define SEGMENT_EXCLUSIVE 1 + +int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length); +void segment_unload(char *name); +void segment_save(char *name); +int segment_type (char* name); +int segment_modify_shared (char *name, int do_nonshared); +void segment_warning(int rc, char *seg_name); + +#endif +#endif diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h new file mode 100644 index 00000000000..0aa6a7ed95a --- /dev/null +++ b/arch/s390/include/asm/facility.h @@ -0,0 +1,67 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_FACILITY_H +#define __ASM_FACILITY_H + +#include <linux/string.h> +#include <linux/preempt.h> +#include <asm/lowcore.h> + +#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ + +static inline int __test_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) facilities + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/* + * The test_facility function uses the bit odering where the MSB is bit 0. + * That makes it easier to query facility bits with the bit number as + * documented in the Principles of Operation. + */ +static inline int test_facility(unsigned long nr) +{ + return __test_facility(nr, &S390_lowcore.stfle_fac_list); +} + +/** + * stfle - Store facility list extended + * @stfle_fac_list: array where facility list can be stored + * @size: size of passed in array in double words + */ +static inline void stfle(u64 *stfle_fac_list, int size) +{ + unsigned long nr; + + preempt_disable(); + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b) + : "+m" (S390_lowcore.stfl_fac_list)); + nr = 4; /* bytes stored by stfl */ + memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + if (S390_lowcore.stfl_fac_list & 0x01000000) { + /* More facility bits available with stfle */ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); + preempt_enable(); +} + +#endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h new file mode 100644 index 00000000000..c7df3803099 --- /dev/null +++ b/arch/s390/include/asm/fb.h @@ -0,0 +1,12 @@ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ +#include <linux/fb.h> + +#define fb_pgprotect(...) do {} while (0) + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h new file mode 100644 index 00000000000..7ecb92b469b --- /dev/null +++ b/arch/s390/include/asm/fcx.h @@ -0,0 +1,311 @@ +/* + * Functions for assembling fcx enabled I/O control blocks. + * + * Copyright IBM Corp. 2008 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_FCX_H +#define _ASM_S390_FCX_H _ASM_S390_FCX_H + +#include <linux/types.h> + +#define TCW_FORMAT_DEFAULT 0 +#define TCW_TIDAW_FORMAT_DEFAULT 0 +#define TCW_FLAGS_INPUT_TIDA (1 << (23 - 5)) +#define TCW_FLAGS_TCCB_TIDA (1 << (23 - 6)) +#define TCW_FLAGS_OUTPUT_TIDA (1 << (23 - 7)) +#define TCW_FLAGS_TIDAW_FORMAT(x) ((x) & 3) << (23 - 9) +#define TCW_FLAGS_GET_TIDAW_FORMAT(x) (((x) >> (23 - 9)) & 3) + +/** + * struct tcw - Transport Control Word (TCW) + * @format: TCW format + * @flags: TCW flags + * @tccbl: Transport-Command-Control-Block Length + * @r: Read Operations + * @w: Write Operations + * @output: Output-Data Address + * @input: Input-Data Address + * @tsb: Transport-Status-Block Address + * @tccb: Transport-Command-Control-Block Address + * @output_count: Output Count + * @input_count: Input Count + * @intrg: Interrogate TCW Address + */ +struct tcw { + u32 format:2; + u32 :6; + u32 flags:24; + u32 :8; + u32 tccbl:6; + u32 r:1; + u32 w:1; + u32 :16; + u64 output; + u64 input; + u64 tsb; + u64 tccb; + u32 output_count; + u32 input_count; + u32 :32; + u32 :32; + u32 :32; + u32 intrg; +} __attribute__ ((packed, aligned(64))); + +#define TIDAW_FLAGS_LAST (1 << (7 - 0)) +#define TIDAW_FLAGS_SKIP (1 << (7 - 1)) +#define TIDAW_FLAGS_DATA_INT (1 << (7 - 2)) +#define TIDAW_FLAGS_TTIC (1 << (7 - 3)) +#define TIDAW_FLAGS_INSERT_CBC (1 << (7 - 4)) + +/** + * struct tidaw - Transport-Indirect-Addressing Word (TIDAW) + * @flags: TIDAW flags. Can be an arithmetic OR of the following constants: + * %TIDAW_FLAGS_LAST, %TIDAW_FLAGS_SKIP, %TIDAW_FLAGS_DATA_INT, + * %TIDAW_FLAGS_TTIC, %TIDAW_FLAGS_INSERT_CBC + * @count: Count + * @addr: Address + */ +struct tidaw { + u32 flags:8; + u32 :24; + u32 count; + u64 addr; +} __attribute__ ((packed, aligned(16))); + +/** + * struct tsa_iostat - I/O-Status Transport-Status Area (IO-Stat TSA) + * @dev_time: Device Time + * @def_time: Defer Time + * @queue_time: Queue Time + * @dev_busy_time: Device-Busy Time + * @dev_act_time: Device-Active-Only Time + * @sense: Sense Data (if present) + */ +struct tsa_iostat { + u32 dev_time; + u32 def_time; + u32 queue_time; + u32 dev_busy_time; + u32 dev_act_time; + u8 sense[32]; +} __attribute__ ((packed)); + +/** + * struct tsa_ddpcs - Device-Detected-Program-Check Transport-Status Area (DDPC TSA) + * @rc: Reason Code + * @rcq: Reason Code Qualifier + * @sense: Sense Data (if present) + */ +struct tsa_ddpc { + u32 :24; + u32 rc:8; + u8 rcq[16]; + u8 sense[32]; +} __attribute__ ((packed)); + +#define TSA_INTRG_FLAGS_CU_STATE_VALID (1 << (7 - 0)) +#define TSA_INTRG_FLAGS_DEV_STATE_VALID (1 << (7 - 1)) +#define TSA_INTRG_FLAGS_OP_STATE_VALID (1 << (7 - 2)) + +/** + * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA) + * @format: Format + * @flags: Flags. Can be an arithmetic OR of the following constants: + * %TSA_INTRG_FLAGS_CU_STATE_VALID, %TSA_INTRG_FLAGS_DEV_STATE_VALID, + * %TSA_INTRG_FLAGS_OP_STATE_VALID + * @cu_state: Controle-Unit State + * @dev_state: Device State + * @op_state: Operation State + * @sd_info: State-Dependent Information + * @dl_id: Device-Level Identifier + * @dd_data: Device-Dependent Data + */ +struct tsa_intrg { + u32 format:8; + u32 flags:8; + u32 cu_state:8; + u32 dev_state:8; + u32 op_state:8; + u32 :24; + u8 sd_info[12]; + u32 dl_id; + u8 dd_data[28]; +} __attribute__ ((packed)); + +#define TSB_FORMAT_NONE 0 +#define TSB_FORMAT_IOSTAT 1 +#define TSB_FORMAT_DDPC 2 +#define TSB_FORMAT_INTRG 3 + +#define TSB_FLAGS_DCW_OFFSET_VALID (1 << (7 - 0)) +#define TSB_FLAGS_COUNT_VALID (1 << (7 - 1)) +#define TSB_FLAGS_CACHE_MISS (1 << (7 - 2)) +#define TSB_FLAGS_TIME_VALID (1 << (7 - 3)) +#define TSB_FLAGS_FORMAT(x) ((x) & 7) +#define TSB_FORMAT(t) ((t)->flags & 7) + +/** + * struct tsb - Transport-Status Block (TSB) + * @length: Length + * @flags: Flags. Can be an arithmetic OR of the following constants: + * %TSB_FLAGS_DCW_OFFSET_VALID, %TSB_FLAGS_COUNT_VALID, %TSB_FLAGS_CACHE_MISS, + * %TSB_FLAGS_TIME_VALID + * @dcw_offset: DCW Offset + * @count: Count + * @tsa: Transport-Status-Area + */ +struct tsb { + u32 length:8; + u32 flags:8; + u32 dcw_offset:16; + u32 count; + u32 :32; + union { + struct tsa_iostat iostat; + struct tsa_ddpc ddpc; + struct tsa_intrg intrg; + } __attribute__ ((packed)) tsa; +} __attribute__ ((packed, aligned(8))); + +#define DCW_INTRG_FORMAT_DEFAULT 0 + +#define DCW_INTRG_RC_UNSPECIFIED 0 +#define DCW_INTRG_RC_TIMEOUT 1 + +#define DCW_INTRG_RCQ_UNSPECIFIED 0 +#define DCW_INTRG_RCQ_PRIMARY 1 +#define DCW_INTRG_RCQ_SECONDARY 2 + +#define DCW_INTRG_FLAGS_MPM (1 << (7 - 0)) +#define DCW_INTRG_FLAGS_PPR (1 << (7 - 1)) +#define DCW_INTRG_FLAGS_CRIT (1 << (7 - 2)) + +/** + * struct dcw_intrg_data - Interrogate DCW data + * @format: Format. Should be %DCW_INTRG_FORMAT_DEFAULT + * @rc: Reason Code. Can be one of %DCW_INTRG_RC_UNSPECIFIED, + * %DCW_INTRG_RC_TIMEOUT + * @rcq: Reason Code Qualifier: Can be one of %DCW_INTRG_RCQ_UNSPECIFIED, + * %DCW_INTRG_RCQ_PRIMARY, %DCW_INTRG_RCQ_SECONDARY + * @lpm: Logical-Path Mask + * @pam: Path-Available Mask + * @pim: Path-Installed Mask + * @timeout: Timeout + * @flags: Flags. Can be an arithmetic OR of %DCW_INTRG_FLAGS_MPM, + * %DCW_INTRG_FLAGS_PPR, %DCW_INTRG_FLAGS_CRIT + * @time: Time + * @prog_id: Program Identifier + * @prog_data: Program-Dependent Data + */ +struct dcw_intrg_data { + u32 format:8; + u32 rc:8; + u32 rcq:8; + u32 lpm:8; + u32 pam:8; + u32 pim:8; + u32 timeout:16; + u32 flags:8; + u32 :24; + u32 :32; + u64 time; + u64 prog_id; + u8 prog_data[0]; +} __attribute__ ((packed)); + +#define DCW_FLAGS_CC (1 << (7 - 1)) + +#define DCW_CMD_WRITE 0x01 +#define DCW_CMD_READ 0x02 +#define DCW_CMD_CONTROL 0x03 +#define DCW_CMD_SENSE 0x04 +#define DCW_CMD_SENSE_ID 0xe4 +#define DCW_CMD_INTRG 0x40 + +/** + * struct dcw - Device-Command Word (DCW) + * @cmd: Command Code. Can be one of %DCW_CMD_WRITE, %DCW_CMD_READ, + * %DCW_CMD_CONTROL, %DCW_CMD_SENSE, %DCW_CMD_SENSE_ID, %DCW_CMD_INTRG + * @flags: Flags. Can be an arithmetic OR of %DCW_FLAGS_CC + * @cd_count: Control-Data Count + * @count: Count + * @cd: Control Data + */ +struct dcw { + u32 cmd:8; + u32 flags:8; + u32 :8; + u32 cd_count:8; + u32 count; + u8 cd[0]; +} __attribute__ ((packed)); + +#define TCCB_FORMAT_DEFAULT 0x7f +#define TCCB_MAX_DCW 30 +#define TCCB_MAX_SIZE (sizeof(struct tccb_tcah) + \ + TCCB_MAX_DCW * sizeof(struct dcw) + \ + sizeof(struct tccb_tcat)) +#define TCCB_SAC_DEFAULT 0x1ffe +#define TCCB_SAC_INTRG 0x1fff + +/** + * struct tccb_tcah - Transport-Command-Area Header (TCAH) + * @format: Format. Should be %TCCB_FORMAT_DEFAULT + * @tcal: Transport-Command-Area Length + * @sac: Service-Action Code. Can be one of %TCCB_SAC_DEFAULT, %TCCB_SAC_INTRG + * @prio: Priority + */ +struct tccb_tcah { + u32 format:8; + u32 :24; + u32 :24; + u32 tcal:8; + u32 sac:16; + u32 :8; + u32 prio:8; + u32 :32; +} __attribute__ ((packed)); + +/** + * struct tccb_tcat - Transport-Command-Area Trailer (TCAT) + * @count: Transport Count + */ +struct tccb_tcat { + u32 :32; + u32 count; +} __attribute__ ((packed)); + +/** + * struct tccb - (partial) Transport-Command-Control Block (TCCB) + * @tcah: TCAH + * @tca: Transport-Command Area + */ +struct tccb { + struct tccb_tcah tcah; + u8 tca[0]; +} __attribute__ ((packed, aligned(8))); + +struct tcw *tcw_get_intrg(struct tcw *tcw); +void *tcw_get_data(struct tcw *tcw); +struct tccb *tcw_get_tccb(struct tcw *tcw); +struct tsb *tcw_get_tsb(struct tcw *tcw); + +void tcw_init(struct tcw *tcw, int r, int w); +void tcw_finalize(struct tcw *tcw, int num_tidaws); + +void tcw_set_intrg(struct tcw *tcw, struct tcw *intrg_tcw); +void tcw_set_data(struct tcw *tcw, void *data, int use_tidal); +void tcw_set_tccb(struct tcw *tcw, struct tccb *tccb); +void tcw_set_tsb(struct tcw *tcw, struct tsb *tsb); + +void tccb_init(struct tccb *tccb, size_t tccb_size, u32 sac); +void tsb_init(struct tsb *tsb); +struct dcw *tccb_add_dcw(struct tccb *tccb, size_t tccb_size, u8 cmd, u8 flags, + void *cd, u8 cd_count, u32 count); +struct tidaw *tcw_add_tidaw(struct tcw *tcw, int num_tidaws, u8 flags, + void *addr, u32 count); + +#endif /* _ASM_S390_FCX_H */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h new file mode 100644 index 00000000000..bf246dae136 --- /dev/null +++ b/arch/s390/include/asm/ftrace.h @@ -0,0 +1,26 @@ +#ifndef _ASM_S390_FTRACE_H +#define _ASM_S390_FTRACE_H + +#ifndef __ASSEMBLY__ + +extern void _mcount(void); + +struct dyn_arch_ftrace { }; + +#define MCOUNT_ADDR ((long)_mcount) + + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_64BIT +#define MCOUNT_INSN_SIZE 12 +#else +#define MCOUNT_INSN_SIZE 22 +#endif + +#endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h new file mode 100644 index 00000000000..a4811aa0304 --- /dev/null +++ b/arch/s390/include/asm/futex.h @@ -0,0 +1,96 @@ +#ifndef _ASM_S390_FUTEX_H +#define _ASM_S390_FUTEX_H + +#include <linux/uaccess.h> +#include <linux/futex.h> +#include <asm/mmu_context.h> +#include <asm/errno.h> + +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile( \ + " sacf 256\n" \ + "0: l %1,0(%6)\n" \ + "1:"insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4: sacf 768\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc"); + +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, newval, ret; + + load_kernel_asce(); + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + pagefault_disable(); + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr %2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret; + + load_kernel_asce(); + asm volatile( + " sacf 256\n" + "0: cs %1,%4,0(%5)\n" + "1: la %0,0\n" + "2: sacf 768\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory"); + *uval = oldval; + return ret; +} + +#endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h new file mode 100644 index 00000000000..b7eabaaeffb --- /dev/null +++ b/arch/s390/include/asm/hardirq.h @@ -0,0 +1,26 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * + * Derived from "include/asm-i386/hardirq.h" + */ + +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +#include <asm/lowcore.h> + +#define local_softirq_pending() (S390_lowcore.softirq_pending) + +#define __ARCH_IRQ_STAT +#define __ARCH_HAS_DO_SOFTIRQ +#define __ARCH_IRQ_EXIT_IRQS_DISABLED + +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} + +#endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h new file mode 100644 index 00000000000..11eae5f55b7 --- /dev/null +++ b/arch/s390/include/asm/hugetlb.h @@ -0,0 +1,115 @@ +/* + * IBM System z Huge TLB Page Support for Kernel. + * + * Copyright IBM Corp. 2008 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#ifndef _ASM_S390_HUGETLB_H +#define _ASM_S390_HUGETLB_H + +#include <asm/page.h> +#include <asm/pgtable.h> + + +#define is_hugepage_only_range(mm, addr, len) 0 +#define hugetlb_free_pgd_range free_pgd_range + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +pte_t huge_ptep_get(pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +#define hugetlb_prefault_arch_hook(mm) do { } while (0) +#define arch_clear_hugepage_flags(page) do { } while (0) + +int arch_prepare_hugepage(struct page *page); +void arch_release_hugepage(struct page *page); + +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + huge_ptep_get_and_clear(vma->vm_mm, address, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + int changed = !pte_same(huge_ptep_get(ptep), pte); + if (changed) { + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + } + return changed; +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); +} + +static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) +{ + return mk_pte(page, pgprot); +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline int huge_pte_write(pte_t pte) +{ + return pte_write(pte); +} + +static inline int huge_pte_dirty(pte_t pte) +{ + return pte_dirty(pte); +} + +static inline pte_t huge_pte_mkwrite(pte_t pte) +{ + return pte_mkwrite(pte); +} + +static inline pte_t huge_pte_mkdirty(pte_t pte) +{ + return pte_mkdirty(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) +{ + return pte_modify(pte, newprot); +} + +#endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h new file mode 100644 index 00000000000..ee96a8b697f --- /dev/null +++ b/arch/s390/include/asm/hw_irq.h @@ -0,0 +1,11 @@ +#ifndef _HW_IRQ_H +#define _HW_IRQ_H + +#include <linux/msi.h> +#include <linux/pci.h> + +void __init init_airq_interrupts(void); +void __init init_cio_interrupts(void); +void __init init_ext_interrupts(void); + +#endif diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h new file mode 100644 index 00000000000..ea5a6e45fd9 --- /dev/null +++ b/arch/s390/include/asm/idals.h @@ -0,0 +1,248 @@ +/* + * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * Copyright IBM Corp. 2000 + * + * History of changes + * 07/24/00 new file + * 05/04/02 code restructuring. + */ + +#ifndef _S390_IDALS_H +#define _S390_IDALS_H + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/cio.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_64BIT +#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */ +#else +#define IDA_SIZE_LOG 11 /* 11 for 2k , 12 for 4k */ +#endif +#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG) + +/* + * Test if an address/length pair needs an idal list. + */ +static inline int +idal_is_needed(void *vaddr, unsigned int length) +{ +#ifdef CONFIG_64BIT + return ((__pa(vaddr) + length - 1) >> 31) != 0; +#else + return 0; +#endif +} + + +/* + * Return the number of idal words needed for an address/length pair. + */ +static inline unsigned int idal_nr_words(void *vaddr, unsigned int length) +{ + return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length + + (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; +} + +/* + * Create the list of idal words for an address/length pair. + */ +static inline unsigned long *idal_create_words(unsigned long *idaws, + void *vaddr, unsigned int length) +{ + unsigned long paddr; + unsigned int cidaw; + + paddr = __pa(vaddr); + cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length + + (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; + *idaws++ = paddr; + paddr &= -IDA_BLOCK_SIZE; + while (--cidaw > 0) { + paddr += IDA_BLOCK_SIZE; + *idaws++ = paddr; + } + return idaws; +} + +/* + * Sets the address of the data in CCW. + * If necessary it allocates an IDAL and sets the appropriate flags. + */ +static inline int +set_normalized_cda(struct ccw1 * ccw, void *vaddr) +{ +#ifdef CONFIG_64BIT + unsigned int nridaws; + unsigned long *idal; + + if (ccw->flags & CCW_FLAG_IDA) + return -EINVAL; + nridaws = idal_nr_words(vaddr, ccw->count); + if (nridaws > 0) { + idal = kmalloc(nridaws * sizeof(unsigned long), + GFP_ATOMIC | GFP_DMA ); + if (idal == NULL) + return -ENOMEM; + idal_create_words(idal, vaddr, ccw->count); + ccw->flags |= CCW_FLAG_IDA; + vaddr = idal; + } +#endif + ccw->cda = (__u32)(unsigned long) vaddr; + return 0; +} + +/* + * Releases any allocated IDAL related to the CCW. + */ +static inline void +clear_normalized_cda(struct ccw1 * ccw) +{ +#ifdef CONFIG_64BIT + if (ccw->flags & CCW_FLAG_IDA) { + kfree((void *)(unsigned long) ccw->cda); + ccw->flags &= ~CCW_FLAG_IDA; + } +#endif + ccw->cda = 0; +} + +/* + * Idal buffer extension + */ +struct idal_buffer { + size_t size; + size_t page_order; + void *data[0]; +}; + +/* + * Allocate an idal buffer + */ +static inline struct idal_buffer * +idal_buffer_alloc(size_t size, int page_order) +{ + struct idal_buffer *ib; + int nr_chunks, nr_ptrs, i; + + nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; + nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG; + ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *), + GFP_DMA | GFP_KERNEL); + if (ib == NULL) + return ERR_PTR(-ENOMEM); + ib->size = size; + ib->page_order = page_order; + for (i = 0; i < nr_ptrs; i++) { + if ((i & (nr_chunks - 1)) != 0) { + ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE; + continue; + } + ib->data[i] = (void *) + __get_free_pages(GFP_KERNEL, page_order); + if (ib->data[i] != NULL) + continue; + // Not enough memory + while (i >= nr_chunks) { + i -= nr_chunks; + free_pages((unsigned long) ib->data[i], + ib->page_order); + } + kfree(ib); + return ERR_PTR(-ENOMEM); + } + return ib; +} + +/* + * Free an idal buffer. + */ +static inline void +idal_buffer_free(struct idal_buffer *ib) +{ + int nr_chunks, nr_ptrs, i; + + nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; + nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG; + for (i = 0; i < nr_ptrs; i += nr_chunks) + free_pages((unsigned long) ib->data[i], ib->page_order); + kfree(ib); +} + +/* + * Test if a idal list is really needed. + */ +static inline int +__idal_buffer_is_needed(struct idal_buffer *ib) +{ +#ifdef CONFIG_64BIT + return ib->size > (4096ul << ib->page_order) || + idal_is_needed(ib->data[0], ib->size); +#else + return ib->size > (4096ul << ib->page_order); +#endif +} + +/* + * Set channel data address to idal buffer. + */ +static inline void +idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw) +{ + if (__idal_buffer_is_needed(ib)) { + // setup idals; + ccw->cda = (u32)(addr_t) ib->data; + ccw->flags |= CCW_FLAG_IDA; + } else + // we do not need idals - use direct addressing + ccw->cda = (u32)(addr_t) ib->data[0]; + ccw->count = ib->size; +} + +/* + * Copy count bytes from an idal buffer to user memory + */ +static inline size_t +idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) +{ + size_t left; + int i; + + BUG_ON(count > ib->size); + for (i = 0; count > IDA_BLOCK_SIZE; i++) { + left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE); + if (left) + return left + count - IDA_BLOCK_SIZE; + to = (void __user *) to + IDA_BLOCK_SIZE; + count -= IDA_BLOCK_SIZE; + } + return copy_to_user(to, ib->data[i], count); +} + +/* + * Copy count bytes from user memory to an idal buffer + */ +static inline size_t +idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count) +{ + size_t left; + int i; + + BUG_ON(count > ib->size); + for (i = 0; count > IDA_BLOCK_SIZE; i++) { + left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE); + if (left) + return left + count - IDA_BLOCK_SIZE; + from = (void __user *) from + IDA_BLOCK_SIZE; + count -= IDA_BLOCK_SIZE; + } + return copy_from_user(ib->data[i], from, count); +} + +#endif diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h new file mode 100644 index 00000000000..cd6b9ee7b69 --- /dev/null +++ b/arch/s390/include/asm/io.h @@ -0,0 +1,72 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/io.h" + */ + +#ifndef _S390_IO_H +#define _S390_IO_H + +#include <linux/kernel.h> +#include <asm/page.h> +#include <asm/pci_io.h> + +void *xlate_dev_mem_ptr(unsigned long phys); +#define xlate_dev_mem_ptr xlate_dev_mem_ptr +void unxlate_dev_mem_ptr(unsigned long phys, void *addr); + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#define IO_SPACE_LIMIT 0 + +#ifdef CONFIG_PCI + +#define ioremap_nocache(addr, size) ioremap(addr, size) +#define ioremap_wc ioremap_nocache + +static inline void __iomem *ioremap(unsigned long offset, unsigned long size) +{ + return (void __iomem *) offset; +} + +static inline void iounmap(volatile void __iomem *addr) +{ +} + +/* + * s390 needs a private implementation of pci_iomap since ioremap with its + * offset parameter isn't sufficient. That's because BAR spaces are not + * disjunctive on s390 so we need the bar parameter of pci_iomap to find + * the corresponding device and create the mapping cookie. + */ +#define pci_iomap pci_iomap +#define pci_iounmap pci_iounmap + +#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count) +#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count) +#define memset_io(dst, val, count) zpci_memset_io(dst, val, count) + +#define __raw_readb zpci_read_u8 +#define __raw_readw zpci_read_u16 +#define __raw_readl zpci_read_u32 +#define __raw_readq zpci_read_u64 +#define __raw_writeb zpci_write_u8 +#define __raw_writew zpci_write_u16 +#define __raw_writel zpci_write_u32 +#define __raw_writeq zpci_write_u64 + +#define readb_relaxed readb +#define readw_relaxed readw +#define readl_relaxed readl +#define readq_relaxed readq + +#endif /* CONFIG_PCI */ + +#include <asm-generic/io.h> + +#endif diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h new file mode 100644 index 00000000000..2fcccc0c997 --- /dev/null +++ b/arch/s390/include/asm/ipl.h @@ -0,0 +1,182 @@ +/* + * s390 (re)ipl support + * + * Copyright IBM Corp. 2007 + */ + +#ifndef _ASM_S390_IPL_H +#define _ASM_S390_IPL_H + +#include <asm/lowcore.h> +#include <asm/types.h> +#include <asm/cio.h> +#include <asm/setup.h> + +#define IPL_PARMBLOCK_ORIGIN 0x2000 + +#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \ + sizeof(struct ipl_block_fcp)) + +#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 8) + +#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \ + sizeof(struct ipl_block_ccw)) + +#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 8) + +#define IPL_MAX_SUPPORTED_VERSION (0) + +#define IPL_PARMBLOCK_START ((struct ipl_parameter_block *) \ + IPL_PARMBLOCK_ORIGIN) +#define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.len) + +struct ipl_list_hdr { + u32 len; + u8 reserved1[3]; + u8 version; + u32 blk0_len; + u8 pbt; + u8 flags; + u16 reserved2; +} __attribute__((packed)); + +struct ipl_block_fcp { + u8 reserved1[313-1]; + u8 opt; + u8 reserved2[3]; + u16 reserved3; + u16 devno; + u8 reserved4[4]; + u64 wwpn; + u64 lun; + u32 bootprog; + u8 reserved5[12]; + u64 br_lba; + u32 scp_data_len; + u8 reserved6[260]; + u8 scp_data[]; +} __attribute__((packed)); + +#define DIAG308_VMPARM_SIZE 64 +#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \ + offsetof(struct ipl_block_fcp, scp_data))) + +struct ipl_block_ccw { + u8 load_parm[8]; + u8 reserved1[84]; + u8 reserved2[2]; + u16 devno; + u8 vm_flags; + u8 reserved3[3]; + u32 vm_parm_len; + u8 nss_name[8]; + u8 vm_parm[DIAG308_VMPARM_SIZE]; + u8 reserved4[8]; +} __attribute__((packed)); + +struct ipl_parameter_block { + struct ipl_list_hdr hdr; + union { + struct ipl_block_fcp fcp; + struct ipl_block_ccw ccw; + } ipl_info; +} __attribute__((packed,aligned(4096))); + +/* + * IPL validity flags + */ +extern u32 ipl_flags; +extern u32 dump_prefix_page; + +struct dump_save_areas { + struct save_area **areas; + int count; +}; + +extern struct dump_save_areas dump_save_areas; +struct save_area *dump_save_area_create(int cpu); + +extern void do_reipl(void); +extern void do_halt(void); +extern void do_poff(void); +extern void ipl_save_parameters(void); +extern void ipl_update_parameters(void); +extern size_t append_ipl_vmparm(char *, size_t); +extern size_t append_ipl_scpdata(char *, size_t); + +enum { + IPL_DEVNO_VALID = 1, + IPL_PARMBLOCK_VALID = 2, + IPL_NSS_VALID = 4, +}; + +enum ipl_type { + IPL_TYPE_UNKNOWN = 1, + IPL_TYPE_CCW = 2, + IPL_TYPE_FCP = 4, + IPL_TYPE_FCP_DUMP = 8, + IPL_TYPE_NSS = 16, +}; + +struct ipl_info +{ + enum ipl_type type; + union { + struct { + struct ccw_dev_id dev_id; + } ccw; + struct { + struct ccw_dev_id dev_id; + u64 wwpn; + u64 lun; + } fcp; + struct { + char name[NSS_NAME_SIZE + 1]; + } nss; + } data; +}; + +extern struct ipl_info ipl_info; +extern void setup_ipl(void); + +/* + * DIAG 308 support + */ +enum diag308_subcode { + DIAG308_REL_HSA = 2, + DIAG308_IPL = 3, + DIAG308_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, +}; + +enum diag308_ipl_type { + DIAG308_IPL_TYPE_FCP = 0, + DIAG308_IPL_TYPE_CCW = 2, +}; + +enum diag308_opt { + DIAG308_IPL_OPT_IPL = 0x10, + DIAG308_IPL_OPT_DUMP = 0x20, +}; + +enum diag308_flags { + DIAG308_FLAGS_LP_VALID = 0x80, +}; + +enum diag308_vm_flags { + DIAG308_VM_FLAGS_NSS_VALID = 0x80, + DIAG308_VM_FLAGS_VP_VALID = 0x40, +}; + +enum diag308_rc { + DIAG308_RC_OK = 0x0001, + DIAG308_RC_NOCONFIG = 0x0102, +}; + +extern int diag308(unsigned long subcode, void *addr); +extern void diag308_reset(void); +extern void store_status(void); +extern void lgr_info_log(void); + +#endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h new file mode 100644 index 00000000000..c4dd400a279 --- /dev/null +++ b/arch/s390/include/asm/irq.h @@ -0,0 +1,109 @@ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +#define EXT_INTERRUPT 1 +#define IO_INTERRUPT 2 +#define THIN_INTERRUPT 3 + +#define NR_IRQS_BASE 4 + +#ifdef CONFIG_PCI_NR_MSI +# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) +#else +# define NR_IRQS NR_IRQS_BASE +#endif + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ 0 + +/* External interruption codes */ +#define EXT_IRQ_INTERRUPT_KEY 0x0040 +#define EXT_IRQ_CLK_COMP 0x1004 +#define EXT_IRQ_CPU_TIMER 0x1005 +#define EXT_IRQ_WARNING_TRACK 0x1007 +#define EXT_IRQ_MALFUNC_ALERT 0x1200 +#define EXT_IRQ_EMERGENCY_SIG 0x1201 +#define EXT_IRQ_EXTERNAL_CALL 0x1202 +#define EXT_IRQ_TIMING_ALERT 0x1406 +#define EXT_IRQ_MEASURE_ALERT 0x1407 +#define EXT_IRQ_SERVICE_SIG 0x2401 +#define EXT_IRQ_CP_SERVICE 0x2603 +#define EXT_IRQ_IUCV 0x4000 + +#ifndef __ASSEMBLY__ + +#include <linux/hardirq.h> +#include <linux/percpu.h> +#include <linux/cache.h> +#include <linux/types.h> + +enum interruption_class { + IRQEXT_CLK, + IRQEXT_EXC, + IRQEXT_EMS, + IRQEXT_TMR, + IRQEXT_TLA, + IRQEXT_PFL, + IRQEXT_DSD, + IRQEXT_VRT, + IRQEXT_SCP, + IRQEXT_IUC, + IRQEXT_CMS, + IRQEXT_CMC, + IRQEXT_CMR, + IRQIO_CIO, + IRQIO_QAI, + IRQIO_DAS, + IRQIO_C15, + IRQIO_C70, + IRQIO_TAP, + IRQIO_VMR, + IRQIO_LCS, + IRQIO_CLW, + IRQIO_CTC, + IRQIO_APB, + IRQIO_ADM, + IRQIO_CSC, + IRQIO_PCI, + IRQIO_MSI, + IRQIO_VIR, + IRQIO_VAI, + NMI_NMI, + CPU_RST, + NR_ARCH_IRQS +}; + +struct irq_stat { + unsigned int irqs[NR_ARCH_IRQS]; +}; + +DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); + +static __always_inline void inc_irq_stat(enum interruption_class irq) +{ + __get_cpu_var(irq_stat).irqs[irq]++; +} + +struct ext_code { + unsigned short subcode; + unsigned short code; +}; + +typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); + +int register_external_irq(u16 code, ext_int_handler_t handler); +int unregister_external_irq(u16 code, ext_int_handler_t handler); + +enum irq_subclass { + IRQ_SUBCLASS_MEASUREMENT_ALERT = 5, + IRQ_SUBCLASS_SERVICE_SIGNAL = 9, +}; + +void irq_subclass_register(enum irq_subclass subclass); +void irq_subclass_unregister(enum irq_subclass subclass); + +#define irq_canonicalize(irq) (irq) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h new file mode 100644 index 00000000000..3dd9c0b7027 --- /dev/null +++ b/arch/s390/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h new file mode 100644 index 00000000000..37b9091ab8c --- /dev/null +++ b/arch/s390/include/asm/irqflags.h @@ -0,0 +1,72 @@ +/* + * Copyright IBM Corp. 2006, 2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_IRQFLAGS_H +#define __ASM_IRQFLAGS_H + +#include <linux/types.h> + +/* store then OR system mask. */ +#define __arch_local_irq_stosm(__or) \ +({ \ + unsigned long __mask; \ + asm volatile( \ + " stosm %0,%1" \ + : "=Q" (__mask) : "i" (__or) : "memory"); \ + __mask; \ +}) + +/* store then AND system mask. */ +#define __arch_local_irq_stnsm(__and) \ +({ \ + unsigned long __mask; \ + asm volatile( \ + " stnsm %0,%1" \ + : "=Q" (__mask) : "i" (__and) : "memory"); \ + __mask; \ +}) + +/* set system mask. */ +static inline notrace void __arch_local_irq_ssm(unsigned long flags) +{ + asm volatile("ssm %0" : : "Q" (flags) : "memory"); +} + +static inline notrace unsigned long arch_local_save_flags(void) +{ + return __arch_local_irq_stosm(0x00); +} + +static inline notrace unsigned long arch_local_irq_save(void) +{ + return __arch_local_irq_stnsm(0xfc); +} + +static inline notrace void arch_local_irq_disable(void) +{ + arch_local_irq_save(); +} + +static inline notrace void arch_local_irq_enable(void) +{ + __arch_local_irq_stosm(0x03); +} + +static inline notrace void arch_local_irq_restore(unsigned long flags) +{ + __arch_local_irq_ssm(flags); +} + +static inline notrace bool arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (3UL << (BITS_PER_LONG - 8))); +} + +static inline notrace bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h new file mode 100644 index 00000000000..68d7d68300f --- /dev/null +++ b/arch/s390/include/asm/isc.h @@ -0,0 +1,28 @@ +#ifndef _ASM_S390_ISC_H +#define _ASM_S390_ISC_H + +#include <linux/types.h> + +/* + * I/O interruption subclasses used by drivers. + * Please add all used iscs here so that it is possible to distribute + * isc usage between drivers. + * Reminder: 0 is highest priority, 7 lowest. + */ +#define MAX_ISC 7 + +/* Regular I/O interrupts. */ +#define IO_SCH_ISC 3 /* regular I/O subchannels */ +#define CONSOLE_ISC 1 /* console I/O subchannel */ +#define EADM_SCH_ISC 4 /* EADM subchannels */ +#define CHSC_SCH_ISC 7 /* CHSC subchannels */ +/* Adapter interrupts. */ +#define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ +#define PCI_ISC 2 /* PCI I/O subchannels */ +#define AP_ISC 6 /* adjunct processor (crypto) devices */ + +/* Functions for registration of I/O interruption subclasses */ +void isc_register(unsigned int isc); +void isc_unregister(unsigned int isc); + +#endif /* _ASM_S390_ISC_H */ diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h new file mode 100644 index 00000000000..fb1bedd3dc0 --- /dev/null +++ b/arch/s390/include/asm/itcw.h @@ -0,0 +1,30 @@ +/* + * Functions for incremental construction of fcx enabled I/O control blocks. + * + * Copyright IBM Corp. 2008 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_ITCW_H +#define _ASM_S390_ITCW_H + +#include <linux/types.h> +#include <asm/fcx.h> + +#define ITCW_OP_READ 0 +#define ITCW_OP_WRITE 1 + +struct itcw; + +struct tcw *itcw_get_tcw(struct itcw *itcw); +size_t itcw_calc_size(int intrg, int max_tidaws, int intrg_max_tidaws); +struct itcw *itcw_init(void *buffer, size_t size, int op, int intrg, + int max_tidaws, int intrg_max_tidaws); +struct dcw *itcw_add_dcw(struct itcw *itcw, u8 cmd, u8 flags, void *cd, + u8 cd_count, u32 count); +struct tidaw *itcw_add_tidaw(struct itcw *itcw, u8 flags, void *addr, + u32 count); +void itcw_set_data(struct itcw *itcw, void *addr, int use_tidal); +void itcw_finalize(struct itcw *itcw); + +#endif /* _ASM_S390_ITCW_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h new file mode 100644 index 00000000000..346b1c85ffb --- /dev/null +++ b/arch/s390/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_S390_JUMP_LABEL_H +#define _ASM_S390_JUMP_LABEL_H + +#include <linux/types.h> + +#define JUMP_LABEL_NOP_SIZE 6 + +#ifdef CONFIG_64BIT +#define ASM_PTR ".quad" +#define ASM_ALIGN ".balign 8" +#else +#define ASM_PTR ".long" +#define ASM_ALIGN ".balign 4" +#endif + +static __always_inline bool arch_static_branch(struct static_key *key) +{ + asm_volatile_goto("0: brcl 0,0\n" + ".pushsection __jump_table, \"aw\"\n" + ASM_ALIGN "\n" + ASM_PTR " 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (key) : : label); + return false; +label: + return true; +} + +typedef unsigned long jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h new file mode 100644 index 00000000000..5c1abd47612 --- /dev/null +++ b/arch/s390/include/asm/kdebug.h @@ -0,0 +1,27 @@ +#ifndef _S390_KDEBUG_H +#define _S390_KDEBUG_H + +/* + * Feb 2006 Ported to s390 <grundym@us.ibm.com> + */ + +struct pt_regs; + +enum die_val { + DIE_OOPS = 1, + DIE_BPT, + DIE_SSTEP, + DIE_PANIC, + DIE_NMI, + DIE_DIE, + DIE_NMIWATCHDOG, + DIE_KERNELDEBUG, + DIE_TRAP, + DIE_GPF, + DIE_CALL, + DIE_NMI_IPI, +}; + +extern void die(struct pt_regs *, const char *); + +#endif diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h new file mode 100644 index 00000000000..694bcd6bd92 --- /dev/null +++ b/arch/s390/include/asm/kexec.h @@ -0,0 +1,63 @@ +/* + * Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +#ifndef _S390_KEXEC_H +#define _S390_KEXEC_H + +#include <asm/processor.h> +#include <asm/page.h> +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + */ + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control pages */ +/* Not more than 2GB */ +#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) + +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + +/* Allocate one page for the pdp and the second for the code */ +#define KEXEC_CONTROL_PAGE_SIZE 4096 + +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_S390 + +/* + * Size for s390x ELF notes per CPU + * + * Seven notes plus zero note at the end: prstatus, fpregset, timer, + * tod_cmp, tod_reg, control regs, and prefix + */ +#define KEXEC_NOTE_BYTES \ + (ALIGN(sizeof(struct elf_note), 4) * 8 + \ + ALIGN(sizeof("CORE"), 4) * 7 + \ + ALIGN(sizeof(struct elf_prstatus), 4) + \ + ALIGN(sizeof(elf_fpregset_t), 4) + \ + ALIGN(sizeof(u64), 4) + \ + ALIGN(sizeof(u64), 4) + \ + ALIGN(sizeof(u32), 4) + \ + ALIGN(sizeof(u64) * 16, 4) + \ + ALIGN(sizeof(u32), 4) \ + ) + +/* Provide a dummy definition to avoid build failures. */ +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) { } + +#endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h new file mode 100644 index 00000000000..0a88622339e --- /dev/null +++ b/arch/s390/include/asm/kmap_types.h @@ -0,0 +1,6 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +#include <asm-generic/kmap_types.h> + +#endif diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h new file mode 100644 index 00000000000..4176dfe0fba --- /dev/null +++ b/arch/s390/include/asm/kprobes.h @@ -0,0 +1,89 @@ +#ifndef _ASM_S390_KPROBES_H +#define _ASM_S390_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corp. 2002, 2006 + * + * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel + * Probes initial implementation ( includes suggestions from + * Rusty Russell). + * 2004-Nov Modified for PPC64 by Ananth N Mavinakayanahalli + * <ananth@in.ibm.com> + * 2005-Dec Used as a template for s390 by Mike Grundy + * <grundym@us.ibm.com> + */ +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct pt_regs; +struct kprobe; + +typedef u16 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0x0002 + +/* Maximum instruction size is 3 (16bit) halfwords: */ +#define MAX_INSN_SIZE 0x0003 +#define MAX_STACK_SIZE 64 +#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ + (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + ? (MAX_STACK_SIZE) \ + : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + +#define kretprobe_blacklist_size 0 + +#define KPROBE_SWAP_INST 0x10 + +#define FIXUP_PSW_NORMAL 0x08 +#define FIXUP_BRANCH_NOT_TAKEN 0x04 +#define FIXUP_RETURN_REGISTER 0x02 +#define FIXUP_NOT_REQUIRED 0x01 + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + /* copy of original instruction */ + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_saved_imask; + unsigned long kprobe_saved_ctl[3]; + struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; + kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *p); +void kretprobe_trampoline(void); + +int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#define flush_insn_slot(p) do { } while (0) + +#endif /* _ASM_S390_KPROBES_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h new file mode 100644 index 00000000000..4181d7baabb --- /dev/null +++ b/arch/s390/include/asm/kvm_host.h @@ -0,0 +1,454 @@ +/* + * definition for kernel virtual machines on s390 + * + * Copyright IBM Corp. 2008, 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + + +#ifndef ASM_KVM_HOST_H +#define ASM_KVM_HOST_H +#include <linux/hrtimer.h> +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <asm/debug.h> +#include <asm/cpu.h> +#include <asm/isc.h> + +#define KVM_MAX_VCPUS 64 +#define KVM_USER_MEM_SLOTS 32 + +/* + * These seem to be used for allocating ->chip in the routing table, + * which we don't use. 4096 is an out-of-thin-air value. If we need + * to look at ->chip later on, we'll need to revisit this. + */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 4096 + +#define SIGP_CTRL_C 0x00800000 + +struct sca_entry { + atomic_t ctrl; + __u32 reserved; + __u64 sda; + __u64 reserved2[2]; +} __attribute__((packed)); + +union ipte_control { + unsigned long val; + struct { + unsigned long k : 1; + unsigned long kh : 31; + unsigned long kg : 32; + }; +}; + +struct sca_block { + union ipte_control ipte_control; + __u64 reserved[5]; + __u64 mcn; + __u64 reserved2; + struct sca_entry cpu[64]; +} __attribute__((packed)); + +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct kvm_s390_sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 : 1; /* 0x0004 */ + __u32 prefix : 18; + __u32 : 13; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + __u8 reserved10[16]; /* 0x0010 */ +#define PROG_BLOCK_SIE 0x00000001 + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u8 reserved40[4]; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 + __u8 icptcode; /* 0x0050 */ + __u8 reserved51; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54[2]; /* 0x0054 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ + __u8 reserved60; /* 0x0060 */ + __u8 ecb; /* 0x0061 */ + __u8 ecb2; /* 0x0062 */ + __u8 reserved63[1]; /* 0x0063 */ + __u32 scaol; /* 0x0064 */ + __u8 reserved68[4]; /* 0x0068 */ + __u32 todpr; /* 0x006c */ + __u8 reserved70[32]; /* 0x0070 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[20]; /* 0x00b0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + __u32 reservedc8; /* 0x00c8 */ + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + __u64 tecmc; /* 0x00e8 */ + __u8 reservedf0[16]; /* 0x00f0 */ + __u64 gcr[16]; /* 0x0100 */ + __u64 gbea; /* 0x0180 */ + __u8 reserved188[24]; /* 0x0188 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[30]; /* 0x01c0 */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u8 reserved1f0[16]; /* 0x01f0 */ +} __attribute__((packed)); + +struct kvm_s390_itdb { + __u8 data[256]; +} __packed; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + __u8 reserved200[1024]; /* 0x0200 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +} __packed; + +struct kvm_vcpu_stat { + u32 exit_userspace; + u32 exit_null; + u32 exit_external_request; + u32 exit_external_interrupt; + u32 exit_stop_request; + u32 exit_validity; + u32 exit_instruction; + u32 instruction_lctl; + u32 instruction_lctlg; + u32 instruction_stctl; + u32 instruction_stctg; + u32 exit_program_interruption; + u32 exit_instr_and_program; + u32 deliver_external_call; + u32 deliver_emergency_signal; + u32 deliver_service_signal; + u32 deliver_virtio_interrupt; + u32 deliver_stop_signal; + u32 deliver_prefix_signal; + u32 deliver_restart_signal; + u32 deliver_program_int; + u32 deliver_io_int; + u32 exit_wait_state; + u32 instruction_pfmf; + u32 instruction_stidp; + u32 instruction_spx; + u32 instruction_stpx; + u32 instruction_stap; + u32 instruction_storage_key; + u32 instruction_ipte_interlock; + u32 instruction_stsch; + u32 instruction_chsc; + u32 instruction_stsi; + u32 instruction_stfl; + u32 instruction_tprot; + u32 instruction_essa; + u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; + u32 instruction_sigp_external_call; + u32 instruction_sigp_emergency; + u32 instruction_sigp_stop; + u32 instruction_sigp_arch; + u32 instruction_sigp_prefix; + u32 instruction_sigp_restart; + u32 diagnose_10; + u32 diagnose_44; + u32 diagnose_9c; +}; + +#define PGM_OPERATION 0x01 +#define PGM_PRIVILEGED_OP 0x02 +#define PGM_EXECUTE 0x03 +#define PGM_PROTECTION 0x04 +#define PGM_ADDRESSING 0x05 +#define PGM_SPECIFICATION 0x06 +#define PGM_DATA 0x07 +#define PGM_FIXED_POINT_OVERFLOW 0x08 +#define PGM_FIXED_POINT_DIVIDE 0x09 +#define PGM_DECIMAL_OVERFLOW 0x0a +#define PGM_DECIMAL_DIVIDE 0x0b +#define PGM_HFP_EXPONENT_OVERFLOW 0x0c +#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d +#define PGM_HFP_SIGNIFICANCE 0x0e +#define PGM_HFP_DIVIDE 0x0f +#define PGM_SEGMENT_TRANSLATION 0x10 +#define PGM_PAGE_TRANSLATION 0x11 +#define PGM_TRANSLATION_SPEC 0x12 +#define PGM_SPECIAL_OPERATION 0x13 +#define PGM_OPERAND 0x15 +#define PGM_TRACE_TABEL 0x16 +#define PGM_SPACE_SWITCH 0x1c +#define PGM_HFP_SQUARE_ROOT 0x1d +#define PGM_PC_TRANSLATION_SPEC 0x1f +#define PGM_AFX_TRANSLATION 0x20 +#define PGM_ASX_TRANSLATION 0x21 +#define PGM_LX_TRANSLATION 0x22 +#define PGM_EX_TRANSLATION 0x23 +#define PGM_PRIMARY_AUTHORITY 0x24 +#define PGM_SECONDARY_AUTHORITY 0x25 +#define PGM_LFX_TRANSLATION 0x26 +#define PGM_LSX_TRANSLATION 0x27 +#define PGM_ALET_SPECIFICATION 0x28 +#define PGM_ALEN_TRANSLATION 0x29 +#define PGM_ALE_SEQUENCE 0x2a +#define PGM_ASTE_VALIDITY 0x2b +#define PGM_ASTE_SEQUENCE 0x2c +#define PGM_EXTENDED_AUTHORITY 0x2d +#define PGM_LSTE_SEQUENCE 0x2e +#define PGM_ASTE_INSTANCE 0x2f +#define PGM_STACK_FULL 0x30 +#define PGM_STACK_EMPTY 0x31 +#define PGM_STACK_SPECIFICATION 0x32 +#define PGM_STACK_TYPE 0x33 +#define PGM_STACK_OPERATION 0x34 +#define PGM_ASCE_TYPE 0x38 +#define PGM_REGION_FIRST_TRANS 0x39 +#define PGM_REGION_SECOND_TRANS 0x3a +#define PGM_REGION_THIRD_TRANS 0x3b +#define PGM_MONITOR 0x40 +#define PGM_PER 0x80 +#define PGM_CRYPTO_OPERATION 0x119 + +struct kvm_s390_interrupt_info { + struct list_head list; + u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; + }; +}; + +/* for local_interrupt.action_flags */ +#define ACTION_STORE_ON_STOP (1<<0) +#define ACTION_STOP_ON_STOP (1<<1) + +struct kvm_s390_local_interrupt { + spinlock_t lock; + struct list_head list; + atomic_t active; + struct kvm_s390_float_interrupt *float_int; + int timer_due; /* event indicator for waitqueue below */ + wait_queue_head_t *wq; + atomic_t *cpuflags; + unsigned int action_bits; +}; + +struct kvm_s390_float_interrupt { + spinlock_t lock; + struct list_head list; + atomic_t active; + int next_rr_cpu; + unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int irq_count; +}; + +struct kvm_hw_wp_info_arch { + unsigned long addr; + unsigned long phys_addr; + int len; + char *old_data; +}; + +struct kvm_hw_bp_info_arch { + unsigned long addr; + int len; +}; + +/* + * Only the upper 16 bits of kvm_guest_debug->control are arch specific. + * Further KVM_GUESTDBG flags which an be used from userspace can be found in + * arch/s390/include/uapi/asm/kvm.h + */ +#define KVM_GUESTDBG_EXIT_PENDING 0x10000000 + +#define guestdbg_enabled(vcpu) \ + (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) +#define guestdbg_sstep_enabled(vcpu) \ + (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) +#define guestdbg_hw_bp_enabled(vcpu) \ + (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) +#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \ + (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING)) + +struct kvm_guestdbg_info_arch { + unsigned long cr0; + unsigned long cr9; + unsigned long cr10; + unsigned long cr11; + struct kvm_hw_bp_info_arch *hw_bp_info; + struct kvm_hw_wp_info_arch *hw_wp_info; + int nr_hw_bp; + int nr_hw_wp; + unsigned long last_bp; +}; + +struct kvm_vcpu_arch { + struct kvm_s390_sie_block *sie_block; + s390_fp_regs host_fpregs; + unsigned int host_acrs[NUM_ACRS]; + s390_fp_regs guest_fpregs; + struct kvm_s390_local_interrupt local_int; + struct hrtimer ckc_timer; + struct tasklet_struct tasklet; + struct kvm_s390_pgm_info pgm; + union { + struct cpuid cpu_id; + u64 stidp_data; + }; + struct gmap *gmap; + struct kvm_guestdbg_info_arch guestdbg; +#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) + unsigned long pfault_token; + unsigned long pfault_select; + unsigned long pfault_compare; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_arch_memory_slot { +}; + +struct s390_map_info { + struct list_head list; + __u64 guest_addr; + __u64 addr; + struct page *page; +}; + +struct s390_io_adapter { + unsigned int id; + int isc; + bool maskable; + bool masked; + bool swap; + struct rw_semaphore maps_lock; + struct list_head maps; + atomic_t nr_maps; +}; + +#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) +#define MAX_S390_ADAPTER_MAPS 256 + +struct kvm_arch{ + struct sca_block *sca; + debug_info_t *dbf; + struct kvm_s390_float_interrupt float_int; + struct kvm_device *flic; + struct gmap *gmap; + int css_support; + int use_irqchip; + int use_cmma; + struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; + wait_queue_head_t ipte_wq; + spinlock_t start_stop_lock; +}; + +#define KVM_HVA_ERR_BAD (-1UL) +#define KVM_HVA_ERR_RO_BAD (-2UL) + +static inline bool kvm_is_error_hva(unsigned long addr) +{ + return IS_ERR_VALUE(addr); +} + +#define ASYNC_PF_PER_VCPU 64 +struct kvm_vcpu; +struct kvm_async_pf; +struct kvm_arch_async_pf { + unsigned long pfault_token; +}; + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +extern int sie64a(struct kvm_s390_sie_block *, u64 *); +extern char sie_exit; +#endif diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h new file mode 100644 index 00000000000..e0f842308a6 --- /dev/null +++ b/arch/s390/include/asm/kvm_para.h @@ -0,0 +1,157 @@ +/* + * definition for paravirtual devices on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ +/* + * Hypercalls for KVM on s390. The calling convention is similar to the + * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1 + * as hypercall number and R7 as parameter 6. The return value is + * written to R2. We use the diagnose instruction as hypercall. To avoid + * conflicts with existing diagnoses for LPAR and z/VM, we do not use + * the instruction encoded number, but specify the number in R1 and + * use 0x500 as KVM hypercall + * + * Copyright IBM Corp. 2007,2008 + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#ifndef __S390_KVM_PARA_H +#define __S390_KVM_PARA_H + +#include <uapi/asm/kvm_para.h> + + + +static inline long kvm_hypercall0(unsigned long nr) +{ + register unsigned long __nr asm("1") = nr; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr): "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, + unsigned long p2) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) + : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3) : "memory", "cc"); + return __rc; +} + + +static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register unsigned long __p5 asm("6") = p5; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc"); + return __rc; +} + +static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, + unsigned long p6) +{ + register unsigned long __nr asm("1") = nr; + register unsigned long __p1 asm("2") = p1; + register unsigned long __p2 asm("3") = p2; + register unsigned long __p3 asm("4") = p3; + register unsigned long __p4 asm("5") = p4; + register unsigned long __p5 asm("6") = p5; + register unsigned long __p6 asm("7") = p6; + register long __rc asm("2"); + + asm volatile ("diag 2,4,0x500\n" + : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), + "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6) + : "memory", "cc"); + return __rc; +} + +/* kvm on s390 is always paravirtualization enabled */ +static inline int kvm_para_available(void) +{ + return 1; +} + +/* No feature bits are currently assigned for kvm on s390 */ +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +#endif /* __S390_KVM_PARA_H */ diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h new file mode 100644 index 00000000000..fc8a8284778 --- /dev/null +++ b/arch/s390/include/asm/linkage.h @@ -0,0 +1,9 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#include <linux/stringify.h> + +#define __ALIGN .align 4, 0x07 +#define __ALIGN_STR __stringify(__ALIGN) + +#endif diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h new file mode 100644 index 00000000000..c11c530f74d --- /dev/null +++ b/arch/s390/include/asm/local.h @@ -0,0 +1 @@ +#include <asm-generic/local.h> diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h new file mode 100644 index 00000000000..36c93b5cc23 --- /dev/null +++ b/arch/s390/include/asm/local64.h @@ -0,0 +1 @@ +#include <asm-generic/local64.h> diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h new file mode 100644 index 00000000000..4349197ab9d --- /dev/null +++ b/arch/s390/include/asm/lowcore.h @@ -0,0 +1,361 @@ +/* + * Copyright IBM Corp. 1999, 2012 + * Author(s): Hartmut Penner <hp@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Denis Joseph Barrow, + */ + +#ifndef _ASM_S390_LOWCORE_H +#define _ASM_S390_LOWCORE_H + +#include <linux/types.h> +#include <asm/ptrace.h> +#include <asm/cpu.h> + +#ifdef CONFIG_32BIT + +#define LC_ORDER 0 +#define LC_PAGES 1 + +struct save_area { + u32 ext_save; + u64 timer; + u64 clk_cmp; + u8 pad1[24]; + u8 psw[8]; + u32 pref_reg; + u8 pad2[20]; + u32 acc_regs[16]; + u64 fp_regs[4]; + u32 gp_regs[16]; + u32 ctrl_regs[16]; +} __packed; + +struct _lowcore { + psw_t restart_psw; /* 0x0000 */ + psw_t restart_old_psw; /* 0x0008 */ + __u8 pad_0x0010[0x0014-0x0010]; /* 0x0010 */ + __u32 ipl_parmblock_ptr; /* 0x0014 */ + psw_t external_old_psw; /* 0x0018 */ + psw_t svc_old_psw; /* 0x0020 */ + psw_t program_old_psw; /* 0x0028 */ + psw_t mcck_old_psw; /* 0x0030 */ + psw_t io_old_psw; /* 0x0038 */ + __u8 pad_0x0040[0x0058-0x0040]; /* 0x0040 */ + psw_t external_new_psw; /* 0x0058 */ + psw_t svc_new_psw; /* 0x0060 */ + psw_t program_new_psw; /* 0x0068 */ + psw_t mcck_new_psw; /* 0x0070 */ + psw_t io_new_psw; /* 0x0078 */ + __u32 ext_params; /* 0x0080 */ + __u16 ext_cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + __u16 svc_ilc; /* 0x0088 */ + __u16 svc_code; /* 0x008a */ + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + __u32 trans_exc_code; /* 0x0090 */ + __u16 mon_class_num; /* 0x0094 */ + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ + __u32 per_address; /* 0x0098 */ + __u32 monitor_code; /* 0x009c */ + __u8 exc_access_id; /* 0x00a0 */ + __u8 per_access_id; /* 0x00a1 */ + __u8 op_access_id; /* 0x00a2 */ + __u8 ar_mode_id; /* 0x00a3 */ + __u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */ + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ + __u32 stfl_fac_list; /* 0x00c8 */ + __u8 pad_0x00cc[0x00d4-0x00cc]; /* 0x00cc */ + __u32 extended_save_area_addr; /* 0x00d4 */ + __u32 cpu_timer_save_area[2]; /* 0x00d8 */ + __u32 clock_comp_save_area[2]; /* 0x00e0 */ + __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ + __u32 external_damage_code; /* 0x00f4 */ + __u32 failing_storage_address; /* 0x00f8 */ + __u8 pad_0x00fc[0x0100-0x00fc]; /* 0x00fc */ + psw_t psw_save_area; /* 0x0100 */ + __u32 prefixreg_save_area; /* 0x0108 */ + __u8 pad_0x010c[0x0120-0x010c]; /* 0x010c */ + + /* CPU register save area: defined by architecture */ + __u32 access_regs_save_area[16]; /* 0x0120 */ + __u32 floating_pt_save_area[8]; /* 0x0160 */ + __u32 gpregs_save_area[16]; /* 0x0180 */ + __u32 cregs_save_area[16]; /* 0x01c0 */ + + /* Save areas. */ + __u32 save_area_sync[8]; /* 0x0200 */ + __u32 save_area_async[8]; /* 0x0220 */ + __u32 save_area_restart[1]; /* 0x0240 */ + + /* CPU flags. */ + __u32 cpu_flags; /* 0x0244 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0248 */ + psw_t return_mcck_psw; /* 0x0250 */ + + /* CPU time accounting values */ + __u64 sync_enter_timer; /* 0x0258 */ + __u64 async_enter_timer; /* 0x0260 */ + __u64 mcck_enter_timer; /* 0x0268 */ + __u64 exit_timer; /* 0x0270 */ + __u64 user_timer; /* 0x0278 */ + __u64 system_timer; /* 0x0280 */ + __u64 steal_timer; /* 0x0288 */ + __u64 last_update_timer; /* 0x0290 */ + __u64 last_update_clock; /* 0x0298 */ + __u64 int_clock; /* 0x02a0 */ + __u64 mcck_clock; /* 0x02a8 */ + __u64 clock_comparator; /* 0x02b0 */ + + /* Current process. */ + __u32 current_task; /* 0x02b8 */ + __u32 thread_info; /* 0x02bc */ + __u32 kernel_stack; /* 0x02c0 */ + + /* Interrupt, panic and restart stack. */ + __u32 async_stack; /* 0x02c4 */ + __u32 panic_stack; /* 0x02c8 */ + __u32 restart_stack; /* 0x02cc */ + + /* Restart function and parameter. */ + __u32 restart_fn; /* 0x02d0 */ + __u32 restart_data; /* 0x02d4 */ + __u32 restart_source; /* 0x02d8 */ + + /* Address space pointer. */ + __u32 kernel_asce; /* 0x02dc */ + __u32 user_asce; /* 0x02e0 */ + __u32 current_pid; /* 0x02e4 */ + + /* SMP info area */ + __u32 cpu_nr; /* 0x02e8 */ + __u32 softirq_pending; /* 0x02ec */ + __u32 percpu_offset; /* 0x02f0 */ + __u32 machine_flags; /* 0x02f4 */ + __u32 ftrace_func; /* 0x02f8 */ + __u32 spinlock_lockval; /* 0x02fc */ + + __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information + * block. Dump tools need IPIB for IPL after dump. + * Note: do not change the position of any fields in 0x0e00-0x0f00 + */ + __u32 ipib; /* 0x0e00 */ + __u32 ipib_checksum; /* 0x0e04 */ + __u32 vmcore_info; /* 0x0e08 */ + __u8 pad_0x0e0c[0x0e18-0x0e0c]; /* 0x0e0c */ + __u32 os_info; /* 0x0e18 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ + + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ +} __packed; + +#else /* CONFIG_32BIT */ + +#define LC_ORDER 1 +#define LC_PAGES 2 + +struct save_area { + u64 fp_regs[16]; + u64 gp_regs[16]; + u8 psw[16]; + u8 pad1[8]; + u32 pref_reg; + u32 fp_ctrl_reg; + u8 pad2[4]; + u32 tod_reg; + u64 timer; + u64 clk_cmp; + u8 pad3[8]; + u32 acc_regs[16]; + u64 ctrl_regs[16]; +} __packed; + +struct _lowcore { + __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ + __u32 ipl_parmblock_ptr; /* 0x0014 */ + __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ + __u32 ext_params; /* 0x0080 */ + __u16 ext_cpu_addr; /* 0x0084 */ + __u16 ext_int_code; /* 0x0086 */ + __u16 svc_ilc; /* 0x0088 */ + __u16 svc_code; /* 0x008a */ + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + __u32 data_exc_code; /* 0x0090 */ + __u16 mon_class_num; /* 0x0094 */ + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ + __u64 per_address; /* 0x0098 */ + __u8 exc_access_id; /* 0x00a0 */ + __u8 per_access_id; /* 0x00a1 */ + __u8 op_access_id; /* 0x00a2 */ + __u8 ar_mode_id; /* 0x00a3 */ + __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ + __u64 trans_exc_code; /* 0x00a8 */ + __u64 monitor_code; /* 0x00b0 */ + __u16 subchannel_id; /* 0x00b8 */ + __u16 subchannel_nr; /* 0x00ba */ + __u32 io_int_parm; /* 0x00bc */ + __u32 io_int_word; /* 0x00c0 */ + __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ + __u32 stfl_fac_list; /* 0x00c8 */ + __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */ + __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ + __u32 external_damage_code; /* 0x00f4 */ + __u64 failing_storage_address; /* 0x00f8 */ + __u8 pad_0x0100[0x0110-0x0100]; /* 0x0100 */ + __u64 breaking_event_addr; /* 0x0110 */ + __u8 pad_0x0118[0x0120-0x0118]; /* 0x0118 */ + psw_t restart_old_psw; /* 0x0120 */ + psw_t external_old_psw; /* 0x0130 */ + psw_t svc_old_psw; /* 0x0140 */ + psw_t program_old_psw; /* 0x0150 */ + psw_t mcck_old_psw; /* 0x0160 */ + psw_t io_old_psw; /* 0x0170 */ + __u8 pad_0x0180[0x01a0-0x0180]; /* 0x0180 */ + psw_t restart_psw; /* 0x01a0 */ + psw_t external_new_psw; /* 0x01b0 */ + psw_t svc_new_psw; /* 0x01c0 */ + psw_t program_new_psw; /* 0x01d0 */ + psw_t mcck_new_psw; /* 0x01e0 */ + psw_t io_new_psw; /* 0x01f0 */ + + /* Save areas. */ + __u64 save_area_sync[8]; /* 0x0200 */ + __u64 save_area_async[8]; /* 0x0240 */ + __u64 save_area_restart[1]; /* 0x0280 */ + + /* CPU flags. */ + __u64 cpu_flags; /* 0x0288 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0290 */ + psw_t return_mcck_psw; /* 0x02a0 */ + + /* CPU accounting and timing values. */ + __u64 sync_enter_timer; /* 0x02b0 */ + __u64 async_enter_timer; /* 0x02b8 */ + __u64 mcck_enter_timer; /* 0x02c0 */ + __u64 exit_timer; /* 0x02c8 */ + __u64 user_timer; /* 0x02d0 */ + __u64 system_timer; /* 0x02d8 */ + __u64 steal_timer; /* 0x02e0 */ + __u64 last_update_timer; /* 0x02e8 */ + __u64 last_update_clock; /* 0x02f0 */ + __u64 int_clock; /* 0x02f8 */ + __u64 mcck_clock; /* 0x0300 */ + __u64 clock_comparator; /* 0x0308 */ + + /* Current process. */ + __u64 current_task; /* 0x0310 */ + __u64 thread_info; /* 0x0318 */ + __u64 kernel_stack; /* 0x0320 */ + + /* Interrupt, panic and restart stack. */ + __u64 async_stack; /* 0x0328 */ + __u64 panic_stack; /* 0x0330 */ + __u64 restart_stack; /* 0x0338 */ + + /* Restart function and parameter. */ + __u64 restart_fn; /* 0x0340 */ + __u64 restart_data; /* 0x0348 */ + __u64 restart_source; /* 0x0350 */ + + /* Address space pointer. */ + __u64 kernel_asce; /* 0x0358 */ + __u64 user_asce; /* 0x0360 */ + __u64 current_pid; /* 0x0368 */ + + /* SMP info area */ + __u32 cpu_nr; /* 0x0370 */ + __u32 softirq_pending; /* 0x0374 */ + __u64 percpu_offset; /* 0x0378 */ + __u64 vdso_per_cpu_data; /* 0x0380 */ + __u64 machine_flags; /* 0x0388 */ + __u64 ftrace_func; /* 0x0390 */ + __u64 gmap; /* 0x0398 */ + __u32 spinlock_lockval; /* 0x03a0 */ + __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ + + /* Per cpu primary space access list */ + __u32 paste[16]; /* 0x0400 */ + + __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */ + + /* + * 0xe00 contains the address of the IPL Parameter Information + * block. Dump tools need IPIB for IPL after dump. + * Note: do not change the position of any fields in 0x0e00-0x0f00 + */ + __u64 ipib; /* 0x0e00 */ + __u32 ipib_checksum; /* 0x0e08 */ + __u64 vmcore_info; /* 0x0e0c */ + __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */ + __u64 os_info; /* 0x0e18 */ + __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */ + + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ + __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ + + /* 64 bit extparam used for pfault/diag 250: defined by architecture */ + __u64 ext_params2; /* 0x11B8 */ + __u8 pad_0x11c0[0x1200-0x11C0]; /* 0x11C0 */ + + /* CPU register save area: defined by architecture */ + __u64 floating_pt_save_area[16]; /* 0x1200 */ + __u64 gpregs_save_area[16]; /* 0x1280 */ + psw_t psw_save_area; /* 0x1300 */ + __u8 pad_0x1310[0x1318-0x1310]; /* 0x1310 */ + __u32 prefixreg_save_area; /* 0x1318 */ + __u32 fpt_creg_save_area; /* 0x131c */ + __u8 pad_0x1320[0x1324-0x1320]; /* 0x1320 */ + __u32 tod_progreg_save_area; /* 0x1324 */ + __u32 cpu_timer_save_area[2]; /* 0x1328 */ + __u32 clock_comp_save_area[2]; /* 0x1330 */ + __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */ + __u32 access_regs_save_area[16]; /* 0x1340 */ + __u64 cregs_save_area[16]; /* 0x1380 */ + __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */ + + /* Transaction abort diagnostic block */ + __u8 pgm_tdb[256]; /* 0x1800 */ + + /* align to the top of the prefix area */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ +} __packed; + +#endif /* CONFIG_32BIT */ + +#define S390_lowcore (*((struct _lowcore *) 0)) + +extern struct _lowcore *lowcore_ptr[]; + +static inline void set_prefix(__u32 address) +{ + asm volatile("spx %0" : : "m" (address) : "memory"); +} + +static inline __u32 store_prefix(void) +{ + __u32 address; + + asm volatile("stpx %0" : "=m" (address)); + return address; +} + +#endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h new file mode 100644 index 00000000000..614dfaf47f7 --- /dev/null +++ b/arch/s390/include/asm/mathemu.h @@ -0,0 +1,28 @@ +/* + * IEEE floating point emulation. + * + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#ifndef __MATHEMU__ +#define __MATHEMU__ + +extern int math_emu_b3(__u8 *, struct pt_regs *); +extern int math_emu_ed(__u8 *, struct pt_regs *); +extern int math_emu_ldr(__u8 *); +extern int math_emu_ler(__u8 *); +extern int math_emu_std(__u8 *, struct pt_regs *); +extern int math_emu_ld(__u8 *, struct pt_regs *); +extern int math_emu_ste(__u8 *, struct pt_regs *); +extern int math_emu_le(__u8 *, struct pt_regs *); +extern int math_emu_lfpc(__u8 *, struct pt_regs *); +extern int math_emu_stfpc(__u8 *, struct pt_regs *); +extern int math_emu_srnm(__u8 *, struct pt_regs *); + +#endif /* __MATHEMU__ */ + + + + diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h new file mode 100644 index 00000000000..9977e08df5b --- /dev/null +++ b/arch/s390/include/asm/mman.h @@ -0,0 +1,15 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/mman.h" + */ +#ifndef __S390_MMAN_H__ +#define __S390_MMAN_H__ + +#include <uapi/asm/mman.h> + +#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT) +int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags); +#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags) +#endif +#endif /* __S390_MMAN_H__ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h new file mode 100644 index 00000000000..a5e656260a7 --- /dev/null +++ b/arch/s390/include/asm/mmu.h @@ -0,0 +1,42 @@ +#ifndef __MMU_H +#define __MMU_H + +#include <linux/cpumask.h> +#include <linux/errno.h> + +typedef struct { + cpumask_t cpu_attach_mask; + atomic_t attach_count; + unsigned int flush_mm; + spinlock_t list_lock; + struct list_head pgtable_list; + struct list_head gmap_list; + unsigned long asce_bits; + unsigned long asce_limit; + unsigned long vdso_base; + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; + /* The mmu context uses storage keys. */ + unsigned int use_skey:1; +} mm_context_t; + +#define INIT_MM_CONTEXT(name) \ + .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ + .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ + .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), + +static inline int tprot(unsigned long addr) +{ + int rc = -EFAULT; + + asm volatile( + " tprot 0(%1),0\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) : "a" (addr) : "cc"); + return rc; +} + +#endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h new file mode 100644 index 00000000000..3815bfea1b2 --- /dev/null +++ b/arch/s390/include/asm/mmu_context.h @@ -0,0 +1,123 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/mmu_context.h" + */ + +#ifndef __S390_MMU_CONTEXT_H +#define __S390_MMU_CONTEXT_H + +#include <asm/pgalloc.h> +#include <asm/uaccess.h> +#include <asm/tlbflush.h> +#include <asm/ctl_reg.h> + +static inline int init_new_context(struct task_struct *tsk, + struct mm_struct *mm) +{ + cpumask_clear(&mm->context.cpu_attach_mask); + atomic_set(&mm->context.attach_count, 0); + mm->context.flush_mm = 0; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; +#ifdef CONFIG_64BIT + mm->context.asce_bits |= _ASCE_TYPE_REGION3; +#endif + mm->context.has_pgste = 0; + mm->context.use_skey = 0; + mm->context.asce_limit = STACK_TOP_MAX; + crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); + return 0; +} + +#define destroy_context(mm) do { } while (0) + +static inline void set_user_asce(struct mm_struct *mm) +{ + S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd); + if (current->thread.mm_segment.ar4) + __ctl_load(S390_lowcore.user_asce, 7, 7); + set_cpu_flag(CIF_ASCE); +} + +static inline void clear_user_asce(void) +{ + S390_lowcore.user_asce = S390_lowcore.kernel_asce; + + __ctl_load(S390_lowcore.user_asce, 1, 1); + __ctl_load(S390_lowcore.user_asce, 7, 7); +} + +static inline void load_kernel_asce(void) +{ + unsigned long asce; + + __ctl_store(asce, 1, 1); + if (asce != S390_lowcore.kernel_asce) + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE); +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + int cpu = smp_processor_id(); + + if (prev == next) + return; + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + /* Clear old ASCE by loading the kernel ASCE. */ + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + atomic_inc(&next->context.attach_count); + atomic_dec(&prev->context.attach_count); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); +} + +#define finish_arch_post_lock_switch finish_arch_post_lock_switch +static inline void finish_arch_post_lock_switch(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + load_kernel_asce(); + if (mm) { + preempt_disable(); + while (atomic_read(&mm->context.attach_count) >> 16) + cpu_relax(); + + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + if (mm->context.flush_mm) + __tlb_flush_mm(mm); + preempt_enable(); + } + set_fs(current->thread.mm_segment); +} + +#define enter_lazy_tlb(mm,tsk) do { } while (0) +#define deactivate_mm(tsk,mm) do { } while (0) + +static inline void activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ + switch_mm(prev, next, current); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + set_user_asce(next); +} + +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_64BIT + if (oldmm->context.asce_limit < mm->context.asce_limit) + crst_table_downgrade(mm, oldmm->context.asce_limit); +#endif +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +#endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h new file mode 100644 index 00000000000..df1f861a848 --- /dev/null +++ b/arch/s390/include/asm/module.h @@ -0,0 +1,34 @@ +#ifndef _ASM_S390_MODULE_H +#define _ASM_S390_MODULE_H + +#include <asm-generic/module.h> + +/* + * This file contains the s390 architecture specific module code. + */ + +struct mod_arch_syminfo +{ + unsigned long got_offset; + unsigned long plt_offset; + int got_initialized; + int plt_initialized; +}; + +struct mod_arch_specific +{ + /* Starting offset of got in the module core memory. */ + unsigned long got_offset; + /* Starting offset of plt in the module core memory. */ + unsigned long plt_offset; + /* Size of the got. */ + unsigned long got_size; + /* Size of the plt. */ + unsigned long plt_size; + /* Number of symbols in syminfo. */ + int nsyms; + /* Additional symbol information (got and plt offsets). */ + struct mod_arch_syminfo *syminfo; +}; + +#endif /* _ASM_S390_MODULE_H */ diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h new file mode 100644 index 00000000000..458c1f7fbc1 --- /dev/null +++ b/arch/s390/include/asm/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. + * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include <asm-generic/mutex-dec.h> diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h new file mode 100644 index 00000000000..35f8ec18561 --- /dev/null +++ b/arch/s390/include/asm/nmi.h @@ -0,0 +1,66 @@ +/* + * Machine check handler definitions + * + * Copyright IBM Corp. 2000, 2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#ifndef _ASM_S390_NMI_H +#define _ASM_S390_NMI_H + +#include <linux/types.h> + +struct mci { + __u32 sd : 1; /* 00 system damage */ + __u32 pd : 1; /* 01 instruction-processing damage */ + __u32 sr : 1; /* 02 system recovery */ + __u32 : 1; /* 03 */ + __u32 cd : 1; /* 04 timing-facility damage */ + __u32 ed : 1; /* 05 external damage */ + __u32 : 1; /* 06 */ + __u32 dg : 1; /* 07 degradation */ + __u32 w : 1; /* 08 warning pending */ + __u32 cp : 1; /* 09 channel-report pending */ + __u32 sp : 1; /* 10 service-processor damage */ + __u32 ck : 1; /* 11 channel-subsystem damage */ + __u32 : 2; /* 12-13 */ + __u32 b : 1; /* 14 backed up */ + __u32 : 1; /* 15 */ + __u32 se : 1; /* 16 storage error uncorrected */ + __u32 sc : 1; /* 17 storage error corrected */ + __u32 ke : 1; /* 18 storage-key error uncorrected */ + __u32 ds : 1; /* 19 storage degradation */ + __u32 wp : 1; /* 20 psw mwp validity */ + __u32 ms : 1; /* 21 psw mask and key validity */ + __u32 pm : 1; /* 22 psw program mask and cc validity */ + __u32 ia : 1; /* 23 psw instruction address validity */ + __u32 fa : 1; /* 24 failing storage address validity */ + __u32 : 1; /* 25 */ + __u32 ec : 1; /* 26 external damage code validity */ + __u32 fp : 1; /* 27 floating point register validity */ + __u32 gr : 1; /* 28 general register validity */ + __u32 cr : 1; /* 29 control register validity */ + __u32 : 1; /* 30 */ + __u32 st : 1; /* 31 storage logical validity */ + __u32 ie : 1; /* 32 indirect storage error */ + __u32 ar : 1; /* 33 access register validity */ + __u32 da : 1; /* 34 delayed access exception */ + __u32 : 7; /* 35-41 */ + __u32 pr : 1; /* 42 tod programmable register validity */ + __u32 fc : 1; /* 43 fp control register validity */ + __u32 ap : 1; /* 44 ancillary report */ + __u32 : 1; /* 45 */ + __u32 ct : 1; /* 46 cpu timer validity */ + __u32 cc : 1; /* 47 clock comparator validity */ + __u32 : 16; /* 47-63 */ +}; + +struct pt_regs; + +extern void s390_handle_mcck(void); +extern void s390_do_machine_check(struct pt_regs *regs); + +#endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h new file mode 100644 index 00000000000..295f2c4f1c9 --- /dev/null +++ b/arch/s390/include/asm/os_info.h @@ -0,0 +1,49 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ +#ifndef _ASM_S390_OS_INFO_H +#define _ASM_S390_OS_INFO_H + +#define OS_INFO_VERSION_MAJOR 1 +#define OS_INFO_VERSION_MINOR 1 +#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */ + +#define OS_INFO_VMCOREINFO 0 +#define OS_INFO_REIPL_BLOCK 1 + +struct os_info_entry { + u64 addr; + u64 size; + u32 csum; +} __packed; + +struct os_info { + u64 magic; + u32 csum; + u16 version_major; + u16 version_minor; + u64 crashkernel_addr; + u64 crashkernel_size; + struct os_info_entry entry[2]; + u8 reserved[4024]; +} __packed; + +void os_info_init(void); +void os_info_entry_add(int nr, void *ptr, u64 len); +void os_info_crashkernel_add(unsigned long base, unsigned long size); +u32 os_info_csum(struct os_info *os_info); + +#ifdef CONFIG_CRASH_DUMP +void *os_info_old_entry(int nr, unsigned long *size); +int copy_from_oldmem(void *dest, void *src, size_t count); +#else +static inline void *os_info_old_entry(int nr, unsigned long *size) +{ + return NULL; +} +#endif + +#endif /* _ASM_S390_OS_INFO_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h new file mode 100644 index 00000000000..114258eeaac --- /dev/null +++ b/arch/s390/include/asm/page.h @@ -0,0 +1,167 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Hartmut Penner (hp@de.ibm.com) + */ + +#ifndef _S390_PAGE_H +#define _S390_PAGE_H + +#include <linux/const.h> +#include <asm/types.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_DEFAULT_ACC 0 +#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) + +#define HPAGE_SHIFT 20 +#define HPAGE_SIZE (1UL << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define ARCH_HAS_SETCLEAR_HUGE_PTE +#define ARCH_HAS_HUGE_PTE_TYPE +#define ARCH_HAS_PREPARE_HUGEPAGE +#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH + +#include <asm/setup.h> +#ifndef __ASSEMBLY__ + +static inline void storage_key_init_range(unsigned long start, unsigned long end) +{ +#if PAGE_DEFAULT_KEY + __storage_key_init_range(start, end); +#endif +} + +static inline void clear_page(void *page) +{ + register unsigned long reg1 asm ("1") = 0; + register void *reg2 asm ("2") = page; + register unsigned long reg3 asm ("3") = 4096; + asm volatile( + " mvcl 2,0" + : "+d" (reg2), "+d" (reg3) : "d" (reg1) + : "memory", "cc"); +} + +/* + * copy_page uses the mvcl instruction with 0xb0 padding byte in order to + * bypass caches when copying a page. Especially when copying huge pages + * this keeps L1 and L2 data caches alive. + */ +static inline void copy_page(void *to, void *from) +{ + register void *reg2 asm ("2") = to; + register unsigned long reg3 asm ("3") = 0x1000; + register void *reg4 asm ("4") = from; + register unsigned long reg5 asm ("5") = 0xb0001000; + asm volatile( + " mvcl 2,4" + : "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5) + : : "memory", "cc"); +} + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +/* + * These are used to make use of C type-checking.. + */ + +typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; +typedef pte_t *pgtable_t; + +#define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) +#define pgd_val(x) ((x).pgd) + +#define __pgste(x) ((pgste_t) { (x) } ) +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) +{ + if (!mapped) + asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" + : : "d" (skey), "a" (addr)); + else + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); +} + +static inline unsigned char page_get_storage_key(unsigned long addr) +{ + unsigned char skey; + + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); + return skey; +} + +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +struct page; +void arch_free_page(struct page *page, int order); +void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); + +static inline int devmem_is_allowed(unsigned long pfn) +{ + return 0; +} + +#define HAVE_ARCH_FREE_PAGE +#define HAVE_ARCH_ALLOC_PAGE + +#endif /* !__ASSEMBLY__ */ + +#define __PAGE_OFFSET 0x0UL +#define PAGE_OFFSET 0x0UL +#define __pa(x) (unsigned long)(x) +#define __va(x) (void *)(unsigned long)(x) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include <asm-generic/memory_model.h> +#include <asm-generic/getorder.h> + +#define __HAVE_ARCH_GATE_AREA 1 + +#endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h new file mode 100644 index 00000000000..c030900320e --- /dev/null +++ b/arch/s390/include/asm/pci.h @@ -0,0 +1,192 @@ +#ifndef __ASM_S390_PCI_H +#define __ASM_S390_PCI_H + +/* must be set before including asm-generic/pci.h */ +#define PCI_DMA_BUS_IS_PHYS (0) +/* must be set before including pci_clp.h */ +#define PCI_BAR_COUNT 6 + +#include <linux/pci.h> +#include <asm-generic/pci.h> +#include <asm-generic/pci-dma-compat.h> +#include <asm/pci_clp.h> +#include <asm/pci_debug.h> + +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM 0x10000000 + +#define pcibios_assign_all_busses() (0) + +void __iomem *pci_iomap(struct pci_dev *, int, unsigned long); +void pci_iounmap(struct pci_dev *, void __iomem *); +int pci_domain_nr(struct pci_bus *); +int pci_proc_domain(struct pci_bus *); + +#define ZPCI_BUS_NR 0 /* default bus number */ +#define ZPCI_DEVFN 0 /* default device number */ + +/* PCI Function Controls */ +#define ZPCI_FC_FN_ENABLED 0x80 +#define ZPCI_FC_ERROR 0x40 +#define ZPCI_FC_BLOCKED 0x20 +#define ZPCI_FC_DMA_ENABLED 0x10 + +struct zpci_fmb { + u32 format : 8; + u32 dma_valid : 1; + u32 : 23; + u32 samples; + u64 last_update; + /* hardware counters */ + u64 ld_ops; + u64 st_ops; + u64 stb_ops; + u64 rpcit_ops; + u64 dma_rbytes; + u64 dma_wbytes; + /* software counters */ + atomic64_t allocated_pages; + atomic64_t mapped_pages; + atomic64_t unmapped_pages; +} __packed __aligned(16); + +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) +#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) + +enum zpci_state { + ZPCI_FN_STATE_RESERVED, + ZPCI_FN_STATE_STANDBY, + ZPCI_FN_STATE_CONFIGURED, + ZPCI_FN_STATE_ONLINE, + NR_ZPCI_FN_STATES, +}; + +struct zpci_bar_struct { + struct resource *res; /* bus resource */ + u32 val; /* bar start & 3 flag bits */ + u16 map_idx; /* index into bar mapping array */ + u8 size; /* order 2 exponent */ +}; + +/* Private data per function */ +struct zpci_dev { + struct pci_dev *pdev; + struct pci_bus *bus; + struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + + enum zpci_state state; + u32 fid; /* function ID, used by sclp */ + u32 fh; /* function handle, used by insn's */ + u16 vfn; /* virtual function number */ + u16 pchid; /* physical channel ID */ + u8 pfgid; /* function group ID */ + u8 pft; /* pci function type */ + u16 domain; + + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 uid; /* user defined id */ + u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ + + /* IRQ stuff */ + u64 msi_addr; /* MSI address */ + struct airq_iv *aibv; /* adapter interrupt bit vector */ + unsigned int aisb; /* number of the summary bit */ + + /* DMA stuff */ + unsigned long *dma_table; + spinlock_t dma_table_lock; + int tlb_refresh; + + spinlock_t iommu_bitmap_lock; + unsigned long *iommu_bitmap; + unsigned long iommu_size; + unsigned long iommu_pages; + unsigned int next_bit; + + char res_name[16]; + struct zpci_bar_struct bars[PCI_BAR_COUNT]; + + u64 start_dma; /* Start of available DMA addresses */ + u64 end_dma; /* End of available DMA addresses */ + u64 dma_mask; /* DMA address space mask */ + + /* Function measurement block */ + struct zpci_fmb *fmb; + u16 fmb_update; /* update interval */ + + enum pci_bus_speed max_bus_speed; + + struct dentry *debugfs_dev; + struct dentry *debugfs_perf; +}; + +static inline bool zdev_enabled(struct zpci_dev *zdev) +{ + return (zdev->fh & (1UL << 31)) ? true : false; +} + +extern const struct attribute_group *zpci_attr_groups[]; + +/* ----------------------------------------------------------------------------- + Prototypes +----------------------------------------------------------------------------- */ +/* Base stuff */ +int zpci_create_device(struct zpci_dev *); +int zpci_enable_device(struct zpci_dev *); +int zpci_disable_device(struct zpci_dev *); +void zpci_stop_device(struct zpci_dev *); +int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); +int zpci_unregister_ioat(struct zpci_dev *, u8); + +/* CLP */ +int clp_scan_pci_devices(void); +int clp_rescan_pci_devices(void); +int clp_rescan_pci_devices_simple(void); +int clp_add_pci_device(u32, u32, int); +int clp_enable_fh(struct zpci_dev *, u8); +int clp_disable_fh(struct zpci_dev *); + +#ifdef CONFIG_PCI +/* Error handling and recovery */ +void zpci_event_error(void *); +void zpci_event_availability(void *); +void zpci_rescan(void); +bool zpci_is_enabled(void); +#else /* CONFIG_PCI */ +static inline void zpci_event_error(void *e) {} +static inline void zpci_event_availability(void *e) {} +static inline void zpci_rescan(void) {} +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_HOTPLUG_PCI_S390 +int zpci_init_slot(struct zpci_dev *); +void zpci_exit_slot(struct zpci_dev *); +#else /* CONFIG_HOTPLUG_PCI_S390 */ +static inline int zpci_init_slot(struct zpci_dev *zdev) +{ + return 0; +} +static inline void zpci_exit_slot(struct zpci_dev *zdev) {} +#endif /* CONFIG_HOTPLUG_PCI_S390 */ + +/* Helpers */ +struct zpci_dev *get_zdev(struct pci_dev *); +struct zpci_dev *get_zdev_by_fid(u32); + +/* DMA */ +int zpci_dma_init(void); +void zpci_dma_exit(void); + +/* FMB */ +int zpci_fmb_enable_device(struct zpci_dev *); +int zpci_fmb_disable_device(struct zpci_dev *); + +/* Debug */ +int zpci_debug_init(void); +void zpci_debug_exit(void); +void zpci_debug_init_device(struct zpci_dev *); +void zpci_debug_exit_device(struct zpci_dev *); +void zpci_debug_info(struct zpci_dev *, struct seq_file *); + +#endif diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h new file mode 100644 index 00000000000..dd78f92f1cc --- /dev/null +++ b/arch/s390/include/asm/pci_clp.h @@ -0,0 +1,186 @@ +#ifndef _ASM_S390_PCI_CLP_H +#define _ASM_S390_PCI_CLP_H + +#include <asm/clp.h> + +/* + * Call Logical Processor - Command Codes + */ +#define CLP_LIST_PCI 0x0002 +#define CLP_QUERY_PCI_FN 0x0003 +#define CLP_QUERY_PCI_FNGRP 0x0004 +#define CLP_SET_PCI_FN 0x0005 + +/* PCI function handle list entry */ +struct clp_fh_list_entry { + u16 device_id; + u16 vendor_id; + u32 config_state : 1; + u32 : 31; + u32 fid; /* PCI function id */ + u32 fh; /* PCI function handle */ +} __packed; + +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ + +/* request or response block header length */ +#define LIST_PCI_HDR_LEN 32 + +/* Number of function handles fitting in response block */ +#define CLP_FH_LIST_NR_ENTRIES \ + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ + / sizeof(struct clp_fh_list_entry)) + +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ + +#define CLP_UTIL_STR_LEN 64 +#define CLP_PFIP_NR_SEGMENTS 4 + +/* List PCI functions request */ +struct clp_req_list_pci { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u64 resume_token; + u64 reserved2; +} __packed; + +/* List PCI functions response */ +struct clp_rsp_list_pci { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u64 resume_token; + u32 reserved2; + u16 max_fn; + u8 reserved3; + u8 entry_size; + struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES]; +} __packed; + +/* Query PCI function request */ +struct clp_req_query_pci { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 fh; /* function handle */ + u32 reserved2; + u64 reserved3; +} __packed; + +/* Query PCI function response */ +struct clp_rsp_query_pci { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 : 64; + u16 vfn; /* virtual fn number */ + u16 : 7; + u16 util_str_avail : 1; /* utility string available? */ + u16 pfgid : 8; /* pci function group id */ + u32 fid; /* pci function id */ + u8 bar_size[PCI_BAR_COUNT]; + u16 pchid; + u32 bar[PCI_BAR_COUNT]; + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 : 24; + u8 pft; /* pci function type */ + u64 sdma; /* start dma as */ + u64 edma; /* end dma as */ + u32 reserved[11]; + u32 uid; /* user defined id */ + u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ +} __packed; + +/* Query PCI function group request */ +struct clp_req_query_pci_grp { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 : 24; + u32 pfgid : 8; /* function group id */ + u32 reserved2; + u64 reserved3; +} __packed; + +/* Query PCI function group response */ +struct clp_rsp_query_pci_grp { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u16 : 4; + u16 noi : 12; /* number of interrupts */ + u8 version; + u8 : 6; + u8 frame : 1; + u8 refresh : 1; /* TLB refresh mode */ + u16 reserved2; + u16 mui; + u64 reserved3; + u64 dasm; /* dma address space mask */ + u64 msia; /* MSI address */ + u64 reserved4; + u64 reserved5; +} __packed; + +/* Set PCI function request */ +struct clp_req_set_pci { + struct clp_req_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 fh; /* function handle */ + u16 reserved2; + u8 oc; /* operation controls */ + u8 ndas; /* number of dma spaces */ + u64 reserved3; +} __packed; + +/* Set PCI function response */ +struct clp_rsp_set_pci { + struct clp_rsp_hdr hdr; + u32 fmt : 4; /* cmd request block format */ + u32 : 28; + u64 reserved1; + u32 fh; /* function handle */ + u32 reserved3; + u64 reserved4; +} __packed; + +/* Combined request/response block structures used by clp insn */ +struct clp_req_rsp_list_pci { + struct clp_req_list_pci request; + struct clp_rsp_list_pci response; +} __packed; + +struct clp_req_rsp_set_pci { + struct clp_req_set_pci request; + struct clp_rsp_set_pci response; +} __packed; + +struct clp_req_rsp_query_pci { + struct clp_req_query_pci request; + struct clp_rsp_query_pci response; +} __packed; + +struct clp_req_rsp_query_pci_grp { + struct clp_req_query_pci_grp request; + struct clp_rsp_query_pci_grp response; +} __packed; + +#endif diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h new file mode 100644 index 00000000000..ac24b26fc06 --- /dev/null +++ b/arch/s390/include/asm/pci_debug.h @@ -0,0 +1,28 @@ +#ifndef _S390_ASM_PCI_DEBUG_H +#define _S390_ASM_PCI_DEBUG_H + +#include <asm/debug.h> + +extern debug_info_t *pci_debug_msg_id; +extern debug_info_t *pci_debug_err_id; + +#define zpci_dbg(imp, fmt, args...) \ + debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args) + +#define zpci_err(text...) \ + do { \ + char debug_buffer[16]; \ + snprintf(debug_buffer, 16, text); \ + debug_text_event(pci_debug_err_id, 0, debug_buffer); \ + } while (0) + +static inline void zpci_err_hex(void *addr, int len) +{ + while (len > 0) { + debug_event(pci_debug_err_id, 0, (void *) addr, len); + len -= pci_debug_err_id->buf_size; + addr += pci_debug_err_id->buf_size; + } +} + +#endif diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h new file mode 100644 index 00000000000..30b4c179c38 --- /dev/null +++ b/arch/s390/include/asm/pci_dma.h @@ -0,0 +1,196 @@ +#ifndef _ASM_S390_PCI_DMA_H +#define _ASM_S390_PCI_DMA_H + +/* I/O Translation Anchor (IOTA) */ +enum zpci_ioat_dtype { + ZPCI_IOTA_STO = 0, + ZPCI_IOTA_RTTO = 1, + ZPCI_IOTA_RSTO = 2, + ZPCI_IOTA_RFTO = 3, + ZPCI_IOTA_PFAA = 4, + ZPCI_IOTA_IOPFAA = 5, + ZPCI_IOTA_IOPTO = 7 +}; + +#define ZPCI_IOTA_IOT_ENABLED 0x800UL +#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2) +#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2) +#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2) +#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2) +#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2) +#define ZPCI_IOTA_FS_4K 0 +#define ZPCI_IOTA_FS_1M 1 +#define ZPCI_IOTA_FS_2G 2 +#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) + +#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) +#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) +#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS) +#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF) +#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G) + +/* I/O Region and segment tables */ +#define ZPCI_INDEX_MASK 0x7ffUL + +#define ZPCI_TABLE_TYPE_MASK 0xc +#define ZPCI_TABLE_TYPE_RFX 0xc +#define ZPCI_TABLE_TYPE_RSX 0x8 +#define ZPCI_TABLE_TYPE_RTX 0x4 +#define ZPCI_TABLE_TYPE_SX 0x0 + +#define ZPCI_TABLE_LEN_RFX 0x3 +#define ZPCI_TABLE_LEN_RSX 0x3 +#define ZPCI_TABLE_LEN_RTX 0x3 + +#define ZPCI_TABLE_OFFSET_MASK 0xc0 +#define ZPCI_TABLE_SIZE 0x4000 +#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE +#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long)) +#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE) + +#define ZPCI_TABLE_BITS 11 +#define ZPCI_PT_BITS 8 +#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) +#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) + +#define ZPCI_RTE_FLAG_MASK 0x3fffUL +#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) +#define ZPCI_STE_FLAG_MASK 0x7ffUL +#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK) + +/* I/O Page tables */ +#define ZPCI_PTE_VALID_MASK 0x400 +#define ZPCI_PTE_INVALID 0x400 +#define ZPCI_PTE_VALID 0x000 +#define ZPCI_PT_SIZE 0x800 +#define ZPCI_PT_ALIGN ZPCI_PT_SIZE +#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE) +#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1) + +#define ZPCI_PTE_FLAG_MASK 0xfffUL +#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK) + +/* Shared bits */ +#define ZPCI_TABLE_VALID 0x00 +#define ZPCI_TABLE_INVALID 0x20 +#define ZPCI_TABLE_PROTECTED 0x200 +#define ZPCI_TABLE_UNPROTECTED 0x000 + +#define ZPCI_TABLE_VALID_MASK 0x20 +#define ZPCI_TABLE_PROT_MASK 0x200 + +static inline unsigned int calc_rtx(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_sx(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_px(dma_addr_t ptr) +{ + return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; +} + +static inline void set_pt_pfaa(unsigned long *entry, void *pfaa) +{ + *entry &= ZPCI_PTE_FLAG_MASK; + *entry |= ((unsigned long) pfaa & ZPCI_PTE_ADDR_MASK); +} + +static inline void set_rt_sto(unsigned long *entry, void *sto) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= ((unsigned long) sto & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RTX; +} + +static inline void set_st_pto(unsigned long *entry, void *pto) +{ + *entry &= ZPCI_STE_FLAG_MASK; + *entry |= ((unsigned long) pto & ZPCI_STE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_SX; +} + +static inline void validate_rt_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RTX; +} + +static inline void validate_st_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_VALID; +} + +static inline void invalidate_table_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_INVALID; +} + +static inline void invalidate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_INVALID; +} + +static inline void validate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_VALID; +} + +static inline void entry_set_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_PROTECTED; +} + +static inline void entry_clr_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_UNPROTECTED; +} + +static inline int reg_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; +} + +static inline int pt_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; +} + +static inline int entry_isprotected(unsigned long entry) +{ + return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED; +} + +static inline unsigned long *get_rt_sto(unsigned long entry) +{ + return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) + ? (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK) + : NULL; +} + +static inline unsigned long *get_st_pto(unsigned long entry) +{ + return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) + ? (unsigned long *) (entry & ZPCI_STE_ADDR_MASK) + : NULL; +} + +/* Prototypes */ +int zpci_dma_init_device(struct zpci_dev *); +void zpci_dma_exit_device(struct zpci_dev *); + +#endif diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h new file mode 100644 index 00000000000..649eb62c52b --- /dev/null +++ b/arch/s390/include/asm/pci_insn.h @@ -0,0 +1,86 @@ +#ifndef _ASM_S390_PCI_INSN_H +#define _ASM_S390_PCI_INSN_H + +/* Load/Store status codes */ +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 +#define ZPCI_PCI_ST_BLOCKED 12 +#define ZPCI_PCI_ST_INSUF_RES 16 +#define ZPCI_PCI_ST_INVAL_AS 20 +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 + +/* Load/Store return codes */ +#define ZPCI_PCI_LS_OK 0 +#define ZPCI_PCI_LS_ERR 1 +#define ZPCI_PCI_LS_BUSY 2 +#define ZPCI_PCI_LS_INVAL_HANDLE 3 + +/* Load/Store address space identifiers */ +#define ZPCI_PCIAS_MEMIO_0 0 +#define ZPCI_PCIAS_MEMIO_1 1 +#define ZPCI_PCIAS_MEMIO_2 2 +#define ZPCI_PCIAS_MEMIO_3 3 +#define ZPCI_PCIAS_MEMIO_4 4 +#define ZPCI_PCIAS_MEMIO_5 5 +#define ZPCI_PCIAS_CFGSPC 15 + +/* Modify PCI Function Controls */ +#define ZPCI_MOD_FC_REG_INT 2 +#define ZPCI_MOD_FC_DEREG_INT 3 +#define ZPCI_MOD_FC_REG_IOAT 4 +#define ZPCI_MOD_FC_DEREG_IOAT 5 +#define ZPCI_MOD_FC_REREG_IOAT 6 +#define ZPCI_MOD_FC_RESET_ERROR 7 +#define ZPCI_MOD_FC_RESET_BLOCK 9 +#define ZPCI_MOD_FC_SET_MEASURE 10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* Function Information Block */ +struct zpci_fib { + u32 fmt : 8; /* format */ + u32 : 24; + u32 : 32; + u8 fc; /* function controls */ + u64 : 56; + u64 pba; /* PCI base address */ + u64 pal; /* PCI address limit */ + u64 iota; /* I/O Translation Anchor */ + u32 : 1; + u32 isc : 3; /* Interrupt subclass */ + u32 noi : 12; /* Number of interrupts */ + u32 : 2; + u32 aibvo : 6; /* Adapter interrupt bit vector offset */ + u32 sum : 1; /* Adapter int summary bit enabled */ + u32 : 1; + u32 aisbo : 6; /* Adapter int summary bit offset */ + u32 : 32; + u64 aibv; /* Adapter int bit vector address */ + u64 aisb; /* Adapter int summary bit address */ + u64 fmb_addr; /* Function measurement block address and key */ + u32 : 32; + u32 gd; +} __packed __aligned(8); + +int zpci_mod_fc(u64 req, struct zpci_fib *fib); +int zpci_refresh_trans(u64 fn, u64 addr, u64 range); +int zpci_load(u64 *data, u64 req, u64 offset); +int zpci_store(u64 data, u64 req, u64 offset); +int zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); + +#endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h new file mode 100644 index 00000000000..d194d544d69 --- /dev/null +++ b/arch/s390/include/asm/pci_io.h @@ -0,0 +1,198 @@ +#ifndef _ASM_S390_PCI_IO_H +#define _ASM_S390_PCI_IO_H + +#ifdef CONFIG_PCI + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <asm/pci_insn.h> + +/* I/O Map */ +#define ZPCI_IOMAP_MAX_ENTRIES 0x7fff +#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000ULL +#define ZPCI_IOMAP_ADDR_IDX_MASK 0x7fff000000000000ULL +#define ZPCI_IOMAP_ADDR_OFF_MASK 0x0000ffffffffffffULL + +struct zpci_iomap_entry { + u32 fh; + u8 bar; +}; + +extern struct zpci_iomap_entry *zpci_iomap_start; + +#define ZPCI_IDX(addr) \ + (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48) +#define ZPCI_OFFSET(addr) \ + ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK) + +#define ZPCI_CREATE_REQ(handle, space, len) \ + ((u64) handle << 32 | space << 16 | len) + +#define zpci_read(LENGTH, RETTYPE) \ +static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ +{ \ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \ + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ + u64 data; \ + int rc; \ + \ + rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ + if (rc) \ + data = -1ULL; \ + return (RETTYPE) data; \ +} + +#define zpci_write(LENGTH, VALTYPE) \ +static inline void zpci_write_##VALTYPE(VALTYPE val, \ + const volatile void __iomem *addr) \ +{ \ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \ + u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ + u64 data = (VALTYPE) val; \ + \ + zpci_store(data, req, ZPCI_OFFSET(addr)); \ +} + +zpci_read(8, u64) +zpci_read(4, u32) +zpci_read(2, u16) +zpci_read(1, u8) +zpci_write(8, u64) +zpci_write(4, u32) +zpci_write(2, u16) +zpci_write(1, u8) + +static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len) +{ + u64 val; + + switch (len) { + case 1: + val = (u64) *((u8 *) data); + break; + case 2: + val = (u64) *((u16 *) data); + break; + case 4: + val = (u64) *((u32 *) data); + break; + case 8: + val = (u64) *((u64 *) data); + break; + default: + val = 0; /* let FW report error */ + break; + } + return zpci_store(val, req, offset); +} + +static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) +{ + u64 data; + int cc; + + cc = zpci_load(&data, req, offset); + if (cc) + goto out; + + switch (len) { + case 1: + *((u8 *) dst) = (u8) data; + break; + case 2: + *((u16 *) dst) = (u16) data; + break; + case 4: + *((u32 *) dst) = (u32) data; + break; + case 8: + *((u64 *) dst) = (u64) data; + break; + } +out: + return cc; +} + +static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) +{ + return zpci_store_block(data, req, offset); +} + +static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) +{ + int count = len > max ? max : len, size = 1; + + while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) { + dst = dst >> 1; + src = src >> 1; + size = size << 1; + } + return size; +} + +static inline int zpci_memcpy_fromio(void *dst, + const volatile void __iomem *src, + unsigned long n) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)]; + u64 req, offset = ZPCI_OFFSET(src); + int size, rc = 0; + + while (n > 0) { + size = zpci_get_max_write_size((u64) src, (u64) dst, n, 8); + req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); + rc = zpci_read_single(req, dst, offset, size); + if (rc) + break; + offset += size; + dst += size; + n -= size; + } + return rc; +} + +static inline int zpci_memcpy_toio(volatile void __iomem *dst, + const void *src, unsigned long n) +{ + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)]; + u64 req, offset = ZPCI_OFFSET(dst); + int size, rc = 0; + + if (!src) + return -EINVAL; + + while (n > 0) { + size = zpci_get_max_write_size((u64) dst, (u64) src, n, 128); + req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); + + if (size > 8) /* main path */ + rc = zpci_write_block(req, src, offset); + else + rc = zpci_write_single(req, src, offset, size); + if (rc) + break; + offset += size; + src += size; + n -= size; + } + return rc; +} + +static inline int zpci_memset_io(volatile void __iomem *dst, + unsigned char val, size_t count) +{ + u8 *src = kmalloc(count, GFP_KERNEL); + int rc; + + if (src == NULL) + return -ENOMEM; + memset(src, val, count); + + rc = zpci_memcpy_toio(dst, src, count); + kfree(src); + return rc; +} + +#endif /* CONFIG_PCI */ + +#endif /* _ASM_S390_PCI_IO_H */ diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h new file mode 100644 index 00000000000..fa91e009745 --- /dev/null +++ b/arch/s390/include/asm/percpu.h @@ -0,0 +1,190 @@ +#ifndef __ARCH_S390_PERCPU__ +#define __ARCH_S390_PERCPU__ + +#include <linux/preempt.h> +#include <asm/cmpxchg.h> + +/* + * s390 uses its own implementation for per cpu data, the offset of + * the cpu local data area is cached in the cpu's lowcore memory. + */ +#define __my_cpu_offset S390_lowcore.percpu_offset + +#ifdef CONFIG_64BIT + +/* + * For 64 bit module code, the module may be more than 4G above the + * per cpu area, use weak definitions to force the compiler to + * generate external references. + */ +#if defined(CONFIG_SMP) && defined(MODULE) +#define ARCH_NEEDS_WEAK_PER_CPU +#endif + +/* + * We use a compare-and-swap loop since that uses less cpu cycles than + * disabling and enabling interrupts like the generic variant would do. + */ +#define arch_this_cpu_to_op_simple(pcp, val, op) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } while (prev__ != old__); \ + preempt_enable(); \ + new__; \ +}) + +#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) +#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) + +#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +) +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &) +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |) + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \ +{ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ val__ = (val); \ + pcp_op_T__ old__, *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + if (__builtin_constant_p(val__) && \ + ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ + asm volatile( \ + op2 " %[ptr__],%[val__]\n" \ + : [ptr__] "+Q" (*ptr__) \ + : [val__] "i" ((szcast)val__) \ + : "cc"); \ + } else { \ + asm volatile( \ + op1 " %[old__],%[val__],%[ptr__]\n" \ + : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ + : [val__] "d" (val__) \ + : "cc"); \ + } \ + preempt_enable(); \ +} + +#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int) +#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long) + +#define arch_this_cpu_add_return(pcp, val, op) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ val__ = (val); \ + pcp_op_T__ old__, *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + asm volatile( \ + op " %[old__],%[val__],%[ptr__]\n" \ + : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ + : [val__] "d" (val__) \ + : "cc"); \ + preempt_enable(); \ + old__ + val__; \ +}) + +#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa") +#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag") + +#define arch_this_cpu_to_op(pcp, val, op) \ +{ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ val__ = (val); \ + pcp_op_T__ old__, *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + asm volatile( \ + op " %[old__],%[val__],%[ptr__]\n" \ + : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ + : [val__] "d" (val__) \ + : "cc"); \ + preempt_enable(); \ +} + +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan") +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang") +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao") +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog") + +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg(ptr__, oval, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) + +#define arch_this_cpu_xchg(pcp, nval) \ +({ \ + typeof(pcp) *ptr__; \ + typeof(pcp) ret__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + ret__ = xchg(ptr__, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) +#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) + +#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ +({ \ + typeof(pcp1) o1__ = (o1), n1__ = (n1); \ + typeof(pcp2) o2__ = (o2), n2__ = (n2); \ + typeof(pcp1) *p1__; \ + typeof(pcp2) *p2__; \ + int ret__; \ + preempt_disable(); \ + p1__ = __this_cpu_ptr(&(pcp1)); \ + p2__ = __this_cpu_ptr(&(pcp2)); \ + ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ + preempt_enable(); \ + ret__; \ +}) + +#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double +#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double + +#endif /* CONFIG_64BIT */ + +#include <asm-generic/percpu.h> + +#endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h new file mode 100644 index 00000000000..159a8ec6da9 --- /dev/null +++ b/arch/s390/include/asm/perf_event.h @@ -0,0 +1,96 @@ +/* + * Performance event support - s390 specific definitions. + * + * Copyright IBM Corp. 2009, 2013 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#ifndef _ASM_S390_PERF_EVENT_H +#define _ASM_S390_PERF_EVENT_H + +#ifdef CONFIG_64BIT + +#include <linux/perf_event.h> +#include <linux/device.h> +#include <asm/cpu_mf.h> + +/* Per-CPU flags for PMU states */ +#define PMU_F_RESERVED 0x1000 +#define PMU_F_ENABLED 0x2000 +#define PMU_F_IN_USE 0x4000 +#define PMU_F_ERR_IBE 0x0100 +#define PMU_F_ERR_LSDA 0x0200 +#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) + +/* Perf defintions for PMU event attributes in sysfs */ +extern __init const struct attribute_group **cpumf_cf_event_group(void); +extern ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); +#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name +#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr) + +#define CPUMF_EVENT_ATTR(cat, name, id) \ + PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show) +#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name) + + +/* Perf callbacks */ +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +/* Perf pt_regs extension for sample-data-entry indicators */ +struct perf_sf_sde_regs { + unsigned char in_guest:1; /* guest sample */ + unsigned long reserved:63; /* reserved */ +}; + +/* Perf PMU definitions for the counter facility */ +#define PERF_CPUM_CF_MAX_CTR 256 + +/* Perf PMU definitions for the sampling facility */ +#define PERF_CPUM_SF_MAX_CTR 2 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ +#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ +#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ +#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ + PERF_CPUM_SF_DIAG_MODE) +#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */ + +#define REG_NONE 0 +#define REG_OVERFLOW 1 +#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) +#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) +#define RAWSAMPLE_REG(hwc) ((hwc)->config) +#define TEAR_REG(hwc) ((hwc)->last_tag) +#define SAMPL_RATE(hwc) ((hwc)->event_base) +#define SAMPL_FLAGS(hwc) ((hwc)->config_base) +#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) +#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) + +/* Structure for sampling data entries to be passed as perf raw sample data + * to user space. Note that raw sample data must be aligned and, thus, might + * be padded with zeros. + */ +struct sf_raw_sample { +#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE +#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE + u64 format; + u32 size; /* Size of sf_raw_sample */ + u16 bsdes; /* Basic-sampling data entry size */ + u16 dsdes; /* Diagnostic-sampling data entry size */ + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ + u8 padding[]; /* Padding to next multiple of 8 */ +} __packed; + +/* Perf hardware reserve and release functions */ +int perf_reserve_sampling(void); +void perf_release_sampling(void); + +#endif /* CONFIG_64BIT */ +#endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h new file mode 100644 index 00000000000..9e18a61d3df --- /dev/null +++ b/arch/s390/include/asm/pgalloc.h @@ -0,0 +1,156 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/pgalloc.h" + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef _S390_PGALLOC_H +#define _S390_PGALLOC_H + +#include <linux/threads.h> +#include <linux/gfp.h> +#include <linux/mm.h> + +unsigned long *crst_table_alloc(struct mm_struct *); +void crst_table_free(struct mm_struct *, unsigned long *); + +unsigned long *page_table_alloc(struct mm_struct *, unsigned long); +void page_table_free(struct mm_struct *, unsigned long *); +void page_table_free_rcu(struct mmu_gather *, unsigned long *); + +void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, + bool init_skey); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq); + +static inline void clear_table(unsigned long *s, unsigned long val, size_t n) +{ + typedef struct { char _[n]; } addrtype; + + *s = val; + n = (n / 256) - 1; + asm volatile( +#ifdef CONFIG_64BIT + " mvc 8(248,%0),0(%0)\n" +#else + " mvc 4(252,%0),0(%0)\n" +#endif + "0: mvc 256(256,%0),0(%0)\n" + " la %0,256(%0)\n" + " brct %1,0b\n" + : "+a" (s), "+d" (n), "=m" (*(addrtype *) s) + : "m" (*(addrtype *) s)); +} + +static inline void crst_table_init(unsigned long *crst, unsigned long entry) +{ + clear_table(crst, entry, sizeof(unsigned long)*2048); +} + +#ifndef CONFIG_64BIT + +static inline unsigned long pgd_entry_type(struct mm_struct *mm) +{ + return _SEGMENT_ENTRY_EMPTY; +} + +#define pud_alloc_one(mm,address) ({ BUG(); ((pud_t *)2); }) +#define pud_free(mm, x) do { } while (0) + +#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) +#define pmd_free(mm, x) do { } while (0) + +#define pgd_populate(mm, pgd, pud) BUG() +#define pud_populate(mm, pud, pmd) BUG() + +#else /* CONFIG_64BIT */ + +static inline unsigned long pgd_entry_type(struct mm_struct *mm) +{ + if (mm->context.asce_limit <= (1UL << 31)) + return _SEGMENT_ENTRY_EMPTY; + if (mm->context.asce_limit <= (1UL << 42)) + return _REGION3_ENTRY_EMPTY; + return _REGION2_ENTRY_EMPTY; +} + +int crst_table_upgrade(struct mm_struct *, unsigned long limit); +void crst_table_downgrade(struct mm_struct *, unsigned long limit); + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) +{ + unsigned long *table = crst_table_alloc(mm); + if (table) + crst_table_init(table, _REGION3_ENTRY_EMPTY); + return (pud_t *) table; +} +#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + crst_table_init(table, _SEGMENT_ENTRY_EMPTY); + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } + return (pmd_t *) table; +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + pgtable_pmd_page_dtor(virt_to_page(pmd)); + crst_table_free(mm, (unsigned long *) pmd); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); +} + +#endif /* CONFIG_64BIT */ + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + spin_lock_init(&mm->context.list_lock); + INIT_LIST_HEAD(&mm->context.pgtable_list); + INIT_LIST_HEAD(&mm->context.gmap_list); + return (pgd_t *) crst_table_alloc(mm); +} +#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) + +static inline void pmd_populate(struct mm_struct *mm, + pmd_t *pmd, pgtable_t pte) +{ + pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); +} + +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) + +#define pmd_pgtable(pmd) \ + (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) + +/* + * page table entry allocation/free routines. + */ +#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) +#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) + +#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) +#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) + +extern void rcu_table_freelist_finish(void); + +#endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h new file mode 100644 index 00000000000..fcba5e03839 --- /dev/null +++ b/arch/s390/include/asm/pgtable.h @@ -0,0 +1,1739 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Ulrich Weigand (weigand@de.ibm.com) + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/pgtable.h" + */ + +#ifndef _ASM_S390_PGTABLE_H +#define _ASM_S390_PGTABLE_H + +/* + * The Linux memory management assumes a three-level page table setup. For + * s390 31 bit we "fold" the mid level into the top-level page table, so + * that we physically have the same two-level page table as the s390 mmu + * expects in 31 bit mode. For s390 64 bit we use three of the five levels + * the hardware provides (region first and region second tables are not + * used). + * + * The "pgd_xxx()" functions are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + * + * This file contains the functions and defines necessary to modify and use + * the S390 page table tree. + */ +#ifndef __ASSEMBLY__ +#include <linux/sched.h> +#include <linux/mm_types.h> +#include <linux/page-flags.h> +#include <asm/bug.h> +#include <asm/page.h> + +extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); +extern void paging_init(void); +extern void vmem_map_init(void); + +/* + * The S390 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0) + +/* + * ZERO_PAGE is a global shared page that is always zero; used + * for zero-mapped memory areas etc.. + */ + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + \ + (((unsigned long)(vaddr)) &zero_page_mask)))) +#define __HAVE_COLOR_ZERO_PAGE + +/* TODO: s390 cannot support io_remap_pfn_range... */ +#endif /* !__ASSEMBLY__ */ + +/* + * PMD_SHIFT determines the size of the area a second-level page + * table can map + * PGDIR_SHIFT determines what a third-level page table entry can map + */ +#ifndef CONFIG_64BIT +# define PMD_SHIFT 20 +# define PUD_SHIFT 20 +# define PGDIR_SHIFT 20 +#else /* CONFIG_64BIT */ +# define PMD_SHIFT 20 +# define PUD_SHIFT 31 +# define PGDIR_SHIFT 42 +#endif /* CONFIG_64BIT */ + +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * entries per page directory level: the S390 is two-level, so + * we don't really have any PMD directory physically. + * for S390 segment-table entries are combined to one PGD + * that leads to 1024 pte per pgd + */ +#define PTRS_PER_PTE 256 +#ifndef CONFIG_64BIT +#define PTRS_PER_PMD 1 +#define PTRS_PER_PUD 1 +#else /* CONFIG_64BIT */ +#define PTRS_PER_PMD 2048 +#define PTRS_PER_PUD 2048 +#endif /* CONFIG_64BIT */ +#define PTRS_PER_PGD 2048 + +#define FIRST_USER_ADDRESS 0 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e)) + +#ifndef __ASSEMBLY__ +/* + * The vmalloc and module area will always be on the topmost area of the kernel + * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. + * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where + * modules will reside. That makes sure that inter module branches always + * happen without trampolines and in addition the placement within a 2GB frame + * is branch prediction unit friendly. + */ +extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; +extern struct page *vmemmap; + +#define VMEM_MAX_PHYS ((unsigned long) vmemmap) + +#ifdef CONFIG_64BIT +extern unsigned long MODULES_VADDR; +extern unsigned long MODULES_END; +#define MODULES_VADDR MODULES_VADDR +#define MODULES_END MODULES_END +#define MODULES_LEN (1UL << 31) +#endif + +/* + * A 31 bit pagetable entry of S390 has following format: + * | PFRA | | OS | + * 0 0IP0 + * 00000000001111111111222222222233 + * 01234567890123456789012345678901 + * + * I Page-Invalid Bit: Page is not available for address-translation + * P Page-Protection Bit: Store access not possible for page + * + * A 31 bit segmenttable entry of S390 has following format: + * | P-table origin | |PTL + * 0 IC + * 00000000001111111111222222222233 + * 01234567890123456789012345678901 + * + * I Segment-Invalid Bit: Segment is not available for address-translation + * C Common-Segment Bit: Segment is not private (PoP 3-30) + * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) + * + * The 31 bit segmenttable origin of S390 has following format: + * + * |S-table origin | | STL | + * X **GPS + * 00000000001111111111222222222233 + * 01234567890123456789012345678901 + * + * X Space-Switch event: + * G Segment-Invalid Bit: * + * P Private-Space Bit: Segment is not private (PoP 3-30) + * S Storage-Alteration: + * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) + * + * A 64 bit pagetable entry of S390 has following format: + * | PFRA |0IPC| OS | + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * I Page-Invalid Bit: Page is not available for address-translation + * P Page-Protection Bit: Store access not possible for page + * C Change-bit override: HW is not required to set change bit + * + * A 64 bit segmenttable entry of S390 has following format: + * | P-table origin | TT + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * I Segment-Invalid Bit: Segment is not available for address-translation + * C Common-Segment Bit: Segment is not private (PoP 3-30) + * P Page-Protection Bit: Store access not possible for page + * TT Type 00 + * + * A 64 bit region table entry of S390 has following format: + * | S-table origin | TF TTTL + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * I Segment-Invalid Bit: Segment is not available for address-translation + * TT Type 01 + * TF + * TL Table length + * + * The 64 bit regiontable origin of S390 has following format: + * | region table origon | DTTL + * 0000000000111111111122222222223333333333444444444455555555556666 + * 0123456789012345678901234567890123456789012345678901234567890123 + * + * X Space-Switch event: + * G Segment-Invalid Bit: + * P Private-Space Bit: + * S Storage-Alteration: + * R Real space + * TL Table-Length: + * + * A storage key has the following format: + * | ACC |F|R|C|0| + * 0 3 4 5 6 7 + * ACC: access key + * F : fetch protection bit + * R : referenced bit + * C : changed bit + */ + +/* Hardware bits in the page table entry */ +#define _PAGE_CO 0x100 /* HW Change-bit override */ +#define _PAGE_PROTECT 0x200 /* HW read-only bit */ +#define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ + +/* Software bits in the page table entry */ +#define _PAGE_PRESENT 0x001 /* SW pte present bit */ +#define _PAGE_TYPE 0x002 /* SW pte type bit */ +#define _PAGE_YOUNG 0x004 /* SW pte young bit */ +#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ +#define _PAGE_READ 0x010 /* SW pte read bit */ +#define _PAGE_WRITE 0x020 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x040 /* SW associated with special page */ +#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ +#define __HAVE_ARCH_PTE_SPECIAL + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ + _PAGE_DIRTY | _PAGE_YOUNG) + +/* + * handle_pte_fault uses pte_present, pte_none and pte_file to find out the + * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit + * is used to distinguish present from not-present ptes. It is changed only + * with the page table lock held. + * + * The following table gives the different possible bit combinations for + * the pte hardware and software bits in the last 12 bits of a pte: + * + * 842100000000 + * 000084210000 + * 000000008421 + * .IR...wrdytp + * empty .10...000000 + * swap .10...xxxx10 + * file .11...xxxxx0 + * prot-none, clean, old .11...000001 + * prot-none, clean, young .11...000101 + * prot-none, dirty, old .10...001001 + * prot-none, dirty, young .10...001101 + * read-only, clean, old .11...010001 + * read-only, clean, young .01...010101 + * read-only, dirty, old .11...011001 + * read-only, dirty, young .01...011101 + * read-write, clean, old .11...110001 + * read-write, clean, young .01...110101 + * read-write, dirty, old .10...111001 + * read-write, dirty, young .00...111101 + * + * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 + * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 + * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 + * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 + */ + +#ifndef CONFIG_64BIT + +/* Bits in the segment table address-space-control-element */ +#define _ASCE_SPACE_SWITCH 0x80000000UL /* space switch event */ +#define _ASCE_ORIGIN_MASK 0x7ffff000UL /* segment table origin */ +#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ +#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ +#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ + +/* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ +#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ +#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT + +#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) + +/* + * Segment table entry encoding (I = invalid, R = read-only bit): + * ..R...I..... + * prot-none ..1...1..... + * read-only ..1...0..... + * read-write ..0...0..... + * empty ..0...1..... + */ + +/* Page status table bits for virtualization */ +#define PGSTE_ACC_BITS 0xf0000000UL +#define PGSTE_FP_BIT 0x08000000UL +#define PGSTE_PCL_BIT 0x00800000UL +#define PGSTE_HR_BIT 0x00400000UL +#define PGSTE_HC_BIT 0x00200000UL +#define PGSTE_GR_BIT 0x00040000UL +#define PGSTE_GC_BIT 0x00020000UL +#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */ + +#else /* CONFIG_64BIT */ + +/* Bits in the segment/region table address-space-control-element */ +#define _ASCE_ORIGIN ~0xfffUL/* segment table origin */ +#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */ +#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */ +#define _ASCE_SPACE_SWITCH 0x40 /* space switch event */ +#define _ASCE_REAL_SPACE 0x20 /* real space control */ +#define _ASCE_TYPE_MASK 0x0c /* asce table type mask */ +#define _ASCE_TYPE_REGION1 0x0c /* region first table type */ +#define _ASCE_TYPE_REGION2 0x08 /* region second table type */ +#define _ASCE_TYPE_REGION3 0x04 /* region third table type */ +#define _ASCE_TYPE_SEGMENT 0x00 /* segment table type */ +#define _ASCE_TABLE_LENGTH 0x03 /* region table length */ + +/* Bits in the region table entry */ +#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ +#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ +#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ +#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ +#define _REGION_ENTRY_TYPE_R3 0x04 /* region third table type */ +#define _REGION_ENTRY_LENGTH 0x03 /* region third length */ + +#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) +#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) +#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) + +#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ +#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ +#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ + +/* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL +#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ +#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ + +#define _SEGMENT_ENTRY (0) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) + +#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ +#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ +#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG + +/* + * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * ..R...I...y. + * prot-none, old ..0...1...1. + * prot-none, young ..1...1...1. + * read-only, old ..1...1...0. + * read-only, young ..1...0...1. + * read-write, old ..0...1...0. + * read-write, young ..0...0...1. + * The segment table origin is used to distinguish empty (origin==0) from + * read-write, old segment table entries (origin!=0) + */ + +#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ + +/* Set of bits not changed in pmd_modify */ +#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ + | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) + +/* Page status table bits for virtualization */ +#define PGSTE_ACC_BITS 0xf000000000000000UL +#define PGSTE_FP_BIT 0x0800000000000000UL +#define PGSTE_PCL_BIT 0x0080000000000000UL +#define PGSTE_HR_BIT 0x0040000000000000UL +#define PGSTE_HC_BIT 0x0020000000000000UL +#define PGSTE_GR_BIT 0x0004000000000000UL +#define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ + +#endif /* CONFIG_64BIT */ + +/* Guest Page State used for virtualization */ +#define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL + +/* + * A user page table pointer has the space-switch-event bit, the + * private-space-control bit and the storage-alteration-event-control + * bit set. A kernel page table pointer doesn't need them. + */ +#define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ + _ASCE_ALT_EVENT) + +/* + * Page protection definitions. + */ +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_INVALID | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_INVALID | _PAGE_PROTECT) + +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ + _PAGE_PROTECT) + +/* + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. + */ + /*xwr*/ +#define __P000 PAGE_NONE +#define __P001 PAGE_READ +#define __P010 PAGE_READ +#define __P011 PAGE_READ +#define __P100 PAGE_READ +#define __P101 PAGE_READ +#define __P110 PAGE_READ +#define __P111 PAGE_READ + +#define __S000 PAGE_NONE +#define __S001 PAGE_READ +#define __S010 PAGE_WRITE +#define __S011 PAGE_WRITE +#define __S100 PAGE_READ +#define __S101 PAGE_READ +#define __S110 PAGE_WRITE +#define __S111 PAGE_WRITE + +/* + * Segment entry (large page) protection definitions. + */ +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_NONE) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_PROTECT) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) + +static inline int mm_has_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; +} + +static inline int mm_use_skey(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (mm->context.use_skey) + return 1; +#endif + return 0; +} + +/* + * pgd/pmd/pte query functions + */ +#ifndef CONFIG_64BIT + +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } + +static inline int pud_present(pud_t pud) { return 1; } +static inline int pud_none(pud_t pud) { return 0; } +static inline int pud_large(pud_t pud) { return 0; } +static inline int pud_bad(pud_t pud) { return 0; } + +#else /* CONFIG_64BIT */ + +static inline int pgd_present(pgd_t pgd) +{ + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return 1; + return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL; +} + +static inline int pgd_none(pgd_t pgd) +{ + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) + return 0; + return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; +} + +static inline int pgd_bad(pgd_t pgd) +{ + /* + * With dynamic page table levels the pgd can be a region table + * entry or a segment table entry. Check for the bit that are + * invalid for either table entry. + */ + unsigned long mask = + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & + ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; + return (pgd_val(pgd) & mask) != 0; +} + +static inline int pud_present(pud_t pud) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return 1; + return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; +} + +static inline int pud_none(pud_t pud) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return 0; + return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; +} + +static inline int pud_large(pud_t pud) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) + return 0; + return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); +} + +static inline int pud_bad(pud_t pud) +{ + /* + * With dynamic page table levels the pud can be a region table + * entry or a segment table entry. Check for the bit that are + * invalid for either table entry. + */ + unsigned long mask = + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & + ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; + return (pud_val(pud) & mask) != 0; +} + +#endif /* CONFIG_64BIT */ + +static inline int pmd_present(pmd_t pmd) +{ + return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; +} + +static inline int pmd_none(pmd_t pmd) +{ + return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; +} + +static inline int pmd_large(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; +#else + return 0; +#endif +} + +static inline int pmd_prot_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && + (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); +} + +static inline int pmd_bad(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; +#endif + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; +} + +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PMD_WRITE +static inline int pmd_write(pmd_t pmd) +{ + if (pmd_prot_none(pmd)) + return 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; +} + +static inline int pmd_young(pmd_t pmd) +{ + int young = 0; +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) + young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; + else + young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; +#endif + return young; +} + +static inline int pte_present(pte_t pte) +{ + /* Bit pattern: (pte & 0x001) == 0x001 */ + return (pte_val(pte) & _PAGE_PRESENT) != 0; +} + +static inline int pte_none(pte_t pte) +{ + /* Bit pattern: pte == 0x400 */ + return pte_val(pte) == _PAGE_INVALID; +} + +static inline int pte_swap(pte_t pte) +{ + /* Bit pattern: (pte & 0x603) == 0x402 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | + _PAGE_TYPE | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_TYPE); +} + +static inline int pte_file(pte_t pte) +{ + /* Bit pattern: (pte & 0x601) == 0x600 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_PROTECT); +} + +static inline int pte_special(pte_t pte) +{ + return (pte_val(pte) & _PAGE_SPECIAL); +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} + +static inline pgste_t pgste_get_lock(pte_t *ptep) +{ + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; + + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); +#endif + return __pgste(new); +} + +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + asm( + " nihh %1,0xff7f\n" /* clear PCL bit */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); + preempt_enable(); +#endif +} + +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} + +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste, + struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + unsigned long address, bits, skey; + + if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID) + return pgste; + address = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; +#endif + return pgste; + +} + +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, + struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long nkey; + + if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) + return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); + address = pte_val(entry) & PAGE_MASK; + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + page_set_storage_key(address, nkey, 0); +#endif +} + +static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) +{ + if ((pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE) && + !(pte_val(entry) & _PAGE_INVALID)) { + if (!MACHINE_HAS_ESOP) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; + } + if (!(pte_val(entry) & _PAGE_PROTECT)) + /* This pte allows write access, set user-dirty */ + pgste_val(pgste) |= PGSTE_UC_BIT; + } + *ptep = entry; + return pgste; +} + +/** + * struct gmap_struct - guest address space + * @mm: pointer to the parent mm_struct + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @crst_list: list of all crst tables used in the guest address space + * @pfault_enabled: defines if pfaults are applicable for the guest + */ +struct gmap { + struct list_head list; + struct mm_struct *mm; + unsigned long *table; + unsigned long asce; + void *private; + struct list_head crst_list; + bool pfault_enabled; +}; + +/** + * struct gmap_rmap - reverse mapping for segment table entries + * @gmap: pointer to the gmap_struct + * @entry: pointer to a segment table entry + * @vmaddr: virtual address in the guest address space + */ +struct gmap_rmap { + struct list_head list; + struct gmap *gmap; + unsigned long *entry; + unsigned long vmaddr; +}; + +/** + * struct gmap_pgtable - gmap information attached to a page table + * @vmaddr: address of the 1MB segment in the process virtual memory + * @mapper: list of segment table entries mapping a page table + */ +struct gmap_pgtable { + unsigned long vmaddr; + struct list_head mapper; +}; + +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + +struct gmap *gmap_alloc(struct mm_struct *mm); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len); +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(unsigned long address, struct gmap *); +unsigned long gmap_translate(unsigned long address, struct gmap *); +unsigned long __gmap_fault(unsigned long address, struct gmap *); +unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void __gmap_zap(unsigned long address, struct gmap *); +bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); + + +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; + gmap_do_ipte_notify(mm, ptep); + } +#endif + return pgste; +} + +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + } else { + if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) + pte_val(entry) |= _PAGE_CO; + *ptep = entry; + } +} + +/* + * query functions pte_write/pte_dirty/pte_young only work if + * pte_present() is true. Undefined behaviour if not.. + */ +static inline int pte_write(pte_t pte) +{ + return (pte_val(pte) & _PAGE_WRITE) != 0; +} + +static inline int pte_dirty(pte_t pte) +{ + return (pte_val(pte) & _PAGE_DIRTY) != 0; +} + +static inline int pte_young(pte_t pte) +{ + return (pte_val(pte) & _PAGE_YOUNG) != 0; +} + +#define __HAVE_ARCH_PTE_UNUSED +static inline int pte_unused(pte_t pte) +{ + return pte_val(pte) & _PAGE_UNUSED; +} + +/* + * pgd/pmd/pte modification functions + */ + +static inline void pgd_clear(pgd_t *pgd) +{ +#ifdef CONFIG_64BIT + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; +#endif +} + +static inline void pud_clear(pud_t *pud) +{ +#ifdef CONFIG_64BIT + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID; +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_val(*ptep) = _PAGE_INVALID; +} + +/* + * The following pte modification functions only work if + * pte_present() is true. Undefined behaviour if not.. + */ +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_val(pte) &= _PAGE_CHG_MASK; + pte_val(pte) |= pgprot_val(newprot); + /* + * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the + * invalid bit set, clear it again for readable, young pages + */ + if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) + pte_val(pte) &= ~_PAGE_INVALID; + /* + * newprot for PAGE_READ and PAGE_WRITE has the page protection + * bit set, clear it again for writable, dirty pages + */ + if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) &= ~_PAGE_PROTECT; + return pte; +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_WRITE; + pte_val(pte) |= _PAGE_PROTECT; + return pte; +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) &= ~_PAGE_PROTECT; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_DIRTY; + pte_val(pte) |= _PAGE_PROTECT; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= _PAGE_DIRTY; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) &= ~_PAGE_PROTECT; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_YOUNG; + pte_val(pte) |= _PAGE_INVALID; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= _PAGE_YOUNG; + if (pte_val(pte) & _PAGE_READ) + pte_val(pte) &= ~_PAGE_INVALID; + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} + +#ifdef CONFIG_HUGETLB_PAGE +static inline pte_t pte_mkhuge(pte_t pte) +{ + pte_val(pte) |= _PAGE_LARGE; + return pte; +} +#endif + +static inline void __ptep_ipte(unsigned long address, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidation + global TLB flush for the pte */ + asm volatile( + " ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); +} + +static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidation + local TLB flush for the pte */ + asm volatile( + " .insn rrf,0xb2210000,%2,%3,0,1" + : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); +} + +static inline void ptep_flush_direct(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active, count; + + if (pte_val(*ptep) & _PAGE_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(address, ptep); + else + __ptep_ipte(address, ptep); + atomic_sub(0x10000, &mm->context.attach_count); +} + +static inline void ptep_flush_lazy(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active, count; + + if (pte_val(*ptep) & _PAGE_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; + mm->context.flush_mm = 1; + } else + __ptep_ipte(address, ptep); + atomic_sub(0x10000, &mm->context.attach_count); +} + +/* + * Get (and clear) the user dirty bit for a pte. + */ +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + int dirty; + + if (!mm_has_pgste(mm)) + return 0; + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; + } + pgste_set_unlock(ptep, pgste); + return dirty; +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + int young; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_direct(vma->vm_mm, addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + + return young; +} + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + return ptep_test_and_clear_young(vma, address, ptep); +} + +/* + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste, mm); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_lazy(mm, address, ptep); + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste, mm); + pgste_set(ptep, pgste); + } + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get(ptep); + pgste_set_key(ptep, pgste, pte, mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; +} + +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH +static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + + pte = *ptep; + ptep_flush_direct(vma->vm_mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; + + if (mm_has_pgste(vma->vm_mm)) { + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(pte) |= _PAGE_UNUSED; + pgste = pgste_update_all(&pte, pgste, vma->vm_mm); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +/* + * The batched pte unmap code uses ptep_get_and_clear_full to clear the + * ptes. Here an optimization is possible. tlb_gather_mmu flushes all + * tlbs of an mm if it can guarantee that the ptes of the mm_struct + * cannot be accessed while the batched unmap is running. In this case + * full==1 and a simple pte_clear is enough. See tlb.h. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, int full) +{ + pgste_t pgste; + pte_t pte; + + if (!full && mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, ptep, pgste); + } + + pte = *ptep; + if (!full) + ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) = _PAGE_INVALID; + + if (!full && mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste, mm); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; + + if (pte_write(pte)) { + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, ptep, pgste); + } + + ptep_flush_lazy(mm, address, ptep); + pte = pte_wrprotect(pte); + + if (mm_has_pgste(mm)) { + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + } + return pte; +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + pgste_t pgste; + + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + + ptep_flush_direct(vma->vm_mm, address, ptep); + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; + return 1; +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) +{ + pte_t __pte; + pte_val(__pte) = physpage + pgprot_val(pgprot); + return pte_mkyoung(__pte); +} + +static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + unsigned long physpage = page_to_phys(page); + pte_t __pte = mk_pte_phys(physpage, pgprot); + + if (pte_write(__pte) && PageDirty(page)) + __pte = pte_mkdirty(__pte); + return __pte; +} + +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) + +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#ifndef CONFIG_64BIT + +#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) +#define pud_deref(pmd) ({ BUG(); 0UL; }) +#define pgd_deref(pmd) ({ BUG(); 0UL; }) + +#define pud_offset(pgd, address) ((pud_t *) pgd) +#define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address)) + +#else /* CONFIG_64BIT */ + +#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) +#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) +#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + pud_t *pud = (pud_t *) pgd; + if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pud = (pud_t *) pgd_deref(*pgd); + return pud + pud_index(address); +} + +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + pmd_t *pmd = (pmd_t *) pud; + if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmd = (pmd_t *) pud_deref(*pud); + return pmd + pmd_index(address); +} + +#endif /* CONFIG_64BIT */ + +#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot)) +#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) + +/* Find an entry in the lowest level page table.. */ +#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) +#define pte_offset_kernel(pmd, address) pte_offset(pmd,address) +#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) +#define pte_unmap(pte) do { } while (0) + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + /* + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) + * Convert to segment table entry format. + */ + if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) + return pgprot_val(SEGMENT_NONE); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + int young; + + young = pmd_young(pmd); + pmd_val(pmd) &= _SEGMENT_CHG_MASK; + pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (young) + pmd = pmd_mkyoung(pmd); + return pmd; +} + +static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) +{ + pmd_t __pmd; + pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); + return pmd_mkyoung(__pmd); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + return pmd; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ + +static inline void __pmdp_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + +static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto; + + sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + : "cc" ); +} + +static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto; + + sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + : "cc" ); +} + +static inline void pmdp_flush_direct(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active, count; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(address, pmdp); + else + __pmdp_idte(address, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); +} + +static inline void pmdp_flush_lazy(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active, count; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + mm->context.flush_mm = 1; + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(address, pmdp); + else + __pmdp_csp(pmdp); + atomic_sub(0x10000, &mm->context.attach_count); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT; +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t entry) +{ + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) + pmd_val(entry) |= _SEGMENT_ENTRY_CO; + *pmdp = entry; +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + return pmd; +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + /* No dirty bit in the segment table entry. */ + return pmd; +} + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd; + + pmd = *pmdp; + pmdp_flush_direct(vma->vm_mm, address, pmdp); + *pmdp = pmd_mkold(pmd); + return pmd_young(pmd); +} + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; + + pmdp_flush_direct(mm, address, pmdp); + pmd_clear(pmdp); + return pmd; +} + +#define __HAVE_ARCH_PMDP_CLEAR_FLUSH +static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + return pmdp_get_and_clear(vma->vm_mm, address, pmdp); +} + +#define __HAVE_ARCH_PMDP_INVALIDATE +static inline void pmdp_invalidate(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + pmdp_flush_direct(vma->vm_mm, address, pmdp); +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; + + if (pmd_write(pmd)) { + pmdp_flush_direct(mm, address, pmdp); + set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); + } +} + +#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; +} + +static inline int has_transparent_hugepage(void) +{ + return MACHINE_HAS_HPAGE ? 1 : 0; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return pmd_val(pmd) >> PAGE_SHIFT; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* + * 31 bit swap entry format: + * A page-table entry has some bits we have to treat in a special way. + * Bits 0, 20 and bit 23 have to be zero, otherwise an specification + * exception will occur instead of a page translation exception. The + * specifiation exception has the bad habit not to store necessary + * information in the lowcore. + * Bits 21, 22, 30 and 31 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 + * This leaves the bits 1-19 and bits 24-29 to store type and offset. + * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 + * plus 24 for the offset. + * 0| offset |0110|o|type |00| + * 0 0000000001111111111 2222 2 22222 33 + * 0 1234567890123456789 0123 4 56789 01 + * + * 64 bit swap entry format: + * A page-table entry has some bits we have to treat in a special way. + * Bits 52 and bit 55 have to be zero, otherwise an specification + * exception will occur instead of a page translation exception. The + * specifiation exception has the bad habit not to store necessary + * information in the lowcore. + * Bits 53, 54, 62 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 + * This leaves the bits 0-51 and bits 56-61 to store type and offset. + * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 + * plus 56 for the offset. + * | offset |0110|o|type |00| + * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 + * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 + */ +#ifndef CONFIG_64BIT +#define __SWP_OFFSET_MASK (~0UL >> 12) +#else +#define __SWP_OFFSET_MASK (~0UL >> 11) +#endif +static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) +{ + pte_t pte; + offset &= __SWP_OFFSET_MASK; + pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | + ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); + return pte; +} + +#define __swp_type(entry) (((entry).val >> 2) & 0x1f) +#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) +#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#ifndef CONFIG_64BIT +# define PTE_FILE_MAX_BITS 26 +#else /* CONFIG_64BIT */ +# define PTE_FILE_MAX_BITS 59 +#endif /* CONFIG_64BIT */ + +#define pte_to_pgoff(__pte) \ + ((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f)) + +#define pgoff_to_pte(__off) \ + ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ + | _PAGE_INVALID | _PAGE_PROTECT }) + +#endif /* !__ASSEMBLY__ */ + +#define kern_addr_valid(addr) (1) + +extern int vmem_add_mapping(unsigned long start, unsigned long size); +extern int vmem_remove_mapping(unsigned long start, unsigned long size); +extern int s390_enable_sie(void); +extern void s390_enable_skey(void); + +/* + * No page table caches to initialise + */ +static inline void pgtable_cache_init(void) { } +static inline void check_pgt_cache(void) { } + +#include <asm-generic/pgtable.h> + +#endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h new file mode 100644 index 00000000000..6f02d452bbe --- /dev/null +++ b/arch/s390/include/asm/processor.h @@ -0,0 +1,419 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Hartmut Penner (hp@de.ibm.com), + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/processor.h" + * Copyright (C) 1994, Linus Torvalds + */ + +#ifndef __ASM_S390_PROCESSOR_H +#define __ASM_S390_PROCESSOR_H + +#define CIF_MCCK_PENDING 0 /* machine check handling is pending */ +#define CIF_ASCE 1 /* user asce needs fixup / uaccess */ + +#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING) +#define _CIF_ASCE (1<<CIF_ASCE) + + +#ifndef __ASSEMBLY__ + +#include <linux/linkage.h> +#include <linux/irqflags.h> +#include <asm/cpu.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/setup.h> +#include <asm/runtime_instr.h> + +static inline void set_cpu_flag(int flag) +{ + S390_lowcore.cpu_flags |= (1U << flag); +} + +static inline void clear_cpu_flag(int flag) +{ + S390_lowcore.cpu_flags &= ~(1U << flag); +} + +static inline int test_cpu_flag(int flag) +{ + return !!(S390_lowcore.cpu_flags & (1U << flag)); +} + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; }) + +static inline void get_cpu_id(struct cpuid *ptr) +{ + asm volatile("stidp %0" : "=Q" (*ptr)); +} + +extern void s390_adjust_jiffies(void); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; +extern void execve_tail(void); + +/* + * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. + */ +#ifndef CONFIG_64BIT + +#define TASK_SIZE (1UL << 31) +#define TASK_MAX_SIZE (1UL << 31) +#define TASK_UNMAPPED_BASE (1UL << 30) + +#else /* CONFIG_64BIT */ + +#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit) +#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ + (1UL << 30) : (1UL << 41)) +#define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_MAX_SIZE (1UL << 53) + +#endif /* CONFIG_64BIT */ + +#ifndef CONFIG_64BIT +#define STACK_TOP (1UL << 31) +#define STACK_TOP_MAX (1UL << 31) +#else /* CONFIG_64BIT */ +#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42)) +#define STACK_TOP_MAX (1UL << 42) +#endif /* CONFIG_64BIT */ + +#define HAVE_ARCH_PICK_MMAP_LAYOUT + +typedef struct { + __u32 ar4; +} mm_segment_t; + +/* + * Thread structure + */ +struct thread_struct { + s390_fp_regs fp_regs; + unsigned int acrs[NUM_ACRS]; + unsigned long ksp; /* kernel stack pointer */ + mm_segment_t mm_segment; + unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ + struct per_regs per_user; /* User specified PER registers */ + struct per_event per_event; /* Cause of the last PER trap */ + unsigned long per_flags; /* Flags to control debug behavior */ + /* pfault_wait is used to block the process on a pfault event */ + unsigned long pfault_wait; + struct list_head list; + /* cpu runtime instrumentation */ + struct runtime_instr_cb *ri_cb; + int ri_signum; +#ifdef CONFIG_64BIT + unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ +#endif +}; + +/* Flag to disable transactions. */ +#define PER_FLAG_NO_TE 1UL +/* Flag to enable random transaction aborts. */ +#define PER_FLAG_TE_ABORT_RAND 2UL +/* Flag to specify random transaction abort mode: + * - abort each transaction at a random instruction before TEND if set. + * - abort random transactions at a random instruction if cleared. + */ +#define PER_FLAG_TE_ABORT_RAND_TEND 4UL + +typedef struct thread_struct thread_struct; + +/* + * Stack layout of a C stack frame. + */ +#ifndef __PACK_STACK +struct stack_frame { + unsigned long back_chain; + unsigned long empty1[5]; + unsigned long gprs[10]; + unsigned int empty2[8]; +}; +#else +struct stack_frame { + unsigned long empty1[5]; + unsigned int empty2[8]; + unsigned long gprs[10]; + unsigned long back_chain; +}; +#endif + +#define ARCH_MIN_TASKALIGN 8 + +#define INIT_THREAD { \ + .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ +} + +/* + * Do necessary setup to start up a new thread. + */ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + execve_tail(); \ +} while (0) + +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ + execve_tail(); \ +} while (0) + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; +struct seq_file; + +#ifdef CONFIG_64BIT +extern void show_cacheinfo(struct seq_file *m); +#else +static inline void show_cacheinfo(struct seq_file *m) { } +#endif + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); + +/* + * Return saved PC of a blocked thread. + */ +extern unsigned long thread_saved_pc(struct task_struct *t); + +unsigned long get_wchan(struct task_struct *p); +#define task_pt_regs(tsk) ((struct pt_regs *) \ + (task_stack_page(tsk) + THREAD_SIZE) - 1) +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15]) + +/* Has task runtime instrumentation enabled ? */ +#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) + +static inline unsigned short stap(void) +{ + unsigned short cpu_address; + + asm volatile("stap %0" : "=m" (cpu_address)); + return cpu_address; +} + +/* + * Give up the time slice of the virtual PU. + */ +static inline void cpu_relax(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,68"); + barrier(); +} + +#define arch_mutex_cpu_relax() barrier() + +static inline void psw_set_key(unsigned int key) +{ + asm volatile("spka 0(%0)" : : "d" (key)); +} + +/* + * Set PSW to specified value. + */ +static inline void __load_psw(psw_t psw) +{ +#ifndef CONFIG_64BIT + asm volatile("lpsw %0" : : "Q" (psw) : "cc"); +#else + asm volatile("lpswe %0" : : "Q" (psw) : "cc"); +#endif +} + +/* + * Set PSW mask to specified value, while leaving the + * PSW addr pointing to the next instruction. + */ +static inline void __load_psw_mask (unsigned long mask) +{ + unsigned long addr; + psw_t psw; + + psw.mask = mask; + +#ifndef CONFIG_64BIT + asm volatile( + " basr %0,0\n" + "0: ahi %0,1f-0b\n" + " st %0,%O1+4(%R1)\n" + " lpsw %1\n" + "1:" + : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); +#else /* CONFIG_64BIT */ + asm volatile( + " larl %0,1f\n" + " stg %0,%O1+8(%R1)\n" + " lpswe %1\n" + "1:" + : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); +#endif /* CONFIG_64BIT */ +} + +/* + * Rewind PSW instruction address by specified number of bytes. + */ +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) +{ +#ifndef CONFIG_64BIT + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} + +/* + * Function to drop a processor into disabled wait state + */ +static inline void __noreturn disabled_wait(unsigned long code) +{ + unsigned long ctl_buf; + psw_t dw_psw; + + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; + dw_psw.addr = code; + /* + * Store status and then load disabled wait psw, + * the processor is dead afterwards + */ +#ifndef CONFIG_64BIT + asm volatile( + " stctl 0,0,0(%2)\n" + " ni 0(%2),0xef\n" /* switch off protection */ + " lctl 0,0,0(%2)\n" + " stpt 0xd8\n" /* store timer */ + " stckc 0xe0\n" /* store clock comparator */ + " stpx 0x108\n" /* store prefix register */ + " stam 0,15,0x120\n" /* store access registers */ + " std 0,0x160\n" /* store f0 */ + " std 2,0x168\n" /* store f2 */ + " std 4,0x170\n" /* store f4 */ + " std 6,0x178\n" /* store f6 */ + " stm 0,15,0x180\n" /* store general registers */ + " stctl 0,15,0x1c0\n" /* store control registers */ + " oi 0x1c0,0x10\n" /* fake protection bit */ + " lpsw 0(%1)" + : "=m" (ctl_buf) + : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc"); +#else /* CONFIG_64BIT */ + asm volatile( + " stctg 0,0,0(%2)\n" + " ni 4(%2),0xef\n" /* switch off protection */ + " lctlg 0,0,0(%2)\n" + " lghi 1,0x1000\n" + " stpt 0x328(1)\n" /* store timer */ + " stckc 0x330(1)\n" /* store clock comparator */ + " stpx 0x318(1)\n" /* store prefix register */ + " stam 0,15,0x340(1)\n"/* store access registers */ + " stfpc 0x31c(1)\n" /* store fpu control */ + " std 0,0x200(1)\n" /* store f0 */ + " std 1,0x208(1)\n" /* store f1 */ + " std 2,0x210(1)\n" /* store f2 */ + " std 3,0x218(1)\n" /* store f3 */ + " std 4,0x220(1)\n" /* store f4 */ + " std 5,0x228(1)\n" /* store f5 */ + " std 6,0x230(1)\n" /* store f6 */ + " std 7,0x238(1)\n" /* store f7 */ + " std 8,0x240(1)\n" /* store f8 */ + " std 9,0x248(1)\n" /* store f9 */ + " std 10,0x250(1)\n" /* store f10 */ + " std 11,0x258(1)\n" /* store f11 */ + " std 12,0x260(1)\n" /* store f12 */ + " std 13,0x268(1)\n" /* store f13 */ + " std 14,0x270(1)\n" /* store f14 */ + " std 15,0x278(1)\n" /* store f15 */ + " stmg 0,15,0x280(1)\n"/* store general registers */ + " stctg 0,15,0x380(1)\n"/* store control registers */ + " oi 0x384(1),0x10\n"/* fake protection bit */ + " lpswe 0(%1)" + : "=m" (ctl_buf) + : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1"); +#endif /* CONFIG_64BIT */ + while (1); +} + +/* + * Use to set psw mask except for the first byte which + * won't be changed by this function. + */ +static inline void +__set_psw_mask(unsigned long mask) +{ + __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); +} + +#define local_mcck_enable() \ + __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT) + +/* + * Basic Machine Check/Program Check Handler. + */ + +extern void s390_base_mcck_handler(void); +extern void s390_base_pgm_handler(void); +extern void s390_base_ext_handler(void); + +extern void (*s390_base_mcck_handler_fn)(void); +extern void (*s390_base_pgm_handler_fn)(void); +extern void (*s390_base_ext_handler_fn)(void); + +#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL + +extern int memcpy_real(void *, void *, size_t); +extern void memcpy_absolute(void *, void *, size_t); + +#define mem_assign_absolute(dest, val) { \ + __typeof__(dest) __tmp = (val); \ + \ + BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \ + memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ +} + +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".long (" #_fault ") - .\n" \ + ".long (" #_target ") - .\n" \ + ".previous\n" + +#else /* __ASSEMBLY__ */ + +#define EX_TABLE(_fault, _target) \ + .section __ex_table,"a" ; \ + .align 4 ; \ + .long (_fault) - . ; \ + .long (_target) - . ; \ + .previous + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h new file mode 100644 index 00000000000..55d69dd7473 --- /dev/null +++ b/arch/s390/include/asm/ptrace.h @@ -0,0 +1,175 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + */ +#ifndef _S390_PTRACE_H +#define _S390_PTRACE_H + +#include <uapi/asm/ptrace.h> + +#define PIF_SYSCALL 0 /* inside a system call */ +#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ + +#define _PIF_SYSCALL (1<<PIF_SYSCALL) +#define _PIF_PER_TRAP (1<<PIF_PER_TRAP) + +#ifndef __ASSEMBLY__ + +#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \ + PSW_MASK_EA | PSW_MASK_BA) +#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \ + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \ + PSW_MASK_PSTATE | PSW_ASC_PRIMARY) + +struct psw_bits { + unsigned long long : 1; + unsigned long long r : 1; /* PER-Mask */ + unsigned long long : 3; + unsigned long long t : 1; /* DAT Mode */ + unsigned long long i : 1; /* Input/Output Mask */ + unsigned long long e : 1; /* External Mask */ + unsigned long long key : 4; /* PSW Key */ + unsigned long long : 1; + unsigned long long m : 1; /* Machine-Check Mask */ + unsigned long long w : 1; /* Wait State */ + unsigned long long p : 1; /* Problem State */ + unsigned long long as : 2; /* Address Space Control */ + unsigned long long cc : 2; /* Condition Code */ + unsigned long long pm : 4; /* Program Mask */ + unsigned long long ri : 1; /* Runtime Instrumentation */ + unsigned long long : 6; + unsigned long long eaba : 2; /* Addressing Mode */ +#ifdef CONFIG_64BIT + unsigned long long : 31; + unsigned long long ia : 64;/* Instruction Address */ +#else + unsigned long long ia : 31;/* Instruction Address */ +#endif +}; + +enum { + PSW_AMODE_24BIT = 0, + PSW_AMODE_31BIT = 1, + PSW_AMODE_64BIT = 3 +}; + +enum { + PSW_AS_PRIMARY = 0, + PSW_AS_ACCREG = 1, + PSW_AS_SECONDARY = 2, + PSW_AS_HOME = 3 +}; + +#define psw_bits(__psw) (*({ \ + typecheck(psw_t, __psw); \ + &(*(struct psw_bits *)(&(__psw))); \ +})) + +/* + * The pt_regs struct defines the way the registers are stored on + * the stack during a system call. + */ +struct pt_regs +{ + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned long orig_gpr2; + unsigned int int_code; + unsigned int int_parm; + unsigned long int_parm_long; + unsigned long flags; +}; + +/* + * Program event recording (PER) register set. + */ +struct per_regs { + unsigned long control; /* PER control bits */ + unsigned long start; /* PER starting address */ + unsigned long end; /* PER ending address */ +}; + +/* + * PER event contains information about the cause of the last PER exception. + */ +struct per_event { + unsigned short cause; /* PER code, ATMID and AI */ + unsigned long address; /* PER address */ + unsigned char paid; /* PER access identification */ +}; + +/* + * Simplified per_info structure used to decode the ptrace user space ABI. + */ +struct per_struct_kernel { + unsigned long cr9; /* PER control bits */ + unsigned long cr10; /* PER starting address */ + unsigned long cr11; /* PER ending address */ + unsigned long bits; /* Obsolete software bits */ + unsigned long starting_addr; /* User specified start address */ + unsigned long ending_addr; /* User specified end address */ + unsigned short perc_atmid; /* PER trap ATMID */ + unsigned long address; /* PER trap instruction address */ + unsigned char access_id; /* PER trap access identification */ +}; + +#define PER_EVENT_MASK 0xEB000000UL + +#define PER_EVENT_BRANCH 0x80000000UL +#define PER_EVENT_IFETCH 0x40000000UL +#define PER_EVENT_STORE 0x20000000UL +#define PER_EVENT_STORE_REAL 0x08000000UL +#define PER_EVENT_TRANSACTION_END 0x02000000UL +#define PER_EVENT_NULLIFICATION 0x01000000UL + +#define PER_CONTROL_MASK 0x00e00000UL + +#define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL +#define PER_CONTROL_SUSPENSION 0x00400000UL +#define PER_CONTROL_ALTERATION 0x00200000UL + +static inline void set_pt_regs_flag(struct pt_regs *regs, int flag) +{ + regs->flags |= (1U << flag); +} + +static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag) +{ + regs->flags &= ~(1U << flag); +} + +static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) +{ + return !!(regs->flags & (1U << flag)); +} + +/* + * These are defined as per linux/ptrace.h, which see. + */ +#define arch_has_single_step() (1) +#define arch_has_block_step() (1) + +#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) +#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) +#define user_stack_pointer(regs)((regs)->gprs[15]) +#define profile_pc(regs) instruction_pointer(regs) + +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->gprs[2]; +} + +int regs_query_register_offset(const char *name); +const char *regs_query_register_name(unsigned int offset); +unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); + +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->gprs[15] & PSW_ADDR_INSN; +} + +#endif /* __ASSEMBLY__ */ +#endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h new file mode 100644 index 00000000000..d786c634e05 --- /dev/null +++ b/arch/s390/include/asm/qdio.h @@ -0,0 +1,436 @@ +/* + * Copyright IBM Corp. 2000, 2008 + * Author(s): Utz Bacher <utz.bacher@de.ibm.com> + * Jan Glauber <jang@linux.vnet.ibm.com> + * + */ +#ifndef __QDIO_H__ +#define __QDIO_H__ + +#include <linux/interrupt.h> +#include <asm/cio.h> +#include <asm/ccwdev.h> + +/* only use 4 queues to save some cachelines */ +#define QDIO_MAX_QUEUES_PER_IRQ 4 +#define QDIO_MAX_BUFFERS_PER_Q 128 +#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1) +#define QDIO_MAX_ELEMENTS_PER_BUFFER 16 +#define QDIO_SBAL_SIZE 256 + +#define QDIO_QETH_QFMT 0 +#define QDIO_ZFCP_QFMT 1 +#define QDIO_IQDIO_QFMT 2 + +/** + * struct qdesfmt0 - queue descriptor, format 0 + * @sliba: storage list information block address + * @sla: storage list address + * @slsba: storage list state block address + * @akey: access key for DLIB + * @bkey: access key for SL + * @ckey: access key for SBALs + * @dkey: access key for SLSB + */ +struct qdesfmt0 { + u64 sliba; + u64 sla; + u64 slsba; + u32 : 32; + u32 akey : 4; + u32 bkey : 4; + u32 ckey : 4; + u32 dkey : 4; + u32 : 16; +} __attribute__ ((packed)); + +#define QDR_AC_MULTI_BUFFER_ENABLE 0x01 + +/** + * struct qdr - queue description record (QDR) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @ac: adapter characteristics + * @iqdcnt: input queue descriptor count + * @oqdcnt: output queue descriptor count + * @iqdsz: inpout queue descriptor size + * @oqdsz: output queue descriptor size + * @qiba: queue information block address + * @qkey: queue information block key + * @qdf0: queue descriptions + */ +struct qdr { + u32 qfmt : 8; + u32 pfmt : 8; + u32 : 8; + u32 ac : 8; + u32 : 8; + u32 iqdcnt : 8; + u32 : 8; + u32 oqdcnt : 8; + u32 : 8; + u32 iqdsz : 8; + u32 : 8; + u32 oqdsz : 8; + /* private: */ + u32 res[9]; + /* public: */ + u64 qiba; + u32 : 32; + u32 qkey : 4; + u32 : 28; + struct qdesfmt0 qdf0[126]; +} __attribute__ ((packed, aligned(4096))); + +#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40 +#define QIB_RFLAGS_ENABLE_QEBSM 0x80 +#define QIB_RFLAGS_ENABLE_DATA_DIV 0x02 + +/** + * struct qib - queue information block (QIB) + * @qfmt: queue format + * @pfmt: implementation dependent parameter format + * @rflags: QEBSM + * @ac: adapter characteristics + * @isliba: absolute address of first input SLIB + * @osliba: absolute address of first output SLIB + * @ebcnam: adapter identifier in EBCDIC + * @parm: implementation dependent parameters + */ +struct qib { + u32 qfmt : 8; + u32 pfmt : 8; + u32 rflags : 8; + u32 ac : 8; + u32 : 32; + u64 isliba; + u64 osliba; + u32 : 32; + u32 : 32; + u8 ebcnam[8]; + /* private: */ + u8 res[88]; + /* public: */ + u8 parm[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct slibe - storage list information block element (SLIBE) + * @parms: implementation dependent parameters + */ +struct slibe { + u64 parms; +}; + +/** + * struct qaob - queue asynchronous operation block + * @res0: reserved parameters + * @res1: reserved parameter + * @res2: reserved parameter + * @res3: reserved parameter + * @aorc: asynchronous operation return code + * @flags: internal flags + * @cbtbs: control block type + * @sb_count: number of storage blocks + * @sba: storage block element addresses + * @dcount: size of storage block elements + * @user0: user defineable value + * @res4: reserved paramater + * @user1: user defineable value + * @user2: user defineable value + */ +struct qaob { + u64 res0[6]; + u8 res1; + u8 res2; + u8 res3; + u8 aorc; + u8 flags; + u16 cbtbs; + u8 sb_count; + u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u64 user0; + u64 res4[2]; + u64 user1; + u64 user2; +} __attribute__ ((packed, aligned(256))); + +/** + * struct slib - storage list information block (SLIB) + * @nsliba: next SLIB address (if any) + * @sla: SL address + * @slsba: SLSB address + * @slibe: SLIB elements + */ +struct slib { + u64 nsliba; + u64 sla; + u64 slsba; + /* private: */ + u8 res[1000]; + /* public: */ + struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(2048))); + +#define SBAL_EFLAGS_LAST_ENTRY 0x40 +#define SBAL_EFLAGS_CONTIGUOUS 0x20 +#define SBAL_EFLAGS_FIRST_FRAG 0x04 +#define SBAL_EFLAGS_MIDDLE_FRAG 0x08 +#define SBAL_EFLAGS_LAST_FRAG 0x0c +#define SBAL_EFLAGS_MASK 0x6f + +#define SBAL_SFLAGS0_PCI_REQ 0x40 +#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20 + +/* Awesome OpenFCP extensions */ +#define SBAL_SFLAGS0_TYPE_STATUS 0x00 +#define SBAL_SFLAGS0_TYPE_WRITE 0x08 +#define SBAL_SFLAGS0_TYPE_READ 0x10 +#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18 +#define SBAL_SFLAGS0_MORE_SBALS 0x04 +#define SBAL_SFLAGS0_COMMAND 0x02 +#define SBAL_SFLAGS0_LAST_SBAL 0x00 +#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND +#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS +#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND) + +/** + * struct qdio_buffer_element - SBAL entry + * @eflags: SBAL entry flags + * @scount: SBAL count + * @sflags: whole SBAL flags + * @length: length + * @addr: address +*/ +struct qdio_buffer_element { + u8 eflags; + /* private: */ + u8 res1; + /* public: */ + u8 scount; + u8 sflags; + u32 length; +#ifdef CONFIG_32BIT + /* private: */ + void *res2; + /* public: */ +#endif + void *addr; +} __attribute__ ((packed, aligned(16))); + +/** + * struct qdio_buffer - storage block address list (SBAL) + * @element: SBAL entries + */ +struct qdio_buffer { + struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct sl_element - storage list entry + * @sbal: absolute SBAL address + */ +struct sl_element { +#ifdef CONFIG_32BIT + /* private: */ + unsigned long reserved; + /* public: */ +#endif + unsigned long sbal; +} __attribute__ ((packed)); + +/** + * struct sl - storage list (SL) + * @element: SL entries + */ +struct sl { + struct sl_element element[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(1024))); + +/** + * struct slsb - storage list state block (SLSB) + * @val: state per buffer + */ +struct slsb { + u8 val[QDIO_MAX_BUFFERS_PER_Q]; +} __attribute__ ((packed, aligned(256))); + +/** + * struct qdio_outbuf_state - SBAL related asynchronous operation information + * (for communication with upper layer programs) + * (only required for use with completion queues) + * @flags: flags indicating state of buffer + * @aob: pointer to QAOB used for the particular SBAL + * @user: pointer to upper layer program's state information related to SBAL + * (stored in user1 data of QAOB) + */ +struct qdio_outbuf_state { + u8 flags; + struct qaob *aob; + void *user; +}; + +#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 +#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 + +#define CHSC_AC1_INITIATE_INPUTQ 0x80 + + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ + +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 +#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 + +#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 + +struct qdio_ssqd_desc { + u8 flags; + u8:8; + u16 sch; + u8 qfmt; + u8 parm; + u8 qdioac1; + u8 sch_class; + u8 pcnt; + u8 icnt; + u8:8; + u8 ocnt; + u8:8; + u8 mbccnt; + u16 qdioac2; + u64 sch_token; + u8 mro; + u8 mri; + u16 qdioac3; + u16:16; + u8:8; + u8 mmwc; +} __attribute__ ((packed)); + +/* params are: ccw_device, qdio_error, queue_number, + first element processed, number of elements processed, int_parm */ +typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, + int, int, unsigned long); + +/* qdio errors reported to the upper-layer program */ +#define QDIO_ERROR_ACTIVATE 0x0001 +#define QDIO_ERROR_GET_BUF_STATE 0x0002 +#define QDIO_ERROR_SET_BUF_STATE 0x0004 +#define QDIO_ERROR_SLSB_STATE 0x0100 + +#define QDIO_ERROR_FATAL 0x00ff +#define QDIO_ERROR_TEMPORARY 0xff00 + +/* for qdio_cleanup */ +#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01 +#define QDIO_FLAG_CLEANUP_USING_HALT 0x02 + +/** + * struct qdio_initialize - qdio initialization data + * @cdev: associated ccw device + * @q_format: queue format + * @adapter_name: name for the adapter + * @qib_param_field_format: format for qib_parm_field + * @qib_param_field: pointer to 128 bytes or NULL, if no param field + * @qib_rflags: rflags to set + * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL + * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL + * @no_input_qs: number of input queues + * @no_output_qs: number of output queues + * @input_handler: handler to be called for input queues + * @output_handler: handler to be called for output queues + * @queue_start_poll_array: polling handlers (one per input queue or NULL) + * @int_parm: interruption parameter + * @input_sbal_addr_array: address of no_input_qs * 128 pointers + * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) + */ +struct qdio_initialize { + struct ccw_device *cdev; + unsigned char q_format; + unsigned char qdr_ac; + unsigned char adapter_name[8]; + unsigned int qib_param_field_format; + unsigned char *qib_param_field; + unsigned char qib_rflags; + unsigned long *input_slib_elements; + unsigned long *output_slib_elements; + unsigned int no_input_qs; + unsigned int no_output_qs; + qdio_handler_t *input_handler; + qdio_handler_t *output_handler; + void (**queue_start_poll_array) (struct ccw_device *, int, + unsigned long); + int scan_threshold; + unsigned long int_parm; + void **input_sbal_addr_array; + void **output_sbal_addr_array; + struct qdio_outbuf_state *output_sbal_state_array; +}; + +/** + * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() + * @l3_ipv6_addr: entry contains IPv6 address + * @l3_ipv4_addr: entry contains IPv4 address + * @l2_addr_lnid: entry contains MAC address and VLAN ID + */ +enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; + +/** + * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() + * @nit: Network interface token + * @addr: Address of one of the three types + * + * The struct is passed to the callback function by qdio_brinfo_desc() + */ +struct qdio_brinfo_entry_l3_ipv6 { + u64 nit; + struct { unsigned char _s6_addr[16]; } addr; +} __packed; +struct qdio_brinfo_entry_l3_ipv4 { + u64 nit; + struct { uint32_t _s_addr; } addr; +} __packed; +struct qdio_brinfo_entry_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + +#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ +#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ +#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ +#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */ + +#define QDIO_FLAG_SYNC_INPUT 0x01 +#define QDIO_FLAG_SYNC_OUTPUT 0x02 +#define QDIO_FLAG_PCI_OUT 0x10 + +extern int qdio_allocate(struct qdio_initialize *); +extern int qdio_establish(struct qdio_initialize *); +extern int qdio_activate(struct ccw_device *); +extern void qdio_release_aob(struct qaob *); +extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, + unsigned int); +extern int qdio_start_irq(struct ccw_device *, int); +extern int qdio_stop_irq(struct ccw_device *, int); +extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); +extern int qdio_shutdown(struct ccw_device *, int); +extern int qdio_free(struct ccw_device *); +extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); +extern int qdio_pnso_brinfo(struct subchannel_id schid, + int cnc, u16 *response, + void (*cb)(void *priv, enum qdio_brinfo_entry_type type, + void *entry), + void *priv); + +#endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h new file mode 100644 index 00000000000..804578587a7 --- /dev/null +++ b/arch/s390/include/asm/reset.h @@ -0,0 +1,19 @@ +/* + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#ifndef _ASM_S390_RESET_H +#define _ASM_S390_RESET_H + +#include <linux/list.h> + +struct reset_call { + struct list_head list; + void (*fn)(void); +}; + +extern void register_reset_call(struct reset_call *reset); +extern void unregister_reset_call(struct reset_call *reset); +extern void s390_reset_system(void (*func)(void *), void *data); +#endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h new file mode 100644 index 00000000000..830da737ff8 --- /dev/null +++ b/arch/s390/include/asm/runtime_instr.h @@ -0,0 +1,98 @@ +#ifndef _RUNTIME_INSTR_H +#define _RUNTIME_INSTR_H + +#define S390_RUNTIME_INSTR_START 0x1 +#define S390_RUNTIME_INSTR_STOP 0x2 + +struct runtime_instr_cb { + __u64 buf_current; + __u64 buf_origin; + __u64 buf_limit; + + __u32 valid : 1; + __u32 pstate : 1; + __u32 pstate_set_buf : 1; + __u32 home_space : 1; + __u32 altered : 1; + __u32 : 3; + __u32 pstate_sample : 1; + __u32 sstate_sample : 1; + __u32 pstate_collect : 1; + __u32 sstate_collect : 1; + __u32 : 1; + __u32 halted_int : 1; + __u32 int_requested : 1; + __u32 buffer_full_int : 1; + __u32 key : 4; + __u32 : 9; + __u32 rgs : 3; + + __u32 mode : 4; + __u32 next : 1; + __u32 mae : 1; + __u32 : 2; + __u32 call_type_br : 1; + __u32 return_type_br : 1; + __u32 other_type_br : 1; + __u32 bc_other_type : 1; + __u32 emit : 1; + __u32 tx_abort : 1; + __u32 : 2; + __u32 bp_xn : 1; + __u32 bp_xt : 1; + __u32 bp_ti : 1; + __u32 bp_ni : 1; + __u32 suppr_y : 1; + __u32 suppr_z : 1; + + __u32 dc_miss_extra : 1; + __u32 lat_lev_ignore : 1; + __u32 ic_lat_lev : 4; + __u32 dc_lat_lev : 4; + + __u64 reserved1; + __u64 scaling_factor; + __u64 rsic; + __u64 reserved2; +} __packed __aligned(8); + +extern struct runtime_instr_cb runtime_instr_empty_cb; + +static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */ + : : "Q" (*cb)); +} + +static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */ + : "=Q" (*cb) : : "cc"); +} + +static inline void save_ri_cb(struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_prev) + store_runtime_instr_cb(cb_prev); +#endif +} + +static inline void restore_ri_cb(struct runtime_instr_cb *cb_next, + struct runtime_instr_cb *cb_prev) +{ +#ifdef CONFIG_64BIT + if (cb_next) + load_runtime_instr_cb(cb_next); + else if (cb_prev) + load_runtime_instr_cb(&runtime_instr_empty_cb); +#endif +} + +#ifdef CONFIG_64BIT +extern void exit_thread_runtime_instr(void); +#else +static inline void exit_thread_runtime_instr(void) { } +#endif + +#endif /* _RUNTIME_INSTR_H */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h new file mode 100644 index 00000000000..487f9b64efb --- /dev/null +++ b/arch/s390/include/asm/rwsem.h @@ -0,0 +1,318 @@ +#ifndef _S390_RWSEM_H +#define _S390_RWSEM_H + +/* + * S390 version + * Copyright IBM Corp. 2002 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h + */ + +/* + * + * The MSW of the count is the negated number of active writers and waiting + * lockers, and the LSW is the total number of active locks + * + * The lock count is initialized to 0 (no active and no waiting lockers). + * + * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an + * uncontended lock. This can be determined because XADD returns the old value. + * Readers increment by 1 and see a positive value when uncontended, negative + * if there are writers (and maybe) readers waiting (in which case it goes to + * sleep). + * + * The value of WAITING_BIAS supports up to 32766 waiting processes. This can + * be extended to 65534 by manually checking the whole MSW rather than relying + * on the S flag. + * + * The value of ACTIVE_BIAS supports up to 65535 active processes. + * + * This should be totally fair - if anything is waiting, a process that wants a + * lock will go to the back of the queue. When the currently active lock is + * released, if there's a writer at the front of the queue, then that and only + * that will be woken up; if there's a bunch of consequtive readers at the + * front, then they'll all be woken up, but no other readers will be. + */ + +#ifndef _LINUX_RWSEM_H +#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" +#endif + +#ifndef CONFIG_64BIT +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#else /* CONFIG_64BIT */ +#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L +#define RWSEM_ACTIVE_BIAS 0x0000000000000001L +#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL +#define RWSEM_WAITING_BIAS (-0x0000000100000000L) +#endif /* CONFIG_64BIT */ +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " ahi %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " aghi %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) + : "cc", "memory"); + if (old < 0) + rwsem_down_read_failed(sem); +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: ltr %1,%0\n" + " jm 1f\n" + " ahi %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b\n" + "1:" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: ltgr %1,%0\n" + " jm 1f\n" + " aghi %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b\n" + "1:" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS) + : "cc", "memory"); + return old >= 0 ? 1 : 0; +} + +/* + * lock for writing + */ +static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + signed long old, new, tmp; + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " a %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " ag %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "m" (tmp) + : "cc", "memory"); + if (old != 0) + rwsem_down_write_failed(sem); +} + +static inline void __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + signed long old; + + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%1\n" + "0: ltr %0,%0\n" + " jnz 1f\n" + " cs %0,%3,%1\n" + " jl 0b\n" +#else /* CONFIG_64BIT */ + " lg %0,%1\n" + "0: ltgr %0,%0\n" + " jnz 1f\n" + " csg %0,%3,%1\n" + " jl 0b\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=&d" (old), "=Q" (sem->count) + : "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS) + : "cc", "memory"); + return (old == RWSEM_UNLOCKED_VALUE) ? 1 : 0; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " ahi %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " aghi %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS) + : "cc", "memory"); + if (new < 0) + if ((new & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + signed long old, new, tmp; + + tmp = -RWSEM_ACTIVE_WRITE_BIAS; + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " a %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " ag %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "m" (tmp) + : "cc", "memory"); + if (new < 0) + if ((new & RWSEM_ACTIVE_MASK) == 0) + rwsem_wake(sem); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + signed long old, new, tmp; + + tmp = -RWSEM_WAITING_BIAS; + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " a %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " ag %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "m" (tmp) + : "cc", "memory"); + if (new > 1) + rwsem_downgrade_wake(sem); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " ar %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " agr %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "d" (delta) + : "cc", "memory"); +} + +/* + * implement exchange and add functionality + */ +static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) +{ + signed long old, new; + + asm volatile( +#ifndef CONFIG_64BIT + " l %0,%2\n" + "0: lr %1,%0\n" + " ar %1,%4\n" + " cs %0,%1,%2\n" + " jl 0b" +#else /* CONFIG_64BIT */ + " lg %0,%2\n" + "0: lgr %1,%0\n" + " agr %1,%4\n" + " csg %0,%1,%2\n" + " jl 0b" +#endif /* CONFIG_64BIT */ + : "=&d" (old), "=&d" (new), "=Q" (sem->count) + : "Q" (sem->count), "d" (delta) + : "cc", "memory"); + return new; +} + +#endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h new file mode 100644 index 00000000000..6d45ef6c12a --- /dev/null +++ b/arch/s390/include/asm/scatterlist.h @@ -0,0 +1,3 @@ +#include <asm-generic/scatterlist.h> + +#define ARCH_HAS_SG_CHAIN diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h new file mode 100644 index 00000000000..40b47dfa9d6 --- /dev/null +++ b/arch/s390/include/asm/schid.h @@ -0,0 +1,21 @@ +#ifndef ASM_SCHID_H +#define ASM_SCHID_H + +#include <linux/string.h> +#include <uapi/asm/schid.h> + +/* Helper function for sane state of pre-allocated subchannel_id. */ +static inline void +init_subchannel_id(struct subchannel_id *schid) +{ + memset(schid, 0, sizeof(struct subchannel_id)); + schid->one = 1; +} + +static inline int +schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2) +{ + return !memcmp(schid1, schid2, sizeof(struct subchannel_id)); +} + +#endif /* ASM_SCHID_H */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h new file mode 100644 index 00000000000..1aba89b53cb --- /dev/null +++ b/arch/s390/include/asm/sclp.h @@ -0,0 +1,71 @@ +/* + * Copyright IBM Corp. 2007 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#ifndef _ASM_S390_SCLP_H +#define _ASM_S390_SCLP_H + +#include <linux/types.h> +#include <asm/chpid.h> +#include <asm/cpu.h> + +#define SCLP_CHP_INFO_MASK_SIZE 32 + +struct sclp_chp_info { + u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; + u8 standby[SCLP_CHP_INFO_MASK_SIZE]; + u8 configured[SCLP_CHP_INFO_MASK_SIZE]; +}; + +#define LOADPARM_LEN 8 + +struct sclp_ipl_info { + int is_valid; + int has_dump; + char loadparm[LOADPARM_LEN]; +}; + +struct sclp_cpu_entry { + u8 address; + u8 reserved0[2]; + u8 : 3; + u8 siif : 1; + u8 : 4; + u8 reserved2[10]; + u8 type; + u8 reserved1; +} __attribute__((packed)); + +struct sclp_cpu_info { + unsigned int configured; + unsigned int standby; + unsigned int combined; + int has_cpu_type; + struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1]; +}; + +int sclp_get_cpu_info(struct sclp_cpu_info *info); +int sclp_cpu_configure(u8 cpu); +int sclp_cpu_deconfigure(u8 cpu); +unsigned long long sclp_get_rnmax(void); +unsigned long long sclp_get_rzm(void); +unsigned int sclp_get_max_cpu(void); +int sclp_sdias_blk_count(void); +int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); +int sclp_chp_configure(struct chp_id chpid); +int sclp_chp_deconfigure(struct chp_id chpid); +int sclp_chp_read_info(struct sclp_chp_info *info); +void sclp_get_ipl_info(struct sclp_ipl_info *info); +bool __init sclp_has_linemode(void); +bool __init sclp_has_vt220(void); +bool sclp_has_sprp(void); +int sclp_pci_configure(u32 fid); +int sclp_pci_deconfigure(u32 fid); +int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); +unsigned long sclp_get_hsa_size(void); +void sclp_early_detect(void); +int sclp_has_siif(void); +unsigned int sclp_get_ibc(void); + +#endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h new file mode 100644 index 00000000000..4af99cdaddf --- /dev/null +++ b/arch/s390/include/asm/scsw.h @@ -0,0 +1,988 @@ +/* + * Helper functions for scsw access. + * + * Copyright IBM Corp. 2008, 2012 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_SCSW_H_ +#define _ASM_S390_SCSW_H_ + +#include <linux/types.h> +#include <asm/css_chars.h> +#include <asm/cio.h> + +/** + * struct cmd_scsw - command-mode subchannel status word + * @key: subchannel key + * @sctl: suspend control + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @pfch: prefetch + * @isic: initial-status interruption control + * @alcc: address-limit checking control + * @ssi: suppress-suspended interruption + * @zcc: zero condition code + * @ectl: extended control + * @pno: path not operational + * @res: reserved + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @cpa: channel program address + * @dstat: device status + * @cstat: subchannel status + * @count: residual count + */ +struct cmd_scsw { + __u32 key : 4; + __u32 sctl : 1; + __u32 eswf : 1; + __u32 cc : 2; + __u32 fmt : 1; + __u32 pfch : 1; + __u32 isic : 1; + __u32 alcc : 1; + __u32 ssi : 1; + __u32 zcc : 1; + __u32 ectl : 1; + __u32 pno : 1; + __u32 res : 1; + __u32 fctl : 3; + __u32 actl : 7; + __u32 stctl : 5; + __u32 cpa; + __u32 dstat : 8; + __u32 cstat : 8; + __u32 count : 16; +} __attribute__ ((packed)); + +/** + * struct tm_scsw - transport-mode subchannel status word + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @x: IRB-format control + * @q: interrogate-complete + * @ectl: extended control + * @pno: path not operational + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @tcw: TCW address + * @dstat: device status + * @cstat: subchannel status + * @fcxs: FCX status + * @schxs: subchannel-extended status + */ +struct tm_scsw { + u32 key:4; + u32 :1; + u32 eswf:1; + u32 cc:2; + u32 fmt:3; + u32 x:1; + u32 q:1; + u32 :1; + u32 ectl:1; + u32 pno:1; + u32 :1; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 tcw; + u32 dstat:8; + u32 cstat:8; + u32 fcxs:8; + u32 schxs:8; +} __attribute__ ((packed)); + +/** + * struct eadm_scsw - subchannel status word for eadm subchannels + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @ectl: extended control + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @aob: AOB address + * @dstat: device status + * @cstat: subchannel status + */ +struct eadm_scsw { + u32 key:4; + u32:1; + u32 eswf:1; + u32 cc:2; + u32:6; + u32 ectl:1; + u32:2; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 aob; + u32 dstat:8; + u32 cstat:8; + u32:16; +} __packed; + +/** + * union scsw - subchannel status word + * @cmd: command-mode SCSW + * @tm: transport-mode SCSW + * @eadm: eadm SCSW + */ +union scsw { + struct cmd_scsw cmd; + struct tm_scsw tm; + struct eadm_scsw eadm; +} __packed; + +#define SCSW_FCTL_CLEAR_FUNC 0x1 +#define SCSW_FCTL_HALT_FUNC 0x2 +#define SCSW_FCTL_START_FUNC 0x4 + +#define SCSW_ACTL_SUSPENDED 0x1 +#define SCSW_ACTL_DEVACT 0x2 +#define SCSW_ACTL_SCHACT 0x4 +#define SCSW_ACTL_CLEAR_PEND 0x8 +#define SCSW_ACTL_HALT_PEND 0x10 +#define SCSW_ACTL_START_PEND 0x20 +#define SCSW_ACTL_RESUME_PEND 0x40 + +#define SCSW_STCTL_STATUS_PEND 0x1 +#define SCSW_STCTL_SEC_STATUS 0x2 +#define SCSW_STCTL_PRIM_STATUS 0x4 +#define SCSW_STCTL_INTER_STATUS 0x8 +#define SCSW_STCTL_ALERT_STATUS 0x10 + +#define DEV_STAT_ATTENTION 0x80 +#define DEV_STAT_STAT_MOD 0x40 +#define DEV_STAT_CU_END 0x20 +#define DEV_STAT_BUSY 0x10 +#define DEV_STAT_CHN_END 0x08 +#define DEV_STAT_DEV_END 0x04 +#define DEV_STAT_UNIT_CHECK 0x02 +#define DEV_STAT_UNIT_EXCEP 0x01 + +#define SCHN_STAT_PCI 0x80 +#define SCHN_STAT_INCORR_LEN 0x40 +#define SCHN_STAT_PROG_CHECK 0x20 +#define SCHN_STAT_PROT_CHECK 0x10 +#define SCHN_STAT_CHN_DATA_CHK 0x08 +#define SCHN_STAT_CHN_CTRL_CHK 0x04 +#define SCHN_STAT_INTF_CTRL_CHK 0x02 +#define SCHN_STAT_CHAIN_CHECK 0x01 + +/* + * architectured values for first sense byte + */ +#define SNS0_CMD_REJECT 0x80 +#define SNS_CMD_REJECT SNS0_CMD_REJEC +#define SNS0_INTERVENTION_REQ 0x40 +#define SNS0_BUS_OUT_CHECK 0x20 +#define SNS0_EQUIPMENT_CHECK 0x10 +#define SNS0_DATA_CHECK 0x08 +#define SNS0_OVERRUN 0x04 +#define SNS0_INCOMPL_DOMAIN 0x01 + +/* + * architectured values for second sense byte + */ +#define SNS1_PERM_ERR 0x80 +#define SNS1_INV_TRACK_FORMAT 0x40 +#define SNS1_EOC 0x20 +#define SNS1_MESSAGE_TO_OPER 0x10 +#define SNS1_NO_REC_FOUND 0x08 +#define SNS1_FILE_PROTECTED 0x04 +#define SNS1_WRITE_INHIBITED 0x02 +#define SNS1_INPRECISE_END 0x01 + +/* + * architectured values for third sense byte + */ +#define SNS2_REQ_INH_WRITE 0x80 +#define SNS2_CORRECTABLE 0x40 +#define SNS2_FIRST_LOG_ERR 0x20 +#define SNS2_ENV_DATA_PRESENT 0x10 +#define SNS2_INPRECISE_END 0x04 + +/** + * scsw_is_tm - check for transport mode scsw + * @scsw: pointer to scsw + * + * Return non-zero if the specified scsw is a transport mode scsw, zero + * otherwise. + */ +static inline int scsw_is_tm(union scsw *scsw) +{ + return css_general_characteristics.fcx && (scsw->tm.x == 1); +} + +/** + * scsw_key - return scsw key field + * @scsw: pointer to scsw + * + * Return the value of the key field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.key; + else + return scsw->cmd.key; +} + +/** + * scsw_eswf - return scsw eswf field + * @scsw: pointer to scsw + * + * Return the value of the eswf field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.eswf; + else + return scsw->cmd.eswf; +} + +/** + * scsw_cc - return scsw cc field + * @scsw: pointer to scsw + * + * Return the value of the cc field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cc; + else + return scsw->cmd.cc; +} + +/** + * scsw_ectl - return scsw ectl field + * @scsw: pointer to scsw + * + * Return the value of the ectl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.ectl; + else + return scsw->cmd.ectl; +} + +/** + * scsw_pno - return scsw pno field + * @scsw: pointer to scsw + * + * Return the value of the pno field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.pno; + else + return scsw->cmd.pno; +} + +/** + * scsw_fctl - return scsw fctl field + * @scsw: pointer to scsw + * + * Return the value of the fctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.fctl; + else + return scsw->cmd.fctl; +} + +/** + * scsw_actl - return scsw actl field + * @scsw: pointer to scsw + * + * Return the value of the actl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.actl; + else + return scsw->cmd.actl; +} + +/** + * scsw_stctl - return scsw stctl field + * @scsw: pointer to scsw + * + * Return the value of the stctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.stctl; + else + return scsw->cmd.stctl; +} + +/** + * scsw_dstat - return scsw dstat field + * @scsw: pointer to scsw + * + * Return the value of the dstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.dstat; + else + return scsw->cmd.dstat; +} + +/** + * scsw_cstat - return scsw cstat field + * @scsw: pointer to scsw + * + * Return the value of the cstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cstat; + else + return scsw->cmd.cstat; +} + +/** + * scsw_cmd_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_key(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_sctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_sctl(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_eswf(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fmt(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_pfch - check pfch field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pfch field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pfch(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_isic - check isic field validity + * @scsw: pointer to scsw + * + * Return non-zero if the isic field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_isic(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_alcc - check alcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the alcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_alcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_ssi - check ssi field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ssi field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ssi(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_zcc - check zcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the zcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_zcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS); +} + +/** + * scsw_cmd_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ectl(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_cmd_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pno(union scsw *scsw) +{ + return (scsw->cmd.fctl != 0) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_cmd_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_dstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_cmd_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_tm_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_key(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_tm_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_eswf(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cc(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fmt(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_x - check x field validity + * @scsw: pointer to scsw + * + * Return non-zero if the x field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_x(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_q - check q field validity + * @scsw: pointer to scsw + * + * Return non-zero if the q field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_q(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_ectl(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_tm_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_pno(union scsw *scsw) +{ + return (scsw->tm.fctl != 0) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_tm_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_dstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_fcxs - check fcxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fcxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fcxs(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_schxs - check schxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the schxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_schxs(union scsw *scsw) +{ + return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK | + SCHN_STAT_INTF_CTRL_CHK | + SCHN_STAT_PROT_CHECK | + SCHN_STAT_CHN_DATA_CHK)); +} + +/** + * scsw_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_actl(scsw); + else + return scsw_cmd_is_valid_actl(scsw); +} + +/** + * scsw_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cc(scsw); + else + return scsw_cmd_is_valid_cc(scsw); +} + +/** + * scsw_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cstat(scsw); + else + return scsw_cmd_is_valid_cstat(scsw); +} + +/** + * scsw_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_dstat(scsw); + else + return scsw_cmd_is_valid_dstat(scsw); +} + +/** + * scsw_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_ectl(scsw); + else + return scsw_cmd_is_valid_ectl(scsw); +} + +/** + * scsw_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_eswf(scsw); + else + return scsw_cmd_is_valid_eswf(scsw); +} + +/** + * scsw_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_fctl(scsw); + else + return scsw_cmd_is_valid_fctl(scsw); +} + +/** + * scsw_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_key(scsw); + else + return scsw_cmd_is_valid_key(scsw); +} + +/** + * scsw_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_pno(scsw); + else + return scsw_cmd_is_valid_pno(scsw); +} + +/** + * scsw_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_stctl(scsw); + else + return scsw_cmd_is_valid_stctl(scsw); +} + +/** + * scsw_cmd_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the command mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_cmd_is_solicited(union scsw *scsw) +{ + return (scsw->cmd.cc != 0) || (scsw->cmd.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_tm_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_tm_is_solicited(union scsw *scsw) +{ + return (scsw->tm.cc != 0) || (scsw->tm.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport or command mode scsw indicates that the + * associated status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_is_solicited(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_solicited(scsw); + else + return scsw_cmd_is_solicited(scsw); +} + +#endif /* _ASM_S390_SCSW_H_ */ diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h new file mode 100644 index 00000000000..781a9cf9b00 --- /dev/null +++ b/arch/s390/include/asm/seccomp.h @@ -0,0 +1,16 @@ +#ifndef _ASM_S390_SECCOMP_H +#define _ASM_S390_SECCOMP_H + +#include <linux/unistd.h> + +#define __NR_seccomp_read __NR_read +#define __NR_seccomp_write __NR_write +#define __NR_seccomp_exit __NR_exit +#define __NR_seccomp_sigreturn __NR_sigreturn + +#define __NR_seccomp_read_32 __NR_read +#define __NR_seccomp_write_32 __NR_write +#define __NR_seccomp_exit_32 __NR_exit +#define __NR_seccomp_sigreturn_32 __NR_sigreturn + +#endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h new file mode 100644 index 00000000000..fbd9116eb17 --- /dev/null +++ b/arch/s390/include/asm/sections.h @@ -0,0 +1,8 @@ +#ifndef _S390_SECTIONS_H +#define _S390_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char _eshared[], _ehead[]; + +#endif diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h new file mode 100644 index 00000000000..8bfce3475b1 --- /dev/null +++ b/arch/s390/include/asm/segment.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SEGMENT_H +#define _ASM_SEGMENT_H + +#endif diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h new file mode 100644 index 00000000000..5b3e48ef534 --- /dev/null +++ b/arch/s390/include/asm/serial.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SERIAL_H +#define _ASM_S390_SERIAL_H + +#define BASE_BAUD 0 + +#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h new file mode 100644 index 00000000000..089a49814c5 --- /dev/null +++ b/arch/s390/include/asm/setup.h @@ -0,0 +1,151 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2010 + */ +#ifndef _ASM_S390_SETUP_H +#define _ASM_S390_SETUP_H + +#include <uapi/asm/setup.h> + + +#define PARMAREA 0x10400 + +#ifndef __ASSEMBLY__ + +#include <asm/lowcore.h> +#include <asm/types.h> + +#ifndef CONFIG_64BIT +#define IPL_DEVICE (*(unsigned long *) (0x10404)) +#define INITRD_START (*(unsigned long *) (0x1040C)) +#define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) +#else /* CONFIG_64BIT */ +#define IPL_DEVICE (*(unsigned long *) (0x10400)) +#define INITRD_START (*(unsigned long *) (0x10408)) +#define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) +#endif /* CONFIG_64BIT */ +#define COMMAND_LINE ((char *) (0x10480)) + +extern int memory_end_set; +extern unsigned long memory_end; +extern unsigned long max_physmem_end; + +extern void detect_memory_memblock(void); + +/* + * Machine features detected in head.S + */ + +#define MACHINE_FLAG_VM (1UL << 0) +#define MACHINE_FLAG_IEEE (1UL << 1) +#define MACHINE_FLAG_CSP (1UL << 2) +#define MACHINE_FLAG_MVPG (1UL << 3) +#define MACHINE_FLAG_DIAG44 (1UL << 4) +#define MACHINE_FLAG_IDTE (1UL << 5) +#define MACHINE_FLAG_DIAG9C (1UL << 6) +#define MACHINE_FLAG_KVM (1UL << 8) +#define MACHINE_FLAG_ESOP (1UL << 9) +#define MACHINE_FLAG_EDAT1 (1UL << 10) +#define MACHINE_FLAG_EDAT2 (1UL << 11) +#define MACHINE_FLAG_LPAR (1UL << 12) +#define MACHINE_FLAG_LPP (1UL << 13) +#define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_TE (1UL << 15) +#define MACHINE_FLAG_RRBM (1UL << 16) +#define MACHINE_FLAG_TLB_LC (1UL << 17) + +#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) +#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) +#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) + +#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) +#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) +#define MACHINE_HAS_PFMF MACHINE_HAS_EDAT1 +#define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1 + +#ifndef CONFIG_64BIT +#define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE) +#define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP) +#define MACHINE_HAS_IDTE (0) +#define MACHINE_HAS_DIAG44 (1) +#define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) +#define MACHINE_HAS_EDAT1 (0) +#define MACHINE_HAS_EDAT2 (0) +#define MACHINE_HAS_LPP (0) +#define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_TE (0) +#define MACHINE_HAS_RRBM (0) +#define MACHINE_HAS_TLB_LC (0) +#else /* CONFIG_64BIT */ +#define MACHINE_HAS_IEEE (1) +#define MACHINE_HAS_CSP (1) +#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) +#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) +#define MACHINE_HAS_MVPG (1) +#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) +#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) +#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) +#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) +#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) +#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) +#endif /* CONFIG_64BIT */ + +/* + * Console mode. Override with conmode= + */ +extern unsigned int console_mode; +extern unsigned int console_devno; +extern unsigned int console_irq; + +extern char vmhalt_cmd[]; +extern char vmpoff_cmd[]; + +#define CONSOLE_IS_UNDEFINED (console_mode == 0) +#define CONSOLE_IS_SCLP (console_mode == 1) +#define CONSOLE_IS_3215 (console_mode == 2) +#define CONSOLE_IS_3270 (console_mode == 3) +#define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) +#define SET_CONSOLE_3215 do { console_mode = 2; } while (0) +#define SET_CONSOLE_3270 do { console_mode = 3; } while (0) + +#define NSS_NAME_SIZE 8 +extern char kernel_nss_name[]; + +#ifdef CONFIG_PFAULT +extern int pfault_init(void); +extern void pfault_fini(void); +#else /* CONFIG_PFAULT */ +#define pfault_init() ({-1;}) +#define pfault_fini() do { } while (0) +#endif /* CONFIG_PFAULT */ + +extern void cmma_init(void); + +extern void (*_machine_restart)(char *command); +extern void (*_machine_halt)(void); +extern void (*_machine_power_off)(void); + +#else /* __ASSEMBLY__ */ + +#ifndef CONFIG_64BIT +#define IPL_DEVICE 0x10404 +#define INITRD_START 0x1040C +#define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 +#else /* CONFIG_64BIT */ +#define IPL_DEVICE 0x10400 +#define INITRD_START 0x10408 +#define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 +#endif /* CONFIG_64BIT */ +#define COMMAND_LINE 0x10480 + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_S390_SETUP_H */ diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h new file mode 100644 index 00000000000..4e16aede4b0 --- /dev/null +++ b/arch/s390/include/asm/sfp-machine.h @@ -0,0 +1,142 @@ +/* Machine-dependent software floating-point definitions. + S/390 kernel version. + Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jj@ultra.linux.cz), + David S. Miller (davem@redhat.com) and + Peter Maydell (pmaydell@chiark.greenend.org.uk). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _SFP_MACHINE_H +#define _SFP_MACHINE_H + + +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned int +#define _FP_WS_TYPE signed int +#define _FP_I_TYPE int + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 + +/* + * If one NaN is signaling and the other is not, + * we choose that one, otherwise we choose X. + */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Some assembly to speed things up. */ +#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ + unsigned int __r2 = (x2) + (y2); \ + unsigned int __r1 = (x1); \ + unsigned int __r0 = (x0); \ + asm volatile( \ + " alr %2,%3\n" \ + " brc 12,0f\n" \ + " lhi 0,1\n" \ + " alr %1,0\n" \ + " brc 12,0f\n" \ + " alr %0,0\n" \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ + : "d" (y0), "i" (1) : "cc", "0" ); \ + asm volatile( \ + " alr %1,%2\n" \ + " brc 12,0f\n" \ + " ahi %0,1\n" \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1) \ + : "d" (y1) : "cc"); \ + (r2) = __r2; \ + (r1) = __r1; \ + (r0) = __r0; \ +}) + +#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ + unsigned int __r2 = (x2) - (y2); \ + unsigned int __r1 = (x1); \ + unsigned int __r0 = (x0); \ + asm volatile( \ + " slr %2,%3\n" \ + " brc 3,0f\n" \ + " lhi 0,1\n" \ + " slr %1,0\n" \ + " brc 3,0f\n" \ + " slr %0,0\n" \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ + : "d" (y0) : "cc", "0"); \ + asm volatile( \ + " slr %1,%2\n" \ + " brc 3,0f\n" \ + " ahi %0,-1\n" \ + "0:" \ + : "+&d" (__r2), "+&d" (__r1) \ + : "d" (y1) : "cc"); \ + (r2) = __r2; \ + (r1) = __r1; \ + (r0) = __r0; \ +}) + +#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE mode + +/* Exception flags. */ +#define FP_EX_INVALID 0x800000 +#define FP_EX_DIVZERO 0x400000 +#define FP_EX_OVERFLOW 0x200000 +#define FP_EX_UNDERFLOW 0x100000 +#define FP_EX_INEXACT 0x080000 + +/* We write the results always */ +#define FP_INHIBIT_RESULTS 0 + +#endif diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h new file mode 100644 index 00000000000..5959bfb3b69 --- /dev/null +++ b/arch/s390/include/asm/sfp-util.h @@ -0,0 +1,77 @@ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <asm/byteorder.h> + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \ + unsigned int __sh = (ah); \ + unsigned int __sl = (al); \ + asm volatile( \ + " alr %1,%3\n" \ + " brc 12,0f\n" \ + " ahi %0,1\n" \ + "0: alr %0,%2" \ + : "+&d" (__sh), "+d" (__sl) \ + : "d" (bh), "d" (bl) : "cc"); \ + (sh) = __sh; \ + (sl) = __sl; \ +}) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ + unsigned int __sh = (ah); \ + unsigned int __sl = (al); \ + asm volatile( \ + " slr %1,%3\n" \ + " brc 3,0f\n" \ + " ahi %0,-1\n" \ + "0: slr %0,%2" \ + : "+&d" (__sh), "+d" (__sl) \ + : "d" (bh), "d" (bl) : "cc"); \ + (sh) = __sh; \ + (sl) = __sl; \ +}) + +/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */ +#define umul_ppmm(wh, wl, u, v) ({ \ + unsigned int __wh = u; \ + unsigned int __wl = v; \ + asm volatile( \ + " ltr 1,%0\n" \ + " mr 0,%1\n" \ + " jnm 0f\n" \ + " alr 0,%1\n" \ + "0: ltr %1,%1\n" \ + " jnm 1f\n" \ + " alr 0,%0\n" \ + "1: lr %0,0\n" \ + " lr %1,1\n" \ + : "+d" (__wh), "+d" (__wl) \ + : : "0", "1", "cc"); \ + wh = __wh; \ + wl = __wl; \ +}) + +#ifdef CONFIG_64BIT +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { unsigned long __n; \ + unsigned int __r, __d; \ + __n = ((unsigned long)(n1) << 32) + n0; \ + __d = (d); \ + (q) = __n / __d; \ + (r) = __n % __d; \ + } while (0) +#else +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { unsigned int __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, + unsigned int , unsigned int); +#endif + +#define UDIV_NEEDS_NORMALIZATION 0 + +#define abort() BUG() + +#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h new file mode 100644 index 00000000000..e985182738f --- /dev/null +++ b/arch/s390/include/asm/shmparam.h @@ -0,0 +1,11 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/shmparam.h" + */ +#ifndef _ASM_S390_SHMPARAM_H +#define _ASM_S390_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASM_S390_SHMPARAM_H */ diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h new file mode 100644 index 00000000000..abf9e573594 --- /dev/null +++ b/arch/s390/include/asm/signal.h @@ -0,0 +1,25 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/signal.h" + */ +#ifndef _ASMS390_SIGNAL_H +#define _ASMS390_SIGNAL_H + +#include <uapi/asm/signal.h> + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ +#include <asm/sigcontext.h> +#define _NSIG _SIGCONTEXT_NSIG +#define _NSIG_BPW _SIGCONTEXT_NSIG_BPW +#define _NSIG_WORDS _SIGCONTEXT_NSIG_WORDS + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#define __ARCH_HAS_SA_RESTORER +#endif diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h new file mode 100644 index 00000000000..bf9c823d402 --- /dev/null +++ b/arch/s390/include/asm/sigp.h @@ -0,0 +1,53 @@ +#ifndef __S390_ASM_SIGP_H +#define __S390_ASM_SIGP_H + +/* SIGP order codes */ +#define SIGP_SENSE 1 +#define SIGP_EXTERNAL_CALL 2 +#define SIGP_EMERGENCY_SIGNAL 3 +#define SIGP_START 4 +#define SIGP_STOP 5 +#define SIGP_RESTART 6 +#define SIGP_STOP_AND_STORE_STATUS 9 +#define SIGP_INITIAL_CPU_RESET 11 +#define SIGP_SET_PREFIX 13 +#define SIGP_STORE_STATUS_AT_ADDRESS 14 +#define SIGP_SET_ARCHITECTURE 18 +#define SIGP_COND_EMERGENCY_SIGNAL 19 +#define SIGP_SENSE_RUNNING 21 + +/* SIGP condition codes */ +#define SIGP_CC_ORDER_CODE_ACCEPTED 0 +#define SIGP_CC_STATUS_STORED 1 +#define SIGP_CC_BUSY 2 +#define SIGP_CC_NOT_OPERATIONAL 3 + +/* SIGP cpu status bits */ + +#define SIGP_STATUS_CHECK_STOP 0x00000010UL +#define SIGP_STATUS_STOPPED 0x00000040UL +#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL +#define SIGP_STATUS_INVALID_PARAMETER 0x00000100UL +#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL +#define SIGP_STATUS_NOT_RUNNING 0x00000400UL + +#ifndef __ASSEMBLY__ + +static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +{ + register unsigned int reg1 asm ("1") = parm; + int cc; + + asm volatile( + " sigp %1,%2,0(%3)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + if (status && cc == 1) + *status = reg1; + return cc; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __S390_ASM_SIGP_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h new file mode 100644 index 00000000000..4f1307962a9 --- /dev/null +++ b/arch/s390/include/asm/smp.h @@ -0,0 +1,78 @@ +/* + * Copyright IBM Corp. 1999, 2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include <asm/sigp.h> + +#ifdef CONFIG_SMP + +#include <asm/lowcore.h> + +#define raw_smp_processor_id() (S390_lowcore.cpu_nr) + +extern struct mutex smp_cpu_state_mutex; + +extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +extern void smp_call_online_cpu(void (*func)(void *), void *); +extern void smp_call_ipl_cpu(void (*func)(void *), void *); + +extern int smp_find_processor_id(u16 address); +extern int smp_store_status(int cpu); +extern int smp_vcpu_scheduled(int cpu); +extern void smp_yield_cpu(int cpu); +extern void smp_yield(void); +extern void smp_cpu_set_polarization(int cpu, int val); +extern int smp_cpu_get_polarization(int cpu); +extern void smp_fill_possible_mask(void); + +#else /* CONFIG_SMP */ + +static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) +{ + func(data); +} + +static inline void smp_call_online_cpu(void (*func)(void *), void *data) +{ + func(data); +} + +static inline int smp_find_processor_id(u16 address) { return 0; } +static inline int smp_store_status(int cpu) { return 0; } +static inline int smp_vcpu_scheduled(int cpu) { return 1; } +static inline void smp_yield_cpu(int cpu) { } +static inline void smp_yield(void) { } +static inline void smp_fill_possible_mask(void) { } + +#endif /* CONFIG_SMP */ + +static inline void smp_stop_cpu(void) +{ + u16 pcpu = stap(); + + for (;;) { + __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL); + cpu_relax(); + } +} + +#ifdef CONFIG_HOTPLUG_CPU +extern int smp_rescan_cpus(void); +extern void __noreturn cpu_die(void); +extern void __cpu_die(unsigned int cpu); +extern int __cpu_disable(void); +#else +static inline int smp_rescan_cpus(void) { return 0; } +static inline void cpu_die(void) { } +#endif + +#endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h new file mode 100644 index 00000000000..a60d085ddb4 --- /dev/null +++ b/arch/s390/include/asm/sparsemem.h @@ -0,0 +1,16 @@ +#ifndef _ASM_S390_SPARSEMEM_H +#define _ASM_S390_SPARSEMEM_H + +#ifdef CONFIG_64BIT + +#define SECTION_SIZE_BITS 28 +#define MAX_PHYSMEM_BITS 46 + +#else + +#define SECTION_SIZE_BITS 25 +#define MAX_PHYSMEM_BITS 31 + +#endif /* CONFIG_64BIT */ + +#endif /* _ASM_S390_SPARSEMEM_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h new file mode 100644 index 00000000000..96879f7ad6d --- /dev/null +++ b/arch/s390/include/asm/spinlock.h @@ -0,0 +1,202 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/spinlock.h" + */ + +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include <linux/smp.h> + +#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) + +extern int spin_retry; + +static inline int +_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) +{ + unsigned int old_expected = old; + + asm volatile( + " cs %0,%3,%1" + : "=d" (old), "=Q" (*lock) + : "0" (old), "d" (new), "Q" (*lock) + : "cc", "memory" ); + return old == old_expected; +} + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + * + * (the type definitions are in asm/spinlock_types.h) + */ + +void arch_spin_lock_wait(arch_spinlock_t *); +int arch_spin_trylock_retry(arch_spinlock_t *); +void arch_spin_relax(arch_spinlock_t *); +void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); + +static inline u32 arch_spin_lockval(int cpu) +{ + return ~cpu; +} + +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.lock == 0; +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lp) +{ + return ACCESS_ONCE(lp->lock) != 0; +} + +static inline int arch_spin_trylock_once(arch_spinlock_t *lp) +{ + barrier(); + return likely(arch_spin_value_unlocked(*lp) && + _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); +} + +static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) +{ + return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); +} + +static inline void arch_spin_lock(arch_spinlock_t *lp) +{ + if (!arch_spin_trylock_once(lp)) + arch_spin_lock_wait(lp); +} + +static inline void arch_spin_lock_flags(arch_spinlock_t *lp, + unsigned long flags) +{ + if (!arch_spin_trylock_once(lp)) + arch_spin_lock_wait_flags(lp, flags); +} + +static inline int arch_spin_trylock(arch_spinlock_t *lp) +{ + if (!arch_spin_trylock_once(lp)) + return arch_spin_trylock_retry(lp); + return 1; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lp) +{ + arch_spin_tryrelease_once(lp); +} + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + while (arch_spin_is_locked(lock)) + arch_spin_relax(lock); +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_read_can_lock(x) ((int)(x)->lock >= 0) + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +#define arch_write_can_lock(x) ((x)->lock == 0) + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_read_trylock_retry(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_write_trylock_retry(arch_rwlock_t *lp); + +static inline int arch_read_trylock_once(arch_rwlock_t *rw) +{ + unsigned int old = ACCESS_ONCE(rw->lock); + return likely((int) old >= 0 && + _raw_compare_and_swap(&rw->lock, old, old + 1)); +} + +static inline int arch_write_trylock_once(arch_rwlock_t *rw) +{ + unsigned int old = ACCESS_ONCE(rw->lock); + return likely(old == 0 && + _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); +} + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + if (!arch_read_trylock_once(rw)) + _raw_read_lock_wait(rw); +} + +static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) +{ + if (!arch_read_trylock_once(rw)) + _raw_read_lock_wait_flags(rw, flags); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int old; + + do { + old = ACCESS_ONCE(rw->lock); + } while (!_raw_compare_and_swap(&rw->lock, old, old - 1)); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + if (!arch_write_trylock_once(rw)) + _raw_write_lock_wait(rw); +} + +static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) +{ + if (!arch_write_trylock_once(rw)) + _raw_write_lock_wait_flags(rw, flags); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + _raw_compare_and_swap(&rw->lock, 0x80000000, 0); +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + if (!arch_read_trylock_once(rw)) + return _raw_read_trylock_retry(rw); + return 1; +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + if (!arch_write_trylock_once(rw)) + return _raw_write_trylock_retry(rw); + return 1; +} + +#define arch_read_relax(lock) cpu_relax() +#define arch_write_relax(lock) cpu_relax() + +#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h new file mode 100644 index 00000000000..b2cd6ff7c2c --- /dev/null +++ b/arch/s390/include/asm/spinlock_types.h @@ -0,0 +1,20 @@ +#ifndef __ASM_SPINLOCK_TYPES_H +#define __ASM_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + unsigned int lock; +} __attribute__ ((aligned (4))) arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } + +typedef struct { + unsigned int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h new file mode 100644 index 00000000000..7e2dcd7c57e --- /dev/null +++ b/arch/s390/include/asm/string.h @@ -0,0 +1,143 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#ifndef _S390_STRING_H_ +#define _S390_STRING_H_ + +#ifndef _LINUX_TYPES_H +#include <linux/types.h> +#endif + +#define __HAVE_ARCH_MEMCHR /* inline & arch function */ +#define __HAVE_ARCH_MEMCMP /* arch function */ +#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */ +#define __HAVE_ARCH_MEMSCAN /* inline & arch function */ +#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */ +#define __HAVE_ARCH_STRCAT /* inline & arch function */ +#define __HAVE_ARCH_STRCMP /* arch function */ +#define __HAVE_ARCH_STRCPY /* inline & arch function */ +#define __HAVE_ARCH_STRLCAT /* arch function */ +#define __HAVE_ARCH_STRLCPY /* arch function */ +#define __HAVE_ARCH_STRLEN /* inline & arch function */ +#define __HAVE_ARCH_STRNCAT /* arch function */ +#define __HAVE_ARCH_STRNCPY /* arch function */ +#define __HAVE_ARCH_STRNLEN /* inline & arch function */ +#define __HAVE_ARCH_STRRCHR /* arch function */ +#define __HAVE_ARCH_STRSTR /* arch function */ + +/* Prototypes for non-inlined arch strings functions. */ +extern int memcmp(const void *, const void *, size_t); +extern void *memcpy(void *, const void *, size_t); +extern void *memset(void *, int, size_t); +extern int strcmp(const char *,const char *); +extern size_t strlcat(char *, const char *, size_t); +extern size_t strlcpy(char *, const char *, size_t); +extern char *strncat(char *, const char *, size_t); +extern char *strncpy(char *, const char *, size_t); +extern char *strrchr(const char *, int); +extern char *strstr(const char *, const char *); + +#undef __HAVE_ARCH_MEMMOVE +#undef __HAVE_ARCH_STRCHR +#undef __HAVE_ARCH_STRNCHR +#undef __HAVE_ARCH_STRNCMP +#undef __HAVE_ARCH_STRNICMP +#undef __HAVE_ARCH_STRPBRK +#undef __HAVE_ARCH_STRSEP +#undef __HAVE_ARCH_STRSPN + +#if !defined(IN_ARCH_STRING_C) + +static inline void *memchr(const void * s, int c, size_t n) +{ + register int r0 asm("0") = (char) c; + const void *ret = s + n; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b\n" + " jl 1f\n" + " la %0,0\n" + "1:" + : "+a" (ret), "+&a" (s) : "d" (r0) : "cc"); + return (void *) ret; +} + +static inline void *memscan(void *s, int c, size_t n) +{ + register int r0 asm("0") = (char) c; + const void *ret = s + n; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b\n" + : "+a" (ret), "+&a" (s) : "d" (r0) : "cc"); + return (void *) ret; +} + +static inline char *strcat(char *dst, const char *src) +{ + register int r0 asm("0") = 0; + unsigned long dummy; + char *ret = dst; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b\n" + "1: mvst %0,%2\n" + " jo 1b" + : "=&a" (dummy), "+a" (dst), "+a" (src) + : "d" (r0), "0" (0) : "cc", "memory" ); + return ret; +} + +static inline char *strcpy(char *dst, const char *src) +{ + register int r0 asm("0") = 0; + char *ret = dst; + + asm volatile( + "0: mvst %0,%1\n" + " jo 0b" + : "+&a" (dst), "+&a" (src) : "d" (r0) + : "cc", "memory"); + return ret; +} + +static inline size_t strlen(const char *s) +{ + register unsigned long r0 asm("0") = 0; + const char *tmp = s; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b" + : "+d" (r0), "+a" (tmp) : : "cc"); + return r0 - (unsigned long) s; +} + +static inline size_t strnlen(const char * s, size_t n) +{ + register int r0 asm("0") = 0; + const char *tmp = s; + const char *end = s + n; + + asm volatile( + "0: srst %0,%1\n" + " jo 0b" + : "+a" (end), "+a" (tmp) : "d" (r0) : "cc"); + return end - s; +} +#else /* IN_ARCH_STRING_C */ +void *memchr(const void * s, int c, size_t n); +void *memscan(void *s, int c, size_t n); +char *strcat(char *dst, const char *src); +char *strcpy(char *dst, const char *src); +size_t strlen(const char *s); +size_t strnlen(const char * s, size_t n); +#endif /* !IN_ARCH_STRING_C */ + +#endif /* __S390_STRING_H_ */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h new file mode 100644 index 00000000000..18ea9e3f814 --- /dev/null +++ b/arch/s390/include/asm/switch_to.h @@ -0,0 +1,137 @@ +/* + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __ASM_SWITCH_TO_H +#define __ASM_SWITCH_TO_H + +#include <linux/thread_info.h> +#include <asm/ptrace.h> + +extern struct task_struct *__switch_to(void *, void *); +extern void update_cr_regs(struct task_struct *task); + +static inline int test_fp_ctl(u32 fpc) +{ + u32 orig_fpc; + int rc; + + if (!MACHINE_HAS_IEEE) + return 0; + + asm volatile( + " efpc %1\n" + " sfpc %2\n" + "0: sfpc %1\n" + " la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (orig_fpc) + : "d" (fpc), "0" (-EINVAL)); + return rc; +} + +static inline void save_fp_ctl(u32 *fpc) +{ + if (!MACHINE_HAS_IEEE) + return; + + asm volatile( + " stfpc %0\n" + : "+Q" (*fpc)); +} + +static inline int restore_fp_ctl(u32 *fpc) +{ + int rc; + + if (!MACHINE_HAS_IEEE) + return 0; + + asm volatile( + " lfpc %1\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); + return rc; +} + +static inline void save_fp_regs(freg_t *fprs) +{ + asm volatile("std 0,%0" : "=Q" (fprs[0])); + asm volatile("std 2,%0" : "=Q" (fprs[2])); + asm volatile("std 4,%0" : "=Q" (fprs[4])); + asm volatile("std 6,%0" : "=Q" (fprs[6])); + if (!MACHINE_HAS_IEEE) + return; + asm volatile("std 1,%0" : "=Q" (fprs[1])); + asm volatile("std 3,%0" : "=Q" (fprs[3])); + asm volatile("std 5,%0" : "=Q" (fprs[5])); + asm volatile("std 7,%0" : "=Q" (fprs[7])); + asm volatile("std 8,%0" : "=Q" (fprs[8])); + asm volatile("std 9,%0" : "=Q" (fprs[9])); + asm volatile("std 10,%0" : "=Q" (fprs[10])); + asm volatile("std 11,%0" : "=Q" (fprs[11])); + asm volatile("std 12,%0" : "=Q" (fprs[12])); + asm volatile("std 13,%0" : "=Q" (fprs[13])); + asm volatile("std 14,%0" : "=Q" (fprs[14])); + asm volatile("std 15,%0" : "=Q" (fprs[15])); +} + +static inline void restore_fp_regs(freg_t *fprs) +{ + asm volatile("ld 0,%0" : : "Q" (fprs[0])); + asm volatile("ld 2,%0" : : "Q" (fprs[2])); + asm volatile("ld 4,%0" : : "Q" (fprs[4])); + asm volatile("ld 6,%0" : : "Q" (fprs[6])); + if (!MACHINE_HAS_IEEE) + return; + asm volatile("ld 1,%0" : : "Q" (fprs[1])); + asm volatile("ld 3,%0" : : "Q" (fprs[3])); + asm volatile("ld 5,%0" : : "Q" (fprs[5])); + asm volatile("ld 7,%0" : : "Q" (fprs[7])); + asm volatile("ld 8,%0" : : "Q" (fprs[8])); + asm volatile("ld 9,%0" : : "Q" (fprs[9])); + asm volatile("ld 10,%0" : : "Q" (fprs[10])); + asm volatile("ld 11,%0" : : "Q" (fprs[11])); + asm volatile("ld 12,%0" : : "Q" (fprs[12])); + asm volatile("ld 13,%0" : : "Q" (fprs[13])); + asm volatile("ld 14,%0" : : "Q" (fprs[14])); + asm volatile("ld 15,%0" : : "Q" (fprs[15])); +} + +static inline void save_access_regs(unsigned int *acrs) +{ + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); +} + +static inline void restore_access_regs(unsigned int *acrs) +{ + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); +} + +#define switch_to(prev,next,last) do { \ + if (prev->mm) { \ + save_fp_ctl(&prev->thread.fp_regs.fpc); \ + save_fp_regs(prev->thread.fp_regs.fprs); \ + save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ + } \ + if (next->mm) { \ + restore_fp_ctl(&next->thread.fp_regs.fpc); \ + restore_fp_regs(next->thread.fp_regs.fprs); \ + restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + update_cr_regs(next); \ + } \ + prev = __switch_to(prev,next); \ +} while (0) + +#endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h new file mode 100644 index 00000000000..abad78d5b10 --- /dev/null +++ b/arch/s390/include/asm/syscall.h @@ -0,0 +1,100 @@ +/* + * Access to user system call parameters and results + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include <uapi/linux/audit.h> +#include <linux/sched.h> +#include <linux/err.h> +#include <asm/ptrace.h> + +/* + * The syscall table always contains 32 bit pointers since we know that the + * address of the function to be called is (way) below 4GB. So the "int" + * type here is what we want [need] for both 32 bit and 64 bit systems. + */ +extern const unsigned int sys_call_table[]; +extern const unsigned int sys_call_table_emu[]; + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return test_pt_regs_flag(regs, PIF_SYSCALL) ? + (regs->int_code & 0xffff) : -1; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->gprs[2] = regs->orig_gpr2; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->gprs[2]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->gprs[2] = error ? -error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + unsigned long mask = -1UL; + + BUG_ON(i + n > 6); +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + mask = 0xffffffff; +#endif + while (n-- > 0) + if (i + n > 0) + args[n] = regs->gprs[2 + i + n] & mask; + if (i == 0) + args[0] = regs->orig_gpr2 & mask; +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + while (n-- > 0) + if (i + n > 0) + regs->gprs[2 + i + n] = args[n]; + if (i == 0) + regs->orig_gpr2 = args[0]; +} + +static inline int syscall_get_arch(void) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(current, TIF_31BIT)) + return AUDIT_ARCH_S390; +#endif + return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390; +} +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h new file mode 100644 index 00000000000..f92428e459f --- /dev/null +++ b/arch/s390/include/asm/sysinfo.h @@ -0,0 +1,169 @@ +/* + * definition for store system information stsi + * + * Copyright IBM Corp. 2001, 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Ulrich Weigand <weigand@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __ASM_S390_SYSINFO_H +#define __ASM_S390_SYSINFO_H + +#include <asm/bitsperlong.h> + +struct sysinfo_1_1_1 { + unsigned char p:1; + unsigned char :6; + unsigned char t:1; + unsigned char :8; + unsigned char ccr; + unsigned char cai; + char reserved_0[28]; + char manufacturer[16]; + char type[4]; + char reserved_1[12]; + char model_capacity[16]; + char sequence[16]; + char plant[4]; + char model[16]; + char model_perm_cap[16]; + char model_temp_cap[16]; + unsigned int model_cap_rating; + unsigned int model_perm_cap_rating; + unsigned int model_temp_cap_rating; + unsigned char typepct[5]; + unsigned char reserved_2[3]; + unsigned int ncr; + unsigned int npr; + unsigned int ntr; +}; + +struct sysinfo_1_2_1 { + char reserved_0[80]; + char sequence[16]; + char plant[4]; + char reserved_1[2]; + unsigned short cpu_address; +}; + +struct sysinfo_1_2_2 { + char format; + char reserved_0[1]; + unsigned short acc_offset; + char reserved_1[20]; + unsigned int nominal_cap; + unsigned int secondary_cap; + unsigned int capability; + unsigned short cpus_total; + unsigned short cpus_configured; + unsigned short cpus_standby; + unsigned short cpus_reserved; + unsigned short adjustment[0]; +}; + +struct sysinfo_1_2_2_extension { + unsigned int alt_capability; + unsigned short alt_adjustment[0]; +}; + +struct sysinfo_2_2_1 { + char reserved_0[80]; + char sequence[16]; + char plant[4]; + unsigned short cpu_id; + unsigned short cpu_address; +}; + +struct sysinfo_2_2_2 { + char reserved_0[32]; + unsigned short lpar_number; + char reserved_1; + unsigned char characteristics; + unsigned short cpus_total; + unsigned short cpus_configured; + unsigned short cpus_standby; + unsigned short cpus_reserved; + char name[8]; + unsigned int caf; + char reserved_2[16]; + unsigned short cpus_dedicated; + unsigned short cpus_shared; +}; + +#define LPAR_CHAR_DEDICATED (1 << 7) +#define LPAR_CHAR_SHARED (1 << 6) +#define LPAR_CHAR_LIMITED (1 << 5) + +struct sysinfo_3_2_2 { + char reserved_0[31]; + unsigned char :4; + unsigned char count:4; + struct { + char reserved_0[4]; + unsigned short cpus_total; + unsigned short cpus_configured; + unsigned short cpus_standby; + unsigned short cpus_reserved; + char name[8]; + unsigned int caf; + char cpi[16]; + char reserved_1[24]; + + } vm[8]; + char reserved_544[3552]; +}; + +extern int topology_max_mnest; + +#define TOPOLOGY_CPU_BITS 64 +#define TOPOLOGY_NR_MAG 6 + +struct topology_cpu { + unsigned char reserved0[4]; + unsigned char :6; + unsigned char pp:2; + unsigned char reserved1; + unsigned short origin; + unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG]; +}; + +struct topology_container { + unsigned char reserved[7]; + unsigned char id; +}; + +union topology_entry { + unsigned char nl; + struct topology_cpu cpu; + struct topology_container container; +}; + +struct sysinfo_15_1_x { + unsigned char reserved0[2]; + unsigned short length; + unsigned char mag[TOPOLOGY_NR_MAG]; + unsigned char reserved1; + unsigned char mnest; + unsigned char reserved2[4]; + union topology_entry tle[0]; +}; + +int stsi(void *sysinfo, int fc, int sel1, int sel2); + +/* + * Service level reporting interface. + */ +struct service_level { + struct list_head list; + void (*seq_print)(struct seq_file *, struct service_level *); +}; + +int register_service_level(struct service_level *); +int unregister_service_level(struct service_level *); + +#endif /* __ASM_S390_SYSINFO_H */ diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h new file mode 100644 index 00000000000..db028d17f06 --- /dev/null +++ b/arch/s390/include/asm/termios.h @@ -0,0 +1,25 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/termios.h" + */ +#ifndef _S390_TERMIOS_H +#define _S390_TERMIOS_H + +#include <uapi/asm/termios.h> + + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) + +#include <asm-generic/termios-base.h> + +#endif /* _S390_TERMIOS_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h new file mode 100644 index 00000000000..b833e9c0bfb --- /dev/null +++ b/arch/s390/include/asm/thread_info.h @@ -0,0 +1,109 @@ +/* + * S390 version + * Copyright IBM Corp. 2002, 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#ifndef _ASM_THREAD_INFO_H +#define _ASM_THREAD_INFO_H + +/* + * Size of kernel stack for each process + */ +#ifndef CONFIG_64BIT +#define THREAD_ORDER 1 +#define ASYNC_ORDER 1 +#else /* CONFIG_64BIT */ +#define THREAD_ORDER 2 +#define ASYNC_ORDER 2 +#endif /* CONFIG_64BIT */ + +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER) + +#ifndef __ASSEMBLY__ +#include <asm/lowcore.h> +#include <asm/page.h> +#include <asm/processor.h> + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, the assembly constants must also be changed + */ +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + unsigned long sys_call_table; /* System call table address */ + unsigned int cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ + struct restart_block restart_block; + unsigned int system_call; + __u64 user_timer; + __u64 system_timer; + unsigned long last_break; /* last breaking-event-address. */ +}; + +/* + * macros/functions for gaining access to the thread information structure + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + return (struct thread_info *) S390_lowcore.thread_info; +} + +#define THREAD_SIZE_ORDER THREAD_ORDER + +#endif + +/* + * thread information flags bit numbers + */ +#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ +#define TIF_SECCOMP 5 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_31BIT 16 /* 32bit process */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ +#define TIF_SINGLE_STEP 19 /* This task is single stepped */ +#define TIF_BLOCK_STEP 20 /* This task is block stepped */ + +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) +#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1<<TIF_SECCOMP) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_31BIT (1<<TIF_31BIT) +#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) + +#ifdef CONFIG_64BIT +#define is_32bit_task() (test_thread_flag(TIF_31BIT)) +#else +#define is_32bit_task() (1) +#endif + +#endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h new file mode 100644 index 00000000000..8beee1cceba --- /dev/null +++ b/arch/s390/include/asm/timex.h @@ -0,0 +1,161 @@ +/* + * S390 version + * Copyright IBM Corp. 1999 + * + * Derived from "include/asm-i386/timex.h" + * Copyright (C) 1992, Linus Torvalds + */ + +#ifndef _ASM_S390_TIMEX_H +#define _ASM_S390_TIMEX_H + +#include <asm/lowcore.h> + +/* The value of the TOD clock for 1.1.1970. */ +#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL + +/* Inline functions for clock register access. */ +static inline int set_tod_clock(__u64 time) +{ + int cc; + + asm volatile( + " sck %1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "Q" (time) : "cc"); + return cc; +} + +static inline int store_tod_clock(__u64 *time) +{ + int cc; + + asm volatile( + " stck %1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "=Q" (*time) : : "cc"); + return cc; +} + +static inline void set_clock_comparator(__u64 time) +{ + asm volatile("sckc %0" : : "Q" (time)); +} + +static inline void store_clock_comparator(__u64 *time) +{ + asm volatile("stckc %0" : "=Q" (*time)); +} + +void clock_comparator_work(void); + +static inline unsigned long long local_tick_disable(void) +{ + unsigned long long old; + + old = S390_lowcore.clock_comparator; + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + return old; +} + +static inline void local_tick_enable(unsigned long long comp) +{ + S390_lowcore.clock_comparator = comp; + set_clock_comparator(S390_lowcore.clock_comparator); +} + +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ + +typedef unsigned long long cycles_t; + +static inline void get_tod_clock_ext(char clk[16]) +{ + typedef struct { char _[sizeof(clk)]; } addrtype; + + asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); +} + +static inline unsigned long long get_tod_clock(void) +{ + unsigned char clk[16]; + get_tod_clock_ext(clk); + return *((unsigned long long *)&clk[1]); +} + +static inline unsigned long long get_tod_clock_fast(void) +{ +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + unsigned long long clk; + + asm volatile("stckf %0" : "=Q" (clk) : : "cc"); + return clk; +#else + return get_tod_clock(); +#endif +} + +static inline cycles_t get_cycles(void) +{ + return (cycles_t) get_tod_clock() >> 2; +} + +int get_sync_clock(unsigned long long *clock); +void init_cpu_timer(void); +unsigned long long monotonic_clock(void); + +void tod_to_timeval(__u64, struct timespec *); + +static inline +void stck_to_timespec(unsigned long long stck, struct timespec *ts) +{ + tod_to_timeval(stck - TOD_UNIX_EPOCH, ts); +} + +extern u64 sched_clock_base_cc; + +/** + * get_clock_monotonic - returns current time in clock rate units + * + * The caller must ensure that preemption is disabled. + * The clock and sched_clock_base get changed via stop_machine. + * Therefore preemption must be disabled when calling this + * function, otherwise the returned value is not guaranteed to + * be monotonic. + */ +static inline unsigned long long get_tod_clock_monotonic(void) +{ + return get_tod_clock() - sched_clock_base_cc; +} + +/** + * tod_to_ns - convert a TOD format value to nanoseconds + * @todval: to be converted TOD format value + * Returns: number of nanoseconds that correspond to the TOD format value + * + * Converting a 64 Bit TOD format value to nanoseconds means that the value + * must be divided by 4.096. In order to achieve that we multiply with 125 + * and divide by 512: + * + * ns = (todval * 125) >> 9; + * + * In order to avoid an overflow with the multiplication we can rewrite this. + * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * we end up with + * + * ns = ((2^32 * th + tl) * 125 ) >> 9; + * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * + */ +static inline unsigned long long tod_to_ns(unsigned long long todval) +{ + unsigned long long ns; + + ns = ((todval >> 32) << 23) * 125; + ns += ((todval & 0xffffffff) * 125) >> 9; + return ns; +} + +#endif diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h new file mode 100644 index 00000000000..a25f09fbaf3 --- /dev/null +++ b/arch/s390/include/asm/tlb.h @@ -0,0 +1,151 @@ +#ifndef _S390_TLB_H +#define _S390_TLB_H + +/* + * TLB flushing on s390 is complicated. The following requirement + * from the principles of operation is the most arduous: + * + * "A valid table entry must not be changed while it is attached + * to any CPU and may be used for translation by that CPU except to + * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY, + * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page + * table entry, or (3) make a change by means of a COMPARE AND SWAP + * AND PURGE instruction that purges the TLB." + * + * The modification of a pte of an active mm struct therefore is + * a two step process: i) invalidate the pte, ii) store the new pte. + * This is true for the page protection bit as well. + * The only possible optimization is to flush at the beginning of + * a tlb_gather_mmu cycle if the mm_struct is currently not in use. + * + * Pages used for the page tables is a different story. FIXME: more + */ + +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <asm/processor.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +struct mmu_gather { + struct mm_struct *mm; + struct mmu_table_batch *batch; + unsigned int fullmm; + unsigned long start, end; +}; + +struct mmu_table_batch { + struct rcu_head rcu; + unsigned int nr; + void *tables[0]; +}; + +#define MAX_TABLE_BATCH \ + ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) + +extern void tlb_table_flush(struct mmu_gather *tlb); +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + +static inline void tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + tlb->mm = mm; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); + tlb->batch = NULL; +} + +static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) +{ + __tlb_flush_mm_lazy(tlb->mm); +} + +static inline void tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + tlb_table_flush(tlb); +} + + +static inline void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} + +static inline void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb); +} + +/* + * Release the page cache reference for a pte removed by + * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page + * has already been freed, so just do free_page_and_swap_cache. + */ +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); + return 1; /* avoid calling tlb_flush_mmu */ +} + +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); +} + +/* + * pte_free_tlb frees a pte table and clears the CRSTE for the + * page table from the tlb. + */ +static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long address) +{ + page_table_free_rcu(tlb, (unsigned long *) pte); +} + +/* + * pmd_free_tlb frees a pmd table and clears the CRSTE for the + * segment table entry from the tlb. + * If the mm uses a two level page table the single pmd is freed + * as the pgd. pmd_free_tlb checks the asce_limit against 2GB + * to avoid the double free of the pmd in this case. + */ +static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) +{ +#ifdef CONFIG_64BIT + if (tlb->mm->context.asce_limit <= (1UL << 31)) + return; + tlb_remove_table(tlb, pmd); +#endif +} + +/* + * pud_free_tlb frees a pud table and clears the CRSTE for the + * region third table entry from the tlb. + * If the mm uses a three level page table the single pud is freed + * as the pgd. pud_free_tlb checks the asce_limit against 4TB + * to avoid the double free of the pud in this case. + */ +static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) +{ +#ifdef CONFIG_64BIT + if (tlb->mm->context.asce_limit <= (1UL << 42)) + return; + tlb_remove_table(tlb, pud); +#endif +} + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) +#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0) +#define tlb_migrate_finish(mm) do { } while (0) + +#endif /* _S390_TLB_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h new file mode 100644 index 00000000000..16c9c88658c --- /dev/null +++ b/arch/s390/include/asm/tlbflush.h @@ -0,0 +1,211 @@ +#ifndef _S390_TLBFLUSH_H +#define _S390_TLBFLUSH_H + +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/pgalloc.h> + +/* + * Flush all TLB entries on the local CPU. + */ +static inline void __tlb_flush_local(void) +{ + asm volatile("ptlb" : : : "memory"); +} + +/* + * Flush TLB entries for a specific ASCE on all CPUs + */ +static inline void __tlb_flush_idte(unsigned long asce) +{ + /* Global TLB flush for the mm */ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,0" + : : "a" (2048), "a" (asce) : "cc"); +} + +/* + * Flush TLB entries for a specific ASCE on the local CPU + */ +static inline void __tlb_flush_idte_local(unsigned long asce) +{ + /* Local TLB flush for the mm */ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,1" + : : "a" (2048), "a" (asce) : "cc"); +} + +#ifdef CONFIG_SMP +void smp_ptlb_all(void); + +/* + * Flush all TLB entries on all CPUs. + */ +static inline void __tlb_flush_global(void) +{ + register unsigned long reg2 asm("2"); + register unsigned long reg3 asm("3"); + register unsigned long reg4 asm("4"); + long dummy; + +#ifndef CONFIG_64BIT + if (!MACHINE_HAS_CSP) { + smp_ptlb_all(); + return; + } +#endif /* CONFIG_64BIT */ + + dummy = 0; + reg2 = reg3 = 0; + reg4 = ((unsigned long) &dummy) + 1; + asm volatile( + " csp %0,%2" + : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); +} + +/* + * Flush TLB entries for a specific mm on all CPUs (in case gmap is used + * this implicates multiple ASCEs!). + */ +static inline void __tlb_flush_full(struct mm_struct *mm) +{ + preempt_disable(); + atomic_add(0x10000, &mm->context.attach_count); + if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + /* Local TLB flush */ + __tlb_flush_local(); + } else { + /* Global TLB flush */ + __tlb_flush_global(); + /* Reset TLB flush mask */ + if (MACHINE_HAS_TLB_LC) + cpumask_copy(mm_cpumask(mm), + &mm->context.cpu_attach_mask); + } + atomic_sub(0x10000, &mm->context.attach_count); + preempt_enable(); +} + +/* + * Flush TLB entries for a specific ASCE on all CPUs. + */ +static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) +{ + int active, count; + + preempt_disable(); + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __tlb_flush_idte_local(asce); + } else { + if (MACHINE_HAS_IDTE) + __tlb_flush_idte(asce); + else + __tlb_flush_global(); + /* Reset TLB flush mask */ + if (MACHINE_HAS_TLB_LC) + cpumask_copy(mm_cpumask(mm), + &mm->context.cpu_attach_mask); + } + atomic_sub(0x10000, &mm->context.attach_count); + preempt_enable(); +} + +static inline void __tlb_flush_kernel(void) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) init_mm.pgd | + init_mm.context.asce_bits); + else + __tlb_flush_global(); +} +#else +#define __tlb_flush_global() __tlb_flush_local() +#define __tlb_flush_full(mm) __tlb_flush_local() + +/* + * Flush TLB entries for a specific ASCE on all CPUs. + */ +static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) +{ + if (MACHINE_HAS_TLB_LC) + __tlb_flush_idte_local(asce); + else + __tlb_flush_local(); +} + +static inline void __tlb_flush_kernel(void) +{ + if (MACHINE_HAS_TLB_LC) + __tlb_flush_idte_local((unsigned long) init_mm.pgd | + init_mm.context.asce_bits); + else + __tlb_flush_local(); +} +#endif + +static inline void __tlb_flush_mm(struct mm_struct * mm) +{ + /* + * If the machine has IDTE we prefer to do a per mm flush + * on all cpus instead of doing a local flush if the mm + * only ran on the local cpu. + */ + if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) + __tlb_flush_asce(mm, (unsigned long) mm->pgd | + mm->context.asce_bits); + else + __tlb_flush_full(mm); +} + +static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) +{ + if (mm->context.flush_mm) { + __tlb_flush_mm(mm); + mm->context.flush_mm = 0; + } +} + +/* + * TLB flushing: + * flush_tlb() - flushes the current mm struct TLBs + * flush_tlb_all() - flushes all processes TLBs + * flush_tlb_mm(mm) - flushes the specified mm context TLB's + * flush_tlb_page(vma, vmaddr) - flushes one page + * flush_tlb_range(vma, start, end) - flushes a range of pages + * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages + */ + +/* + * flush_tlb_mm goes together with ptep_set_wrprotect for the + * copy_page_range operation and flush_tlb_range is related to + * ptep_get_and_clear for change_protection. ptep_set_wrprotect and + * ptep_get_and_clear do not flush the TLBs directly if the mm has + * only one user. At the end of the update the flush_tlb_mm and + * flush_tlb_range functions need to do the flush. + */ +#define flush_tlb() do { } while (0) +#define flush_tlb_all() do { } while (0) +#define flush_tlb_page(vma, addr) do { } while (0) + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + __tlb_flush_mm_lazy(mm); +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + __tlb_flush_mm_lazy(vma->vm_mm); +} + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + __tlb_flush_kernel(); +} + +#endif /* _S390_TLBFLUSH_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h new file mode 100644 index 00000000000..56af53093d2 --- /dev/null +++ b/arch/s390/include/asm/topology.h @@ -0,0 +1,60 @@ +#ifndef _ASM_S390_TOPOLOGY_H +#define _ASM_S390_TOPOLOGY_H + +#include <linux/cpumask.h> + +struct sysinfo_15_1_x; +struct cpu; + +#ifdef CONFIG_SCHED_BOOK + +struct cpu_topology_s390 { + unsigned short core_id; + unsigned short socket_id; + unsigned short book_id; + cpumask_t core_mask; + cpumask_t book_mask; +}; + +extern struct cpu_topology_s390 cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask) +#define topology_book_id(cpu) (cpu_topology[cpu].book_id) +#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask) + +#define mc_capable() 1 + +int topology_cpu_init(struct cpu *); +int topology_set_cpu_management(int fc); +void topology_schedule_update(void); +void store_topology(struct sysinfo_15_1_x *info); +void topology_expect_change(void); +const struct cpumask *cpu_coregroup_mask(int cpu); + +#else /* CONFIG_SCHED_BOOK */ + +static inline void topology_schedule_update(void) { } +static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline void topology_expect_change(void) { } + +#endif /* CONFIG_SCHED_BOOK */ + +#define POLARIZATION_UNKNOWN (-1) +#define POLARIZATION_HRZ (0) +#define POLARIZATION_VL (1) +#define POLARIZATION_VM (2) +#define POLARIZATION_VH (3) + +#ifdef CONFIG_SCHED_BOOK +void s390_init_cpu_topology(void); +#else +static inline void s390_init_cpu_topology(void) +{ +}; +#endif + +#include <asm-generic/topology.h> + +#endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h new file mode 100644 index 00000000000..dccef3ca91f --- /dev/null +++ b/arch/s390/include/asm/types.h @@ -0,0 +1,28 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/types.h" + */ +#ifndef _S390_TYPES_H +#define _S390_TYPES_H + +#include <uapi/asm/types.h> + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ + +#ifndef __ASSEMBLY__ + +#ifndef CONFIG_64BIT +typedef union { + unsigned long long pair; + struct { + unsigned long even; + unsigned long odd; + } subreg; +} register_pair; + +#endif /* ! CONFIG_64BIT */ +#endif /* __ASSEMBLY__ */ +#endif /* _S390_TYPES_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h new file mode 100644 index 00000000000..cd4c68e0398 --- /dev/null +++ b/arch/s390/include/asm/uaccess.h @@ -0,0 +1,376 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Hartmut Penner (hp@de.ibm.com), + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "include/asm-i386/uaccess.h" + */ +#ifndef __S390_UACCESS_H +#define __S390_UACCESS_H + +/* + * User space memory access functions + */ +#include <linux/sched.h> +#include <linux/errno.h> +#include <asm/ctl_reg.h> + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) }) + + +#define KERNEL_DS MAKE_MM_SEG(0) +#define USER_DS MAKE_MM_SEG(1) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->thread.mm_segment) + +#define set_fs(x) \ +({ \ + unsigned long __pto; \ + current->thread.mm_segment = (x); \ + __pto = current->thread.mm_segment.ar4 ? \ + S390_lowcore.user_asce : S390_lowcore.kernel_asce; \ + __ctl_load(__pto, 7, 7); \ +}) + +#define segment_eq(a,b) ((a).ar4 == (b).ar4) + +static inline int __range_ok(unsigned long addr, unsigned long size) +{ + return 1; +} + +#define __access_ok(addr, size) \ +({ \ + __chk_user_ptr(addr); \ + __range_ok((unsigned long)(addr), (size)); \ +}) + +#define access_ok(type, addr, size) __access_ok(addr, size) + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + int insn, fixup; +}; + +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +unsigned long __must_check __copy_from_user(void *to, const void __user *from, + unsigned long n); + +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long __must_check __copy_to_user(void __user *to, const void *from, + unsigned long n); + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + +#define __put_get_user_asm(to, from, size, spec) \ +({ \ + register unsigned long __reg0 asm("0") = spec; \ + int __rc; \ + \ + asm volatile( \ + "0: mvcos %1,%3,%2\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%5\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "=Q" (*(to)) \ + : "d" (size), "Q" (*(from)), \ + "d" (__reg0), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) + +#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) +#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) + +#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ + +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +{ + size = __copy_to_user(ptr, x, size); + return size ? -EFAULT : 0; +} + +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +{ + size = __copy_from_user(x, ptr, size); + return size ? -EFAULT : 0; +} + +#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */ + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + */ +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __pu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof (*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + __pu_err = __put_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ + break; \ + default: \ + __put_user_bad(); \ + break; \ + } \ + __pu_err; \ +}) + +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) + + +int __put_user_bad(void) __attribute__((noreturn)); + +#define __get_user(x, ptr) \ +({ \ + int __gu_err = -EFAULT; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: { \ + unsigned char __x; \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 2: { \ + unsigned short __x; \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 4: { \ + unsigned int __x; \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + case 8: { \ + unsigned long long __x; \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ + (x) = *(__force __typeof__(*(ptr)) *) &__x; \ + break; \ + }; \ + default: \ + __get_user_bad(); \ + break; \ + } \ + __gu_err; \ +}) + +#define get_user(x, ptr) \ +({ \ + might_fault(); \ + __get_user(x, ptr); \ +}) + +int __get_user_bad(void) __attribute__((noreturn)); + +#define __put_user_unaligned __put_user +#define __get_user_unaligned __get_user + +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + return __copy_to_user(to, from, n); +} + +void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +__compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned int sz = __compiletime_object_size(to); + + might_fault(); + if (unlikely(sz != -1 && sz < n)) { + copy_from_user_overflow(); + return n; + } + return __copy_from_user(to, from, n); +} + +unsigned long __must_check +__copy_in_user(void __user *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check +copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + might_fault(); + return __copy_in_user(to, from, n); +} + +/* + * Copy a null terminated string from userspace. + */ + +long __strncpy_from_user(char *dst, const char __user *src, long count); + +static inline long __must_check +strncpy_from_user(char *dst, const char __user *src, long count) +{ + might_fault(); + return __strncpy_from_user(dst, src, count); +} + +unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count); + +static inline unsigned long strnlen_user(const char __user *src, unsigned long n) +{ + might_fault(); + return __strnlen_user(src, n); +} + +/** + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ +#define strlen_user(str) strnlen_user(str, ~0UL) + +/* + * Zero Userspace + */ +unsigned long __must_check __clear_user(void __user *to, unsigned long size); + +static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +{ + might_fault(); + return __clear_user(to, n); +} + +int copy_to_user_real(void __user *dest, void *src, unsigned long count); + +#endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h new file mode 100644 index 00000000000..da9627afe5d --- /dev/null +++ b/arch/s390/include/asm/unaligned.h @@ -0,0 +1,13 @@ +#ifndef _ASM_S390_UNALIGNED_H +#define _ASM_S390_UNALIGNED_H + +/* + * The S390 can do unaligned accesses itself. + */ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> + +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be + +#endif /* _ASM_S390_UNALIGNED_H */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h new file mode 100644 index 00000000000..65188635355 --- /dev/null +++ b/arch/s390/include/asm/unistd.h @@ -0,0 +1,57 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ +#ifndef _ASM_S390_UNISTD_H_ +#define _ASM_S390_UNISTD_H_ + +#include <uapi/asm/unistd.h> + + +#ifndef CONFIG_64BIT +#define __IGNORE_select +#else +#define __IGNORE_time +#endif + +/* Ignore NUMA system calls. Not wired up on s390. */ +#define __IGNORE_mbind +#define __IGNORE_get_mempolicy +#define __IGNORE_set_mempolicy +#define __IGNORE_migrate_pages +#define __IGNORE_move_pages + +/* Ignore system calls that are also reachable via sys_socket */ +#define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg + +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_SIGNAL +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_IPC +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_SIGPROCMASK +# ifndef CONFIG_64BIT +# define __ARCH_WANT_STAT64 +# define __ARCH_WANT_SYS_TIME +# endif +# ifdef CONFIG_COMPAT +# define __ARCH_WANT_COMPAT_SYS_TIME +# endif +#define __ARCH_WANT_SYS_FORK +#define __ARCH_WANT_SYS_VFORK +#define __ARCH_WANT_SYS_CLONE + +#endif /* _ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h new file mode 100644 index 00000000000..6ed1d188633 --- /dev/null +++ b/arch/s390/include/asm/user.h @@ -0,0 +1,74 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/usr.h" + */ + +#ifndef _S390_USER_H +#define _S390_USER_H + +#include <asm/page.h> +#include <asm/ptrace.h> +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user (under + linux we use the 'trad-core' bfd). There are quite a number of + obstacles to being able to view the contents of the floating point + registers, and until these are solved you will not be able to view the + contents of them. Actually, you can read in the core file and look at + the contents of the user struct to find out what the floating point + registers contain. + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. +*/ + + +/* + * This is the old layout of "struct pt_regs", and + * is still the layout used by user mode (the new + * pt_regs doesn't have all registers as the kernel + * doesn't use the extra segment registers) + */ + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ +struct user { +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + unsigned long u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ +}; +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _S390_USER_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h new file mode 100644 index 00000000000..bc9746a7d47 --- /dev/null +++ b/arch/s390/include/asm/vdso.h @@ -0,0 +1,47 @@ +#ifndef __S390_VDSO_H__ +#define __S390_VDSO_H__ + +/* Default link addresses for the vDSOs */ +#define VDSO32_LBASE 0 +#define VDSO64_LBASE 0 + +#define VDSO_VERSION_STRING LINUX_2.6.29 + +#ifndef __ASSEMBLY__ + +/* + * Note about the vdso_data and vdso_per_cpu_data structures: + * + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. + */ + +struct vdso_data { + __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */ + __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ + __u64 xtime_clock_sec; /* Kernel time 0x10 */ + __u64 xtime_clock_nsec; /* 0x18 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ + __u64 wtom_clock_nsec; /* 0x28 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ + __u32 tz_dsttime; /* Type of dst correction 0x34 */ + __u32 ectg_available; /* ECTG instruction present 0x38 */ + __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */ + __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */ +}; + +struct vdso_per_cpu_data { + __u64 ectg_timer_base; + __u64 ectg_user_time; +}; + +extern struct vdso_data *vdso_data; + +#ifdef CONFIG_64BIT +int vdso_alloc_per_cpu(struct _lowcore *lowcore); +void vdso_free_per_cpu(struct _lowcore *lowcore); +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h new file mode 100644 index 00000000000..d375526c261 --- /dev/null +++ b/arch/s390/include/asm/vga.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_VGA_H +#define _ASM_S390_VGA_H + +/* Avoid compile errors due to missing asm/vga.h */ + +#endif /* _ASM_S390_VGA_H */ diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h new file mode 100644 index 00000000000..af9896c53eb --- /dev/null +++ b/arch/s390/include/asm/vtime.h @@ -0,0 +1,7 @@ +#ifndef _S390_VTIME_H +#define _S390_VTIME_H + +#define __ARCH_HAS_VTIME_ACCOUNT +#define __ARCH_HAS_VTIME_TASK_SWITCH + +#endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h new file mode 100644 index 00000000000..bfe25d513ad --- /dev/null +++ b/arch/s390/include/asm/vtimer.h @@ -0,0 +1,33 @@ +/* + * Copyright IBM Corp. 2003, 2012 + * Virtual CPU timer + * + * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + */ + +#ifndef _ASM_S390_TIMER_H +#define _ASM_S390_TIMER_H + +#define VTIMER_MAX_SLICE (0x7fffffffffffffffULL) + +struct vtimer_list { + struct list_head entry; + u64 expires; + u64 interval; + void (*function)(unsigned long); + unsigned long data; +}; + +extern void init_virt_timer(struct vtimer_list *timer); +extern void add_virt_timer(struct vtimer_list *timer); +extern void add_virt_timer_periodic(struct vtimer_list *timer); +extern int mod_virt_timer(struct vtimer_list *timer, u64 expires); +extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires); +extern int del_virt_timer(struct vtimer_list *timer); + +extern void init_cpu_vtimer(void); +extern void vtime_init(void); + +extern void vtime_stop_cpu(void); + +#endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h new file mode 100644 index 00000000000..c82eb12a5b1 --- /dev/null +++ b/arch/s390/include/asm/xor.h @@ -0,0 +1 @@ +#include <asm-generic/xor.h> diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild new file mode 100644 index 00000000000..736637363d3 --- /dev/null +++ b/arch/s390/include/uapi/asm/Kbuild @@ -0,0 +1,51 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += chpid.h +header-y += chsc.h +header-y += cmb.h +header-y += dasd.h +header-y += debug.h +header-y += errno.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += kvm.h +header-y += kvm_para.h +header-y += kvm_virtio.h +header-y += mman.h +header-y += monwriter.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += qeth.h +header-y += resource.h +header-y += schid.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += sclp_ctl.h +header-y += sie.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += tape390.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += ucontext.h +header-y += unistd.h +header-y += virtio-ccw.h +header-y += vtoc.h +header-y += zcrypt.h diff --git a/arch/s390/include/uapi/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h new file mode 100644 index 00000000000..a1f153e8913 --- /dev/null +++ b/arch/s390/include/uapi/asm/auxvec.h @@ -0,0 +1,6 @@ +#ifndef __ASMS390_AUXVEC_H +#define __ASMS390_AUXVEC_H + +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h new file mode 100644 index 00000000000..6b235aea9c6 --- /dev/null +++ b/arch/s390/include/uapi/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_S390_BITSPERLONG_H +#define __ASM_S390_BITSPERLONG_H + +#ifndef __s390x__ +#define __BITS_PER_LONG 32 +#else +#define __BITS_PER_LONG 64 +#endif + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_S390_BITSPERLONG_H */ + diff --git a/arch/s390/include/uapi/asm/byteorder.h b/arch/s390/include/uapi/asm/byteorder.h new file mode 100644 index 00000000000..a332e59e26f --- /dev/null +++ b/arch/s390/include/uapi/asm/byteorder.h @@ -0,0 +1,6 @@ +#ifndef _S390_BYTEORDER_H +#define _S390_BYTEORDER_H + +#include <linux/byteorder/big_endian.h> + +#endif /* _S390_BYTEORDER_H */ diff --git a/arch/s390/include/uapi/asm/chpid.h b/arch/s390/include/uapi/asm/chpid.h new file mode 100644 index 00000000000..6b4fb29cc19 --- /dev/null +++ b/arch/s390/include/uapi/asm/chpid.h @@ -0,0 +1,22 @@ +/* + * Copyright IBM Corp. 2007, 2012 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _UAPI_ASM_S390_CHPID_H +#define _UAPI_ASM_S390_CHPID_H + +#include <linux/string.h> +#include <linux/types.h> + +#define __MAX_CHPID 255 + +struct chp_id { + __u8 reserved1; + __u8 cssid; + __u8 reserved2; + __u8 id; +} __attribute__((packed)); + + +#endif /* _UAPI_ASM_S390_CHPID_H */ diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h new file mode 100644 index 00000000000..65dc694725a --- /dev/null +++ b/arch/s390/include/uapi/asm/chsc.h @@ -0,0 +1,143 @@ +/* + * ioctl interface for /dev/chsc + * + * Copyright IBM Corp. 2008, 2012 + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ + +#ifndef _ASM_CHSC_H +#define _ASM_CHSC_H + +#include <linux/types.h> +#include <linux/ioctl.h> +#include <asm/chpid.h> +#include <asm/schid.h> + +#define CHSC_SIZE 0x1000 + +struct chsc_async_header { + __u16 length; + __u16 code; + __u32 cmd_dependend; + __u32 key : 4; + __u32 : 28; + struct subchannel_id sid; +} __attribute__ ((packed)); + +struct chsc_async_area { + struct chsc_async_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; +} __attribute__ ((packed)); + +struct chsc_header { + __u16 length; + __u16 code; +} __attribute__ ((packed)); + +struct chsc_sync_area { + struct chsc_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; +} __attribute__ ((packed)); + +struct chsc_response_struct { + __u16 length; + __u16 code; + __u32 parms; + __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)]; +} __attribute__ ((packed)); + +struct chsc_chp_cd { + struct chp_id chpid; + int m; + int fmt; + struct chsc_response_struct cpcb; +}; + +struct chsc_cu_cd { + __u16 cun; + __u8 cssid; + int m; + int fmt; + struct chsc_response_struct cucb; +}; + +struct chsc_sch_cud { + struct subchannel_id schid; + int fmt; + struct chsc_response_struct scub; +}; + +struct conf_id { + int m; + __u8 cssid; + __u8 ssid; +}; + +struct chsc_conf_info { + struct conf_id id; + int fmt; + struct chsc_response_struct scid; +}; + +struct ccl_parm_chpid { + int m; + struct chp_id chp; +}; + +struct ccl_parm_cssids { + __u8 f_cssid; + __u8 l_cssid; +}; + +struct chsc_comp_list { + struct { + enum { + CCL_CU_ON_CHP = 1, + CCL_CHP_TYPE_CAP = 2, + CCL_CSS_IMG = 4, + CCL_CSS_IMG_CONF_CHAR = 5, + CCL_IOP_CHP = 6, + } ctype; + int fmt; + struct ccl_parm_chpid chpid; + struct ccl_parm_cssids cssids; + } req; + struct chsc_response_struct sccl; +}; + +struct chsc_dcal { + struct { + enum { + DCAL_CSS_IID_PN = 4, + } atype; + __u32 list_parm[2]; + int fmt; + } req; + struct chsc_response_struct sdcal; +}; + +struct chsc_cpd_info { + struct chp_id chpid; + int m; + int fmt; + int rfmt; + int c; + struct chsc_response_struct chpdb; +}; + +#define CHSC_IOCTL_MAGIC 'c' + +#define CHSC_START _IOWR(CHSC_IOCTL_MAGIC, 0x81, struct chsc_async_area) +#define CHSC_INFO_CHANNEL_PATH _IOWR(CHSC_IOCTL_MAGIC, 0x82, \ + struct chsc_chp_cd) +#define CHSC_INFO_CU _IOWR(CHSC_IOCTL_MAGIC, 0x83, struct chsc_cu_cd) +#define CHSC_INFO_SCH_CU _IOWR(CHSC_IOCTL_MAGIC, 0x84, struct chsc_sch_cud) +#define CHSC_INFO_CI _IOWR(CHSC_IOCTL_MAGIC, 0x85, struct chsc_conf_info) +#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) +#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) +#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area) +#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area) +#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b) + +#endif diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h new file mode 100644 index 00000000000..0c086d00d89 --- /dev/null +++ b/arch/s390/include/uapi/asm/cmb.h @@ -0,0 +1,53 @@ +#ifndef _UAPIS390_CMB_H +#define _UAPIS390_CMB_H + +#include <linux/types.h> + +/** + * struct cmbdata - channel measurement block data for user space + * @size: size of the stored data + * @elapsed_time: time since last sampling + * @ssch_rsch_count: number of ssch and rsch + * @sample_count: number of samples + * @device_connect_time: time of device connect + * @function_pending_time: time of function pending + * @device_disconnect_time: time of device disconnect + * @control_unit_queuing_time: time of control unit queuing + * @device_active_only_time: time of device active only + * @device_busy_time: time of device busy (ext. format) + * @initial_command_response_time: initial command response time (ext. format) + * + * All values are stored as 64 bit for simplicity, especially + * in 32 bit emulation mode. All time values are normalized to + * nanoseconds. + * Currently, two formats are known, which differ by the size of + * this structure, i.e. the last two members are only set when + * the extended channel measurement facility (first shipped in + * z990 machines) is activated. + * Potentially, more fields could be added, which would result in a + * new ioctl number. + */ +struct cmbdata { + __u64 size; + __u64 elapsed_time; + /* basic and exended format: */ + __u64 ssch_rsch_count; + __u64 sample_count; + __u64 device_connect_time; + __u64 function_pending_time; + __u64 device_disconnect_time; + __u64 control_unit_queuing_time; + __u64 device_active_only_time; + /* extended format only: */ + __u64 device_busy_time; + __u64 initial_command_response_time; +}; + +/* enable channel measurement */ +#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER, 32) +/* enable channel measurement */ +#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER, 33) +/* read channel measurement data */ +#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata) + +#endif /* _UAPIS390_CMB_H */ diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h new file mode 100644 index 00000000000..5812a3b2df9 --- /dev/null +++ b/arch/s390/include/uapi/asm/dasd.h @@ -0,0 +1,295 @@ +/* + * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * Copyright IBM Corp. 1999, 2000 + * EMC Symmetrix ioctl Copyright EMC Corporation, 2008 + * Author.........: Nigel Hislop <hislop_nigel@emc.com> + * + * This file is the interface of the DASD device driver, which is exported to user space + * any future changes wrt the API will result in a change of the APIVERSION reported + * to userspace by the DASDAPIVER-ioctl + * + */ + +#ifndef DASD_H +#define DASD_H +#include <linux/types.h> +#include <linux/ioctl.h> + +#define DASD_IOCTL_LETTER 'D' + +#define DASD_API_VERSION 6 + +/* + * struct dasd_information2_t + * represents any data about the device, which is visible to userspace. + * including foramt and featueres. + */ +typedef struct dasd_information2_t { + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ + unsigned int format; /* format info like formatted/cdl/ldl/... */ + unsigned int features; /* dasd features like 'ro',... */ + unsigned int reserved0; /* reserved for further use ,... */ + unsigned int reserved1; /* reserved for further use ,... */ + unsigned int reserved2; /* reserved for further use ,... */ + unsigned int reserved3; /* reserved for further use ,... */ + unsigned int reserved4; /* reserved for further use ,... */ + unsigned int reserved5; /* reserved for further use ,... */ + unsigned int reserved6; /* reserved for further use ,... */ + unsigned int reserved7; /* reserved for further use ,... */ +} dasd_information2_t; + +/* + * values to be used for dasd_information_t.format + * 0x00: NOT formatted + * 0x01: Linux disc layout + * 0x02: Common disc layout + */ +#define DASD_FORMAT_NONE 0 +#define DASD_FORMAT_LDL 1 +#define DASD_FORMAT_CDL 2 +/* + * values to be used for dasd_information_t.features + * 0x00: default features + * 0x01: readonly (ro) + * 0x02: use diag discipline (diag) + * 0x04: set the device initially online (internal use only) + * 0x08: enable ERP related logging + * 0x20: give access to raw eckd data + */ +#define DASD_FEATURE_DEFAULT 0x00 +#define DASD_FEATURE_READONLY 0x01 +#define DASD_FEATURE_USEDIAG 0x02 +#define DASD_FEATURE_INITIAL_ONLINE 0x04 +#define DASD_FEATURE_ERPLOG 0x08 +#define DASD_FEATURE_FAILFAST 0x10 +#define DASD_FEATURE_FAILONSLCK 0x20 +#define DASD_FEATURE_USERAW 0x40 + +#define DASD_PARTN_BITS 2 + +/* + * struct dasd_information_t + * represents any data about the data, which is visible to userspace + */ +typedef struct dasd_information_t { + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ +} dasd_information_t; + +/* + * Read Subsystem Data - Performance Statistics + */ +typedef struct dasd_rssd_perf_stats_t { + unsigned char invalid:1; + unsigned char format:3; + unsigned char data_format:4; + unsigned char unit_address; + unsigned short device_status; + unsigned int nr_read_normal; + unsigned int nr_read_normal_hits; + unsigned int nr_write_normal; + unsigned int nr_write_fast_normal_hits; + unsigned int nr_read_seq; + unsigned int nr_read_seq_hits; + unsigned int nr_write_seq; + unsigned int nr_write_fast_seq_hits; + unsigned int nr_read_cache; + unsigned int nr_read_cache_hits; + unsigned int nr_write_cache; + unsigned int nr_write_fast_cache_hits; + unsigned int nr_inhibit_cache; + unsigned int nr_bybass_cache; + unsigned int nr_seq_dasd_to_cache; + unsigned int nr_dasd_to_cache; + unsigned int nr_cache_to_dasd; + unsigned int nr_delayed_fast_write; + unsigned int nr_normal_fast_write; + unsigned int nr_seq_fast_write; + unsigned int nr_cache_miss; + unsigned char status2; + unsigned int nr_quick_write_promotes; + unsigned char reserved; + unsigned short ssid; + unsigned char reseved2[96]; +} __attribute__((packed)) dasd_rssd_perf_stats_t; + +/* + * struct profile_info_t + * holds the profinling information + */ +typedef struct dasd_profile_info_t { + unsigned int dasd_io_reqs; /* number of requests processed at all */ + unsigned int dasd_io_sects; /* number of sectors processed at all */ + unsigned int dasd_io_secs[32]; /* histogram of request's sizes */ + unsigned int dasd_io_times[32]; /* histogram of requests's times */ + unsigned int dasd_io_timps[32]; /* histogram of requests's times per sector */ + unsigned int dasd_io_time1[32]; /* histogram of time from build to start */ + unsigned int dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq */ + unsigned int dasd_io_time3[32]; /* histogram of time from irq to end */ + unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */ +} dasd_profile_info_t; + +/* + * struct format_data_t + * represents all data necessary to format a dasd + */ +typedef struct format_data_t { + unsigned int start_unit; /* from track */ + unsigned int stop_unit; /* to track */ + unsigned int blksize; /* sectorsize */ + unsigned int intensity; +} format_data_t; + +/* + * values to be used for format_data_t.intensity + * 0/8: normal format + * 1/9: also write record zero + * 3/11: also write home address + * 4/12: invalidate track + */ +#define DASD_FMT_INT_FMT_R0 1 /* write record zero */ +#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */ +#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */ +#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */ + + +/* + * struct attrib_data_t + * represents the operation (cache) bits for the device. + * Used in DE to influence caching of the DASD. + */ +typedef struct attrib_data_t { + unsigned char operation:3; /* cache operation mode */ + unsigned char reserved:5; /* cache operation mode */ + __u16 nr_cyl; /* no of cyliners for read ahaed */ + __u8 reserved2[29]; /* for future use */ +} __attribute__ ((packed)) attrib_data_t; + +/* definition of operation (cache) bits within attributes of DE */ +#define DASD_NORMAL_CACHE 0x0 +#define DASD_BYPASS_CACHE 0x1 +#define DASD_INHIBIT_LOAD 0x2 +#define DASD_SEQ_ACCESS 0x3 +#define DASD_SEQ_PRESTAGE 0x4 +#define DASD_REC_ACCESS 0x5 + +/* + * Perform EMC Symmetrix I/O + */ +typedef struct dasd_symmio_parms { + unsigned char reserved[8]; /* compat with older releases */ + unsigned long long psf_data; /* char * cast to u64 */ + unsigned long long rssd_result; /* char * cast to u64 */ + int psf_data_len; + int rssd_result_len; +} __attribute__ ((packed)) dasd_symmio_parms_t; + +/* + * Data returned by Sense Path Group ID (SNID) + */ +struct dasd_snid_data { + struct { + __u8 group:2; + __u8 reserve:2; + __u8 mode:1; + __u8 res:3; + } __attribute__ ((packed)) path_state; + __u8 pgid[11]; +} __attribute__ ((packed)); + +struct dasd_snid_ioctl_data { + struct dasd_snid_data data; + __u8 path_mask; +} __attribute__ ((packed)); + + +/******************************************************************************** + * SECTION: Definition of IOCTLs + * + * Here ist how the ioctl-nr should be used: + * 0 - 31 DASD driver itself + * 32 - 239 still open + * 240 - 255 reserved for EMC + *******************************************************************************/ + +/* Disable the volume (for Linux) */ +#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0) +/* Enable the volume (for Linux) */ +#define BIODASDENABLE _IO(DASD_IOCTL_LETTER,1) +/* Issue a reserve/release command, rsp. */ +#define BIODASDRSRV _IO(DASD_IOCTL_LETTER,2) /* reserve */ +#define BIODASDRLSE _IO(DASD_IOCTL_LETTER,3) /* release */ +#define BIODASDSLCK _IO(DASD_IOCTL_LETTER,4) /* steal lock */ +/* reset profiling information of a device */ +#define BIODASDPRRST _IO(DASD_IOCTL_LETTER,5) +/* Quiesce IO on device */ +#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) +/* Resume IO on device */ +#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +/* Abort all I/O on a device */ +#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) +/* Allow I/O on a device */ +#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241) + + +/* retrieve API version number */ +#define DASDAPIVER _IOR(DASD_IOCTL_LETTER,0,int) +/* Get information on a dasd device */ +#define BIODASDINFO _IOR(DASD_IOCTL_LETTER,1,dasd_information_t) +/* retrieve profiling information of a device */ +#define BIODASDPRRD _IOR(DASD_IOCTL_LETTER,2,dasd_profile_info_t) +/* Get information on a dasd device (enhanced) */ +#define BIODASDINFO2 _IOR(DASD_IOCTL_LETTER,3,dasd_information2_t) +/* Performance Statistics Read */ +#define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t) +/* Get Attributes (cache operations) */ +#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t) + + +/* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */ +#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t) +/* Set Attributes (cache operations) */ +#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) + +/* Get Sense Path Group ID (SNID) data */ +#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data) + +#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t) + +#endif /* DASD_H */ + diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h new file mode 100644 index 00000000000..c59fc79125f --- /dev/null +++ b/arch/s390/include/uapi/asm/debug.h @@ -0,0 +1,34 @@ +/* + * S/390 debug facility + * + * Copyright IBM Corp. 1999, 2000 + */ + +#ifndef _UAPIDEBUG_H +#define _UAPIDEBUG_H + +#include <linux/fs.h> + +/* Note: + * struct __debug_entry must be defined outside of #ifdef __KERNEL__ + * in order to allow a user program to analyze the 'raw'-view. + */ + +struct __debug_entry{ + union { + struct { + unsigned long long clock:52; + unsigned long long exception:1; + unsigned long long level:3; + unsigned long long cpuid:8; + } fields; + + unsigned long long stck; + } id; + void* caller; +} __attribute__((packed)); + + +#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ + +#endif /* _UAPIDEBUG_H */ diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h new file mode 100644 index 00000000000..395e97d8005 --- /dev/null +++ b/arch/s390/include/uapi/asm/errno.h @@ -0,0 +1,11 @@ +/* + * S390 version + * + */ + +#ifndef _S390_ERRNO_H +#define _S390_ERRNO_H + +#include <asm-generic/errno.h> + +#endif diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h new file mode 100644 index 00000000000..46ab12db573 --- /dev/null +++ b/arch/s390/include/uapi/asm/fcntl.h @@ -0,0 +1 @@ +#include <asm-generic/fcntl.h> diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h new file mode 100644 index 00000000000..37998b44953 --- /dev/null +++ b/arch/s390/include/uapi/asm/hypfs.h @@ -0,0 +1,25 @@ +/* + * IOCTL interface for hypfs + * + * Copyright IBM Corp. 2013 + * + * Author: Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_HYPFS_CTL_H +#define _ASM_HYPFS_CTL_H + +#include <linux/types.h> + +struct hypfs_diag304 { + __u32 args[2]; + __u64 data; + __u64 rc; +} __attribute__((packed)); + +#define HYPFS_IOCTL_MAGIC 0x10 + +#define HYPFS_DIAG304 \ + _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304) + +#endif diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h new file mode 100644 index 00000000000..b279fe06dfe --- /dev/null +++ b/arch/s390/include/uapi/asm/ioctl.h @@ -0,0 +1 @@ +#include <asm-generic/ioctl.h> diff --git a/arch/s390/include/uapi/asm/ioctls.h b/arch/s390/include/uapi/asm/ioctls.h new file mode 100644 index 00000000000..960a4c1ebdf --- /dev/null +++ b/arch/s390/include/uapi/asm/ioctls.h @@ -0,0 +1,8 @@ +#ifndef __ARCH_S390_IOCTLS_H__ +#define __ARCH_S390_IOCTLS_H__ + +#define FIOQSIZE 0x545E + +#include <asm-generic/ioctls.h> + +#endif diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h new file mode 100644 index 00000000000..37f293d12c8 --- /dev/null +++ b/arch/s390/include/uapi/asm/ipcbuf.h @@ -0,0 +1,31 @@ +#ifndef __S390_IPCBUF_H__ +#define __S390_IPCBUF_H__ + +/* + * The user_ipc_perm structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 32-bit mode_t and seq + * - 2 miscellaneous 32-bit values + */ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; +#ifndef __s390x__ + unsigned short __pad2; +#endif /* ! __s390x__ */ + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __S390_IPCBUF_H__ */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h new file mode 100644 index 00000000000..0fc26430a1e --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm.h @@ -0,0 +1,131 @@ +#ifndef __LINUX_KVM_S390_H +#define __LINUX_KVM_S390_H +/* + * KVM s390 specific structures and definitions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ +#include <linux/types.h> + +#define __KVM_S390 +#define __KVM_HAVE_GUEST_DEBUG + +/* Device control API: s390-specific devices */ +#define KVM_DEV_FLIC_GET_ALL_IRQS 1 +#define KVM_DEV_FLIC_ENQUEUE 2 +#define KVM_DEV_FLIC_CLEAR_IRQS 3 +#define KVM_DEV_FLIC_APF_ENABLE 4 +#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 +#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 +#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 +/* + * We can have up to 4*64k pending subchannels + 8 adapter interrupts, + * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. + * There are also sclp and machine checks. This gives us + * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000 + * Lets round up to 8192 pages. + */ +#define KVM_S390_MAX_FLOAT_IRQS 266250 +#define KVM_S390_FLIC_MAX_BUFFER 0x2000000 + +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + +#define KVM_S390_IO_ADAPTER_MASK 1 +#define KVM_S390_IO_ADAPTER_MAP 2 +#define KVM_S390_IO_ADAPTER_UNMAP 3 + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + +/* kvm attr_group on vm fd */ +#define KVM_S390_VM_MEM_CTRL 0 + +/* kvm attributes for mem_ctrl */ +#define KVM_S390_VM_MEM_ENABLE_CMMA 0 +#define KVM_S390_VM_MEM_CLR_CMMA 1 + +/* for KVM_GET_REGS and KVM_SET_REGS */ +struct kvm_regs { + /* general purpose regs for s390 */ + __u64 gprs[16]; +}; + +/* for KVM_GET_SREGS and KVM_SET_SREGS */ +struct kvm_sregs { + __u32 acrs[16]; + __u64 crs[16]; +}; + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u32 fpc; + __u64 fprs[16]; +}; + +#define KVM_GUESTDBG_USE_HW_BP 0x00010000 + +#define KVM_HW_BP 1 +#define KVM_HW_WP_WRITE 2 +#define KVM_SINGLESTEP 4 + +struct kvm_debug_exit_arch { + __u64 addr; + __u8 type; + __u8 pad[7]; /* Should be set to 0 */ +}; + +struct kvm_hw_breakpoint { + __u64 addr; + __u64 phys_addr; + __u64 len; + __u8 type; + __u8 pad[7]; /* Should be set to 0 */ +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { + __u32 nr_hw_bp; + __u32 pad; /* Should be set to 0 */ + struct kvm_hw_breakpoint __user *hw_bp; +}; + +#define KVM_SYNC_PREFIX (1UL << 0) +#define KVM_SYNC_GPRS (1UL << 1) +#define KVM_SYNC_ACRS (1UL << 2) +#define KVM_SYNC_CRS (1UL << 3) +/* definition of registers in kvm_run */ +struct kvm_sync_regs { + __u64 prefix; /* prefix register */ + __u64 gprs[16]; /* general purpose registers */ + __u32 acrs[16]; /* access registers */ + __u64 crs[16]; /* control registers */ +}; + +#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) +#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) +#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) +#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9) +#endif diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h new file mode 100644 index 00000000000..ff1f4e7b301 --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm_para.h @@ -0,0 +1,11 @@ +/* + * User API definitions for paravirtual devices on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ diff --git a/arch/s390/include/uapi/asm/kvm_virtio.h b/arch/s390/include/uapi/asm/kvm_virtio.h new file mode 100644 index 00000000000..44a438ca9e7 --- /dev/null +++ b/arch/s390/include/uapi/asm/kvm_virtio.h @@ -0,0 +1,64 @@ +/* + * definition for virtio for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#ifndef __KVM_S390_VIRTIO_H +#define __KVM_S390_VIRTIO_H + +#include <linux/types.h> + +struct kvm_device_desc { + /* The device type: console, network, disk etc. Type 0 terminates. */ + __u8 type; + /* The number of virtqueues (first in config array) */ + __u8 num_vq; + /* + * The number of bytes of feature bits. Multiply by 2: one for host + * features and one for guest acknowledgements. + */ + __u8 feature_len; + /* The number of bytes of the config array after virtqueues. */ + __u8 config_len; + /* A status byte, written by the Guest. */ + __u8 status; + __u8 config[0]; +}; + +/* + * This is how we expect the device configuration field for a virtqueue + * to be laid out in config space. + */ +struct kvm_vqconfig { + /* The token returned with an interrupt. Set by the guest */ + __u64 token; + /* The address of the virtio ring */ + __u64 address; + /* The number of entries in the virtio_ring */ + __u16 num; + +}; + +#define KVM_S390_VIRTIO_NOTIFY 0 +#define KVM_S390_VIRTIO_RESET 1 +#define KVM_S390_VIRTIO_SET_STATUS 2 + +/* The alignment to use between consumer and producer parts of vring. + * This is pagesize for historical reasons. */ +#define KVM_S390_VIRTIO_RING_ALIGN 4096 + + +/* These values are supposed to be in ext_params on an interrupt */ +#define VIRTIO_PARAM_MASK 0xff +#define VIRTIO_PARAM_VRING_INTERRUPT 0x0 +#define VIRTIO_PARAM_CONFIG_CHANGED 0x1 +#define VIRTIO_PARAM_DEV_ADD 0x2 + +#endif diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h new file mode 100644 index 00000000000..de23da1f41b --- /dev/null +++ b/arch/s390/include/uapi/asm/mman.h @@ -0,0 +1,6 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/mman.h" + */ +#include <asm-generic/mman.h> diff --git a/arch/s390/include/uapi/asm/monwriter.h b/arch/s390/include/uapi/asm/monwriter.h new file mode 100644 index 00000000000..f845c8e2f86 --- /dev/null +++ b/arch/s390/include/uapi/asm/monwriter.h @@ -0,0 +1,31 @@ +/* + * Copyright IBM Corp. 2006 + * Character device driver for writing z/VM APPLDATA monitor records + * Version 1.0 + * Author(s): Melissa Howland <melissah@us.ibm.com> + * + */ + +#ifndef _ASM_390_MONWRITER_H +#define _ASM_390_MONWRITER_H + +/* mon_function values */ +#define MONWRITE_START_INTERVAL 0x00 /* start interval recording */ +#define MONWRITE_STOP_INTERVAL 0x01 /* stop interval or config recording */ +#define MONWRITE_GEN_EVENT 0x02 /* generate event record */ +#define MONWRITE_START_CONFIG 0x03 /* start configuration recording */ + +/* the header the app uses in its write() data */ +struct monwrite_hdr { + unsigned char mon_function; + unsigned short applid; + unsigned char record_num; + unsigned short version; + unsigned short release; + unsigned short mod_level; + unsigned short datalen; + unsigned char hdrlen; + +} __attribute__((packed)); + +#endif /* _ASM_390_MONWRITER_H */ diff --git a/arch/s390/include/uapi/asm/msgbuf.h b/arch/s390/include/uapi/asm/msgbuf.h new file mode 100644 index 00000000000..1bbdee92792 --- /dev/null +++ b/arch/s390/include/uapi/asm/msgbuf.h @@ -0,0 +1,37 @@ +#ifndef _S390_MSGBUF_H +#define _S390_MSGBUF_H + +/* + * The msqid64_ds structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ +#ifndef __s390x__ + unsigned long __unused1; +#endif /* ! __s390x__ */ + __kernel_time_t msg_rtime; /* last msgrcv time */ +#ifndef __s390x__ + unsigned long __unused2; +#endif /* ! __s390x__ */ + __kernel_time_t msg_ctime; /* last change time */ +#ifndef __s390x__ + unsigned long __unused3; +#endif /* ! __s390x__ */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* _S390_MSGBUF_H */ diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h new file mode 100644 index 00000000000..c616821bf2a --- /dev/null +++ b/arch/s390/include/uapi/asm/param.h @@ -0,0 +1,6 @@ +#ifndef _ASMS390_PARAM_H +#define _ASMS390_PARAM_H + +#include <asm-generic/param.h> + +#endif /* _ASMS390_PARAM_H */ diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h new file mode 100644 index 00000000000..c98509d3149 --- /dev/null +++ b/arch/s390/include/uapi/asm/poll.h @@ -0,0 +1 @@ +#include <asm-generic/poll.h> diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h new file mode 100644 index 00000000000..bf2a2ad2f80 --- /dev/null +++ b/arch/s390/include/uapi/asm/posix_types.h @@ -0,0 +1,51 @@ +/* + * S390 version + * + */ + +#ifndef __ARCH_S390_POSIX_TYPES_H +#define __ARCH_S390_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ + +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +#define __kernel_size_t __kernel_size_t + +typedef unsigned short __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t + +#ifndef __s390x__ + +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef int __kernel_ptrdiff_t; + +#else /* __s390x__ */ + +typedef unsigned int __kernel_ino_t; +typedef unsigned int __kernel_mode_t; +typedef int __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef long __kernel_ptrdiff_t; +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#endif /* __s390x__ */ + +#define __kernel_ino_t __kernel_ino_t +#define __kernel_mode_t __kernel_mode_t +#define __kernel_ipc_pid_t __kernel_ipc_pid_t +#define __kernel_uid_t __kernel_uid_t +#define __kernel_gid_t __kernel_gid_t + +#include <asm-generic/posix_types.h> + +#endif diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h new file mode 100644 index 00000000000..a150f4fabe4 --- /dev/null +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -0,0 +1,459 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + */ + +#ifndef _UAPI_S390_PTRACE_H +#define _UAPI_S390_PTRACE_H + +/* + * Offsets in the user_regs_struct. They are used for the ptrace + * system call and in entry.S + */ +#ifndef __s390x__ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x04 +#define PT_GPR0 0x08 +#define PT_GPR1 0x0C +#define PT_GPR2 0x10 +#define PT_GPR3 0x14 +#define PT_GPR4 0x18 +#define PT_GPR5 0x1C +#define PT_GPR6 0x20 +#define PT_GPR7 0x24 +#define PT_GPR8 0x28 +#define PT_GPR9 0x2C +#define PT_GPR10 0x30 +#define PT_GPR11 0x34 +#define PT_GPR12 0x38 +#define PT_GPR13 0x3C +#define PT_GPR14 0x40 +#define PT_GPR15 0x44 +#define PT_ACR0 0x48 +#define PT_ACR1 0x4C +#define PT_ACR2 0x50 +#define PT_ACR3 0x54 +#define PT_ACR4 0x58 +#define PT_ACR5 0x5C +#define PT_ACR6 0x60 +#define PT_ACR7 0x64 +#define PT_ACR8 0x68 +#define PT_ACR9 0x6C +#define PT_ACR10 0x70 +#define PT_ACR11 0x74 +#define PT_ACR12 0x78 +#define PT_ACR13 0x7C +#define PT_ACR14 0x80 +#define PT_ACR15 0x84 +#define PT_ORIGGPR2 0x88 +#define PT_FPC 0x90 +/* + * A nasty fact of life that the ptrace api + * only supports passing of longs. + */ +#define PT_FPR0_HI 0x98 +#define PT_FPR0_LO 0x9C +#define PT_FPR1_HI 0xA0 +#define PT_FPR1_LO 0xA4 +#define PT_FPR2_HI 0xA8 +#define PT_FPR2_LO 0xAC +#define PT_FPR3_HI 0xB0 +#define PT_FPR3_LO 0xB4 +#define PT_FPR4_HI 0xB8 +#define PT_FPR4_LO 0xBC +#define PT_FPR5_HI 0xC0 +#define PT_FPR5_LO 0xC4 +#define PT_FPR6_HI 0xC8 +#define PT_FPR6_LO 0xCC +#define PT_FPR7_HI 0xD0 +#define PT_FPR7_LO 0xD4 +#define PT_FPR8_HI 0xD8 +#define PT_FPR8_LO 0XDC +#define PT_FPR9_HI 0xE0 +#define PT_FPR9_LO 0xE4 +#define PT_FPR10_HI 0xE8 +#define PT_FPR10_LO 0xEC +#define PT_FPR11_HI 0xF0 +#define PT_FPR11_LO 0xF4 +#define PT_FPR12_HI 0xF8 +#define PT_FPR12_LO 0xFC +#define PT_FPR13_HI 0x100 +#define PT_FPR13_LO 0x104 +#define PT_FPR14_HI 0x108 +#define PT_FPR14_LO 0x10C +#define PT_FPR15_HI 0x110 +#define PT_FPR15_LO 0x114 +#define PT_CR_9 0x118 +#define PT_CR_10 0x11C +#define PT_CR_11 0x120 +#define PT_IEEE_IP 0x13C +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x140-1 + +#define GPR_SIZE 4 +#define CR_SIZE 4 + +#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ + +#else /* __s390x__ */ + +#define PT_PSWMASK 0x00 +#define PT_PSWADDR 0x08 +#define PT_GPR0 0x10 +#define PT_GPR1 0x18 +#define PT_GPR2 0x20 +#define PT_GPR3 0x28 +#define PT_GPR4 0x30 +#define PT_GPR5 0x38 +#define PT_GPR6 0x40 +#define PT_GPR7 0x48 +#define PT_GPR8 0x50 +#define PT_GPR9 0x58 +#define PT_GPR10 0x60 +#define PT_GPR11 0x68 +#define PT_GPR12 0x70 +#define PT_GPR13 0x78 +#define PT_GPR14 0x80 +#define PT_GPR15 0x88 +#define PT_ACR0 0x90 +#define PT_ACR1 0x94 +#define PT_ACR2 0x98 +#define PT_ACR3 0x9C +#define PT_ACR4 0xA0 +#define PT_ACR5 0xA4 +#define PT_ACR6 0xA8 +#define PT_ACR7 0xAC +#define PT_ACR8 0xB0 +#define PT_ACR9 0xB4 +#define PT_ACR10 0xB8 +#define PT_ACR11 0xBC +#define PT_ACR12 0xC0 +#define PT_ACR13 0xC4 +#define PT_ACR14 0xC8 +#define PT_ACR15 0xCC +#define PT_ORIGGPR2 0xD0 +#define PT_FPC 0xD8 +#define PT_FPR0 0xE0 +#define PT_FPR1 0xE8 +#define PT_FPR2 0xF0 +#define PT_FPR3 0xF8 +#define PT_FPR4 0x100 +#define PT_FPR5 0x108 +#define PT_FPR6 0x110 +#define PT_FPR7 0x118 +#define PT_FPR8 0x120 +#define PT_FPR9 0x128 +#define PT_FPR10 0x130 +#define PT_FPR11 0x138 +#define PT_FPR12 0x140 +#define PT_FPR13 0x148 +#define PT_FPR14 0x150 +#define PT_FPR15 0x158 +#define PT_CR_9 0x160 +#define PT_CR_10 0x168 +#define PT_CR_11 0x170 +#define PT_IEEE_IP 0x1A8 +#define PT_LASTOFF PT_IEEE_IP +#define PT_ENDREGS 0x1B0-1 + +#define GPR_SIZE 8 +#define CR_SIZE 8 + +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ + +#endif /* __s390x__ */ + +#define NUM_GPRS 16 +#define NUM_FPRS 16 +#define NUM_CRS 16 +#define NUM_ACRS 16 + +#define NUM_CR_WORDS 3 + +#define FPR_SIZE 8 +#define FPC_SIZE 4 +#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ +#define ACR_SIZE 4 + + +#define PTRACE_OLDSETOPTIONS 21 + +#ifndef __ASSEMBLY__ +#include <linux/stddef.h> +#include <linux/types.h> + +typedef union +{ + float f; + double d; + __u64 ui; + struct + { + __u32 hi; + __u32 lo; + } fp; +} freg_t; + +typedef struct +{ + __u32 fpc; + __u32 pad; + freg_t fprs[NUM_FPRS]; +} s390_fp_regs; + +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 + +/* this typedef defines how a Program Status Word looks like */ +typedef struct +{ + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) psw_t; + +#ifndef __s390x__ + +#define PSW_MASK_PER 0x40000000UL +#define PSW_MASK_DAT 0x04000000UL +#define PSW_MASK_IO 0x02000000UL +#define PSW_MASK_EXT 0x01000000UL +#define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ +#define PSW_MASK_MCHECK 0x00040000UL +#define PSW_MASK_WAIT 0x00020000UL +#define PSW_MASK_PSTATE 0x00010000UL +#define PSW_MASK_ASC 0x0000C000UL +#define PSW_MASK_CC 0x00003000UL +#define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_RI 0x00000000UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x0000FF00UL + +#define PSW_ADDR_AMODE 0x80000000UL +#define PSW_ADDR_INSN 0x7FFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) + +#define PSW_ASC_PRIMARY 0x00000000UL +#define PSW_ASC_ACCREG 0x00004000UL +#define PSW_ASC_SECONDARY 0x00008000UL +#define PSW_ASC_HOME 0x0000C000UL + +#else /* __s390x__ */ + +#define PSW_MASK_PER 0x4000000000000000UL +#define PSW_MASK_DAT 0x0400000000000000UL +#define PSW_MASK_IO 0x0200000000000000UL +#define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL +#define PSW_MASK_KEY 0x00F0000000000000UL +#define PSW_MASK_MCHECK 0x0004000000000000UL +#define PSW_MASK_WAIT 0x0002000000000000UL +#define PSW_MASK_PSTATE 0x0001000000000000UL +#define PSW_MASK_ASC 0x0000C00000000000UL +#define PSW_MASK_CC 0x0000300000000000UL +#define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_RI 0x0000008000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x0000FF0180000000UL + +#define PSW_ADDR_AMODE 0x0000000000000000UL +#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL + +#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) + +#define PSW_ASC_PRIMARY 0x0000000000000000UL +#define PSW_ASC_ACCREG 0x0000400000000000UL +#define PSW_ASC_SECONDARY 0x0000800000000000UL +#define PSW_ASC_HOME 0x0000C00000000000UL + +#endif /* __s390x__ */ + + +/* + * The s390_regs structure is used to define the elf_gregset_t. + */ +typedef struct +{ + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; +} s390_regs; + +/* + * Now for the user space program event recording (trace) definitions. + * The following structures are used only for the ptrace interface, don't + * touch or even look at it if you don't want to modify the user-space + * ptrace interface. In particular stay away from it for in-kernel PER. + */ +typedef struct +{ + unsigned long cr[NUM_CR_WORDS]; +} per_cr_words; + +#define PER_EM_MASK 0xE8000000UL + +typedef struct +{ +#ifdef __s390x__ + unsigned : 32; +#endif /* __s390x__ */ + unsigned em_branching : 1; + unsigned em_instruction_fetch : 1; + /* + * Switching on storage alteration automatically fixes + * the storage alteration event bit in the users std. + */ + unsigned em_storage_alteration : 1; + unsigned em_gpr_alt_unused : 1; + unsigned em_store_real_address : 1; + unsigned : 3; + unsigned branch_addr_ctl : 1; + unsigned : 1; + unsigned storage_alt_space_ctl : 1; + unsigned : 21; + unsigned long starting_addr; + unsigned long ending_addr; +} per_cr_bits; + +typedef struct +{ + unsigned short perc_atmid; + unsigned long address; + unsigned char access_id; +} per_lowcore_words; + +typedef struct +{ + unsigned perc_branching : 1; + unsigned perc_instruction_fetch : 1; + unsigned perc_storage_alteration : 1; + unsigned perc_gpr_alt_unused : 1; + unsigned perc_store_real_address : 1; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; + unsigned long address; + unsigned : 4; + unsigned access_id : 4; +} per_lowcore_bits; + +typedef struct +{ + union { + per_cr_words words; + per_cr_bits bits; + } control_regs; + /* + * Use these flags instead of setting em_instruction_fetch + * directly they are used so that single stepping can be + * switched on & off while not affecting other tracing + */ + unsigned single_step : 1; + unsigned instruction_fetch : 1; + unsigned : 30; + /* + * These addresses are copied into cr10 & cr11 if single + * stepping is switched off + */ + unsigned long starting_addr; + unsigned long ending_addr; + union { + per_lowcore_words words; + per_lowcore_bits bits; + } lowcore; +} per_struct; + +typedef struct +{ + unsigned int len; + unsigned long kernel_addr; + unsigned long process_addr; +} ptrace_area; + +/* + * S/390 specific non posix ptrace requests. I chose unusual values so + * they are unlikely to clash with future ptrace definitions. + */ +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKTEXT_AREA 0x5002 +#define PTRACE_PEEKDATA_AREA 0x5003 +#define PTRACE_POKETEXT_AREA 0x5004 +#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 +#define PTRACE_ENABLE_TE 0x5009 +#define PTRACE_DISABLE_TE 0x5010 +#define PTRACE_TE_ABORT_RAND 0x5011 + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the number assigned in <linux/ptrace.h>. + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + +/* + * PT_PROT definition is loosely based on hppa bsd definition in + * gdb/hppab-nat.c + */ +#define PTRACE_PROT 21 + +typedef enum +{ + ptprot_set_access_watchpoint, + ptprot_set_write_watchpoint, + ptprot_disable_watchpoint +} ptprot_flags; + +typedef struct +{ + unsigned long lowaddr; + unsigned long hiaddr; + ptprot_flags prot; +} ptprot_area; + +/* Sequence of bytes for breakpoint illegal instruction. */ +#define S390_BREAKPOINT {0x0,0x1} +#define S390_BREAKPOINT_U16 ((__u16)0x0001) +#define S390_SYSCALL_OPCODE ((__u16)0x0a00) +#define S390_SYSCALL_SIZE 2 + +/* + * The user_regs_struct defines the way the user registers are + * store on the stack for signal handling. + */ +struct user_regs_struct +{ + psw_t psw; + unsigned long gprs[NUM_GPRS]; + unsigned int acrs[NUM_ACRS]; + unsigned long orig_gpr2; + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + per_struct per_info; + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_S390_PTRACE_H */ diff --git a/arch/s390/include/uapi/asm/qeth.h b/arch/s390/include/uapi/asm/qeth.h new file mode 100644 index 00000000000..3a896cf5258 --- /dev/null +++ b/arch/s390/include/uapi/asm/qeth.h @@ -0,0 +1,115 @@ +/* + * ioctl definitions for qeth driver + * + * Copyright IBM Corp. 2004 + * + * Author(s): Thomas Spatzier <tspat@de.ibm.com> + * + */ +#ifndef __ASM_S390_QETH_IOCTL_H__ +#define __ASM_S390_QETH_IOCTL_H__ +#include <linux/types.h> +#include <linux/ioctl.h> + +#define SIOC_QETH_ARP_SET_NO_ENTRIES (SIOCDEVPRIVATE) +#define SIOC_QETH_ARP_QUERY_INFO (SIOCDEVPRIVATE + 1) +#define SIOC_QETH_ARP_ADD_ENTRY (SIOCDEVPRIVATE + 2) +#define SIOC_QETH_ARP_REMOVE_ENTRY (SIOCDEVPRIVATE + 3) +#define SIOC_QETH_ARP_FLUSH_CACHE (SIOCDEVPRIVATE + 4) +#define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5) +#define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6) +#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7) + +struct qeth_arp_cache_entry { + __u8 macaddr[6]; + __u8 reserved1[2]; + __u8 ipaddr[16]; /* for both IPv4 and IPv6 */ + __u8 reserved2[32]; +} __attribute__ ((packed)); + +enum qeth_arp_ipaddrtype { + QETHARP_IP_ADDR_V4 = 1, + QETHARP_IP_ADDR_V6 = 2, +}; +struct qeth_arp_entrytype { + __u8 mac; + __u8 ip; +} __attribute__((packed)); + +#define QETH_QARP_MEDIASPECIFIC_BYTES 32 +#define QETH_QARP_MACADDRTYPE_BYTES 1 +struct qeth_arp_qi_entry7 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry7_ipv6 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[16]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry7_short { + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry7_short_ipv6 { + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[16]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5_ipv6 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 ipaddr[16]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5_short { + struct qeth_arp_entrytype type; + __u8 ipaddr[4]; +} __attribute__((packed)); + +struct qeth_arp_qi_entry5_short_ipv6 { + struct qeth_arp_entrytype type; + __u8 ipaddr[16]; +} __attribute__((packed)); +/* + * can be set by user if no "media specific information" is wanted + * -> saves a lot of space in user space buffer + */ +#define QETH_QARP_STRIP_ENTRIES 0x8000 +#define QETH_QARP_WITH_IPV6 0x4000 +#define QETH_QARP_REQUEST_MASK 0x00ff + +/* data sent to user space as result of query arp ioctl */ +#define QETH_QARP_USER_DATA_SIZE 20000 +#define QETH_QARP_MASK_OFFSET 4 +#define QETH_QARP_ENTRIES_OFFSET 6 +struct qeth_arp_query_user_data { + union { + __u32 data_len; /* set by user space program */ + __u32 no_entries; /* set by kernel */ + } u; + __u16 mask_bits; + char *entries; +} __attribute__((packed)); + +struct qeth_query_oat_data { + __u32 command; + __u32 buffer_len; + __u32 response_len; + __u64 ptr; +}; +#endif /* __ASM_S390_QETH_IOCTL_H__ */ diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h new file mode 100644 index 00000000000..ec23d1c73c9 --- /dev/null +++ b/arch/s390/include/uapi/asm/resource.h @@ -0,0 +1,13 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/resources.h" + */ + +#ifndef _S390_RESOURCE_H +#define _S390_RESOURCE_H + +#include <asm-generic/resource.h> + +#endif + diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h new file mode 100644 index 00000000000..32f3ab2a820 --- /dev/null +++ b/arch/s390/include/uapi/asm/schid.h @@ -0,0 +1,16 @@ +#ifndef _UAPIASM_SCHID_H +#define _UAPIASM_SCHID_H + +#include <linux/types.h> + +struct subchannel_id { + __u32 cssid : 8; + __u32 : 4; + __u32 m : 1; + __u32 ssid : 2; + __u32 one : 1; + __u32 sch_no : 16; +} __attribute__ ((packed, aligned(4))); + + +#endif /* _UAPIASM_SCHID_H */ diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h new file mode 100644 index 00000000000..f2818613ee4 --- /dev/null +++ b/arch/s390/include/uapi/asm/sclp_ctl.h @@ -0,0 +1,24 @@ +/* + * IOCTL interface for SCLP + * + * Copyright IBM Corp. 2012 + * + * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifndef _ASM_SCLP_CTL_H +#define _ASM_SCLP_CTL_H + +#include <linux/types.h> + +struct sclp_ctl_sccb { + __u32 cmdw; + __u64 sccb; +} __attribute__((packed)); + +#define SCLP_CTL_IOCTL_MAGIC 0x10 + +#define SCLP_CTL_SCCB \ + _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb) + +#endif diff --git a/arch/s390/include/uapi/asm/sembuf.h b/arch/s390/include/uapi/asm/sembuf.h new file mode 100644 index 00000000000..32626b0cac4 --- /dev/null +++ b/arch/s390/include/uapi/asm/sembuf.h @@ -0,0 +1,29 @@ +#ifndef _S390_SEMBUF_H +#define _S390_SEMBUF_H + +/* + * The semid64_ds structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem (for !__s390x__) + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ +#ifndef __s390x__ + unsigned long __unused1; +#endif /* ! __s390x__ */ + __kernel_time_t sem_ctime; /* last change time */ +#ifndef __s390x__ + unsigned long __unused2; +#endif /* ! __s390x__ */ + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _S390_SEMBUF_H */ diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h new file mode 100644 index 00000000000..5a637e3e385 --- /dev/null +++ b/arch/s390/include/uapi/asm/setup.h @@ -0,0 +1,13 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2010 + */ + +#ifndef _UAPI_ASM_S390_SETUP_H +#define _UAPI_ASM_S390_SETUP_H + +#define COMMAND_LINE_SIZE 4096 + +#define ARCH_COMMAND_LINE_SIZE 896 + +#endif /* _UAPI_ASM_S390_SETUP_H */ diff --git a/arch/s390/include/uapi/asm/shmbuf.h b/arch/s390/include/uapi/asm/shmbuf.h new file mode 100644 index 00000000000..eed2e280ce3 --- /dev/null +++ b/arch/s390/include/uapi/asm/shmbuf.h @@ -0,0 +1,48 @@ +#ifndef _S390_SHMBUF_H +#define _S390_SHMBUF_H + +/* + * The shmid64_ds structure for S/390 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem (for !__s390x__) + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ +#ifndef __s390x__ + unsigned long __unused1; +#endif /* ! __s390x__ */ + __kernel_time_t shm_dtime; /* last detach time */ +#ifndef __s390x__ + unsigned long __unused2; +#endif /* ! __s390x__ */ + __kernel_time_t shm_ctime; /* last change time */ +#ifndef __s390x__ + unsigned long __unused3; +#endif /* ! __s390x__ */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _S390_SHMBUF_H */ diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h new file mode 100644 index 00000000000..5d9cc19462c --- /dev/null +++ b/arch/s390/include/uapi/asm/sie.h @@ -0,0 +1,243 @@ +#ifndef _UAPI_ASM_S390_SIE_H +#define _UAPI_ASM_S390_SIE_H + +#define diagnose_codes \ + { 0x10, "DIAG (0x10) release pages" }, \ + { 0x44, "DIAG (0x44) time slice end" }, \ + { 0x9c, "DIAG (0x9c) time slice end directed" }, \ + { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ + { 0x258, "DIAG (0x258) page-reference services" }, \ + { 0x308, "DIAG (0x308) ipl functions" }, \ + { 0x500, "DIAG (0x500) KVM virtio functions" }, \ + { 0x501, "DIAG (0x501) KVM breakpoint" } + +#define sigp_order_codes \ + { 0x01, "SIGP sense" }, \ + { 0x02, "SIGP external call" }, \ + { 0x03, "SIGP emergency signal" }, \ + { 0x05, "SIGP stop" }, \ + { 0x06, "SIGP restart" }, \ + { 0x09, "SIGP stop and store status" }, \ + { 0x0b, "SIGP initial cpu reset" }, \ + { 0x0d, "SIGP set prefix" }, \ + { 0x0e, "SIGP store status at address" }, \ + { 0x12, "SIGP set architecture" }, \ + { 0x15, "SIGP sense running" } + +#define icpt_prog_codes \ + { 0x0001, "Prog Operation" }, \ + { 0x0002, "Prog Privileged Operation" }, \ + { 0x0003, "Prog Execute" }, \ + { 0x0004, "Prog Protection" }, \ + { 0x0005, "Prog Addressing" }, \ + { 0x0006, "Prog Specification" }, \ + { 0x0007, "Prog Data" }, \ + { 0x0008, "Prog Fixedpoint overflow" }, \ + { 0x0009, "Prog Fixedpoint divide" }, \ + { 0x000A, "Prog Decimal overflow" }, \ + { 0x000B, "Prog Decimal divide" }, \ + { 0x000C, "Prog HFP exponent overflow" }, \ + { 0x000D, "Prog HFP exponent underflow" }, \ + { 0x000E, "Prog HFP significance" }, \ + { 0x000F, "Prog HFP divide" }, \ + { 0x0010, "Prog Segment translation" }, \ + { 0x0011, "Prog Page translation" }, \ + { 0x0012, "Prog Translation specification" }, \ + { 0x0013, "Prog Special operation" }, \ + { 0x0015, "Prog Operand" }, \ + { 0x0016, "Prog Trace table" }, \ + { 0x0017, "Prog ASNtranslation specification" }, \ + { 0x001C, "Prog Spaceswitch event" }, \ + { 0x001D, "Prog HFP square root" }, \ + { 0x001F, "Prog PCtranslation specification" }, \ + { 0x0020, "Prog AFX translation" }, \ + { 0x0021, "Prog ASX translation" }, \ + { 0x0022, "Prog LX translation" }, \ + { 0x0023, "Prog EX translation" }, \ + { 0x0024, "Prog Primary authority" }, \ + { 0x0025, "Prog Secondary authority" }, \ + { 0x0026, "Prog LFXtranslation exception" }, \ + { 0x0027, "Prog LSXtranslation exception" }, \ + { 0x0028, "Prog ALET specification" }, \ + { 0x0029, "Prog ALEN translation" }, \ + { 0x002A, "Prog ALE sequence" }, \ + { 0x002B, "Prog ASTE validity" }, \ + { 0x002C, "Prog ASTE sequence" }, \ + { 0x002D, "Prog Extended authority" }, \ + { 0x002E, "Prog LSTE sequence" }, \ + { 0x002F, "Prog ASTE instance" }, \ + { 0x0030, "Prog Stack full" }, \ + { 0x0031, "Prog Stack empty" }, \ + { 0x0032, "Prog Stack specification" }, \ + { 0x0033, "Prog Stack type" }, \ + { 0x0034, "Prog Stack operation" }, \ + { 0x0039, "Prog Region first translation" }, \ + { 0x003A, "Prog Region second translation" }, \ + { 0x003B, "Prog Region third translation" }, \ + { 0x0040, "Prog Monitor event" }, \ + { 0x0080, "Prog PER event" }, \ + { 0x0119, "Prog Crypto operation" } + +#define exit_code_ipa0(ipa0, opcode, mnemonic) \ + { (ipa0 << 8 | opcode), #ipa0 " " mnemonic } +#define exit_code(opcode, mnemonic) \ + { opcode, mnemonic } + +#define icpt_insn_codes \ + exit_code_ipa0(0x01, 0x01, "PR"), \ + exit_code_ipa0(0x01, 0x04, "PTFF"), \ + exit_code_ipa0(0x01, 0x07, "SCKPF"), \ + exit_code_ipa0(0xAA, 0x00, "RINEXT"), \ + exit_code_ipa0(0xAA, 0x01, "RION"), \ + exit_code_ipa0(0xAA, 0x02, "TRIC"), \ + exit_code_ipa0(0xAA, 0x03, "RIOFF"), \ + exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \ + exit_code_ipa0(0xB2, 0x02, "STIDP"), \ + exit_code_ipa0(0xB2, 0x04, "SCK"), \ + exit_code_ipa0(0xB2, 0x05, "STCK"), \ + exit_code_ipa0(0xB2, 0x06, "SCKC"), \ + exit_code_ipa0(0xB2, 0x07, "STCKC"), \ + exit_code_ipa0(0xB2, 0x08, "SPT"), \ + exit_code_ipa0(0xB2, 0x09, "STPT"), \ + exit_code_ipa0(0xB2, 0x0d, "PTLB"), \ + exit_code_ipa0(0xB2, 0x10, "SPX"), \ + exit_code_ipa0(0xB2, 0x11, "STPX"), \ + exit_code_ipa0(0xB2, 0x12, "STAP"), \ + exit_code_ipa0(0xB2, 0x14, "SIE"), \ + exit_code_ipa0(0xB2, 0x16, "SETR"), \ + exit_code_ipa0(0xB2, 0x17, "STETR"), \ + exit_code_ipa0(0xB2, 0x18, "PC"), \ + exit_code_ipa0(0xB2, 0x20, "SERVC"), \ + exit_code_ipa0(0xB2, 0x28, "PT"), \ + exit_code_ipa0(0xB2, 0x29, "ISKE"), \ + exit_code_ipa0(0xB2, 0x2a, "RRBE"), \ + exit_code_ipa0(0xB2, 0x2b, "SSKE"), \ + exit_code_ipa0(0xB2, 0x2c, "TB"), \ + exit_code_ipa0(0xB2, 0x2e, "PGIN"), \ + exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \ + exit_code_ipa0(0xB2, 0x30, "CSCH"), \ + exit_code_ipa0(0xB2, 0x31, "HSCH"), \ + exit_code_ipa0(0xB2, 0x32, "MSCH"), \ + exit_code_ipa0(0xB2, 0x33, "SSCH"), \ + exit_code_ipa0(0xB2, 0x34, "STSCH"), \ + exit_code_ipa0(0xB2, 0x35, "TSCH"), \ + exit_code_ipa0(0xB2, 0x36, "TPI"), \ + exit_code_ipa0(0xB2, 0x37, "SAL"), \ + exit_code_ipa0(0xB2, 0x38, "RSCH"), \ + exit_code_ipa0(0xB2, 0x39, "STCRW"), \ + exit_code_ipa0(0xB2, 0x3a, "STCPS"), \ + exit_code_ipa0(0xB2, 0x3b, "RCHP"), \ + exit_code_ipa0(0xB2, 0x3c, "SCHM"), \ + exit_code_ipa0(0xB2, 0x40, "BAKR"), \ + exit_code_ipa0(0xB2, 0x48, "PALB"), \ + exit_code_ipa0(0xB2, 0x4c, "TAR"), \ + exit_code_ipa0(0xB2, 0x50, "CSP"), \ + exit_code_ipa0(0xB2, 0x54, "MVPG"), \ + exit_code_ipa0(0xB2, 0x58, "BSG"), \ + exit_code_ipa0(0xB2, 0x5a, "BSA"), \ + exit_code_ipa0(0xB2, 0x5f, "CHSC"), \ + exit_code_ipa0(0xB2, 0x74, "SIGA"), \ + exit_code_ipa0(0xB2, 0x76, "XSCH"), \ + exit_code_ipa0(0xB2, 0x78, "STCKE"), \ + exit_code_ipa0(0xB2, 0x7c, "STCKF"), \ + exit_code_ipa0(0xB2, 0x7d, "STSI"), \ + exit_code_ipa0(0xB2, 0xb0, "STFLE"), \ + exit_code_ipa0(0xB2, 0xb1, "STFL"), \ + exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \ + exit_code_ipa0(0xB2, 0xf8, "TEND"), \ + exit_code_ipa0(0xB2, 0xfc, "TABORT"), \ + exit_code_ipa0(0xB9, 0x1e, "KMAC"), \ + exit_code_ipa0(0xB9, 0x28, "PCKMO"), \ + exit_code_ipa0(0xB9, 0x2a, "KMF"), \ + exit_code_ipa0(0xB9, 0x2b, "KMO"), \ + exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \ + exit_code_ipa0(0xB9, 0x2e, "KM"), \ + exit_code_ipa0(0xB9, 0x2f, "KMC"), \ + exit_code_ipa0(0xB9, 0x3e, "KIMD"), \ + exit_code_ipa0(0xB9, 0x3f, "KLMD"), \ + exit_code_ipa0(0xB9, 0x8a, "CSPG"), \ + exit_code_ipa0(0xB9, 0x8d, "EPSW"), \ + exit_code_ipa0(0xB9, 0x8e, "IDTE"), \ + exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \ + exit_code_ipa0(0xB9, 0x9c, "EQBS"), \ + exit_code_ipa0(0xB9, 0xa2, "PTF"), \ + exit_code_ipa0(0xB9, 0xab, "ESSA"), \ + exit_code_ipa0(0xB9, 0xae, "RRBM"), \ + exit_code_ipa0(0xB9, 0xaf, "PFMF"), \ + exit_code_ipa0(0xE3, 0x03, "LRAG"), \ + exit_code_ipa0(0xE3, 0x13, "LRAY"), \ + exit_code_ipa0(0xE3, 0x25, "NTSTG"), \ + exit_code_ipa0(0xE5, 0x00, "LASP"), \ + exit_code_ipa0(0xE5, 0x01, "TPROT"), \ + exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \ + exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \ + exit_code_ipa0(0xEB, 0x25, "STCTG"), \ + exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \ + exit_code_ipa0(0xEB, 0x60, "LRIC"), \ + exit_code_ipa0(0xEB, 0x61, "STRIC"), \ + exit_code_ipa0(0xEB, 0x62, "MRIC"), \ + exit_code_ipa0(0xEB, 0x8a, "SQBS"), \ + exit_code_ipa0(0xC8, 0x01, "ECTG"), \ + exit_code(0x0a, "SVC"), \ + exit_code(0x80, "SSM"), \ + exit_code(0x82, "LPSW"), \ + exit_code(0x83, "DIAG"), \ + exit_code(0xae, "SIGP"), \ + exit_code(0xac, "STNSM"), \ + exit_code(0xad, "STOSM"), \ + exit_code(0xb1, "LRA"), \ + exit_code(0xb6, "STCTL"), \ + exit_code(0xb7, "LCTL"), \ + exit_code(0xee, "PLO") + +#define sie_intercept_code \ + { 0x00, "Host interruption" }, \ + { 0x04, "Instruction" }, \ + { 0x08, "Program interruption" }, \ + { 0x0c, "Instruction and program interruption" }, \ + { 0x10, "External request" }, \ + { 0x14, "External interruption" }, \ + { 0x18, "I/O request" }, \ + { 0x1c, "Wait state" }, \ + { 0x20, "Validity" }, \ + { 0x28, "Stop request" }, \ + { 0x2c, "Operation exception" }, \ + { 0x38, "Partial-execution" }, \ + { 0x3c, "I/O interruption" }, \ + { 0x40, "I/O instruction" }, \ + { 0x48, "Timing subset" } + +/* + * This is the simple interceptable instructions decoder. + * + * It will be used as userspace interface and it can be used in places + * that does not allow to use general decoder functions, + * such as trace events declarations. + * + * Some userspace tools may want to parse this code + * and would be confused by switch(), if() and other statements, + * but they can understand conditional operator. + */ +#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \ + (insn >> 56) == (ipa0) ? \ + ((ipa0 << 8) | ((insn >> rshift) & mask)) : + +#define INSN_DECODE(insn) (insn >> 56) + +/* + * The macro icpt_insn_decoder() takes an intercepted instruction + * and returns a key, which can be used to find a mnemonic name + * of the instruction in the icpt_insn_codes table. + */ +#define icpt_insn_decoder(insn) \ + INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \ + INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \ + INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \ + INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \ + INSN_DECODE(insn) + +#endif /* _UAPI_ASM_S390_SIE_H */ diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h new file mode 100644 index 00000000000..b30de9c01bb --- /dev/null +++ b/arch/s390/include/uapi/asm/sigcontext.h @@ -0,0 +1,70 @@ +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + */ + +#ifndef _ASM_S390_SIGCONTEXT_H +#define _ASM_S390_SIGCONTEXT_H + +#include <linux/compiler.h> + +#define __NUM_GPRS 16 +#define __NUM_FPRS 16 +#define __NUM_ACRS 16 + +#ifndef __s390x__ + +/* Has to be at least _NSIG_WORDS from asm/signal.h */ +#define _SIGCONTEXT_NSIG 64 +#define _SIGCONTEXT_NSIG_BPW 32 +/* Size of stack frame allocated when calling signal handler. */ +#define __SIGNAL_FRAMESIZE 96 + +#else /* __s390x__ */ + +/* Has to be at least _NSIG_WORDS from asm/signal.h */ +#define _SIGCONTEXT_NSIG 64 +#define _SIGCONTEXT_NSIG_BPW 64 +/* Size of stack frame allocated when calling signal handler. */ +#define __SIGNAL_FRAMESIZE 160 + +#endif /* __s390x__ */ + +#define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW) +#define _SIGMASK_COPY_SIZE (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS) + +typedef struct +{ + unsigned long mask; + unsigned long addr; +} __attribute__ ((aligned(8))) _psw_t; + +typedef struct +{ + _psw_t psw; + unsigned long gprs[__NUM_GPRS]; + unsigned int acrs[__NUM_ACRS]; +} _s390_regs_common; + +typedef struct +{ + unsigned int fpc; + unsigned int pad; + double fprs[__NUM_FPRS]; +} _s390_fp_regs; + +typedef struct +{ + _s390_regs_common regs; + _s390_fp_regs fpregs; +} _sigregs; + +struct sigcontext +{ + unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS]; + _sigregs __user *sregs; +}; + + +#endif + diff --git a/arch/s390/include/uapi/asm/siginfo.h b/arch/s390/include/uapi/asm/siginfo.h new file mode 100644 index 00000000000..91fd3e4b70c --- /dev/null +++ b/arch/s390/include/uapi/asm/siginfo.h @@ -0,0 +1,16 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/siginfo.h" + */ + +#ifndef _S390_SIGINFO_H +#define _S390_SIGINFO_H + +#ifdef __s390x__ +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + +#include <asm-generic/siginfo.h> + +#endif diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h new file mode 100644 index 00000000000..2f43cfbf5f1 --- /dev/null +++ b/arch/s390/include/uapi/asm/signal.h @@ -0,0 +1,129 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/signal.h" + */ + +#ifndef _UAPI_ASMS390_SIGNAL_H +#define _UAPI_ASMS390_SIGNAL_H + +#include <linux/types.h> +#include <linux/time.h> + +/* Avoid too many header ordering problems. */ +struct siginfo; +struct pt_regs; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include <asm-generic/signal-defs.h> + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; +#ifndef __s390x__ /* lovely */ + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +#else /* __s390x__ */ + unsigned long sa_flags; + void (*sa_restorer)(void); + sigset_t sa_mask; +#endif /* __s390x__ */ +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif /* _UAPI_ASMS390_SIGNAL_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h new file mode 100644 index 00000000000..e031332096d --- /dev/null +++ b/arch/s390/include/uapi/asm/socket.h @@ -0,0 +1,89 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/socket.h" + */ + +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include <asm/sockios.h> + +/* For setsockopt(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +#define SO_REUSEPORT 15 +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +#define SO_PEERSEC 31 +#define SO_PASSSEC 34 +#define SO_TIMESTAMPNS 35 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + +#define SO_PROTOCOL 38 +#define SO_DOMAIN 39 + +#define SO_RXQ_OVFL 40 + +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS +#define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + +#define SO_LOCK_FILTER 44 + +#define SO_SELECT_ERR_QUEUE 45 + +#define SO_BUSY_POLL 46 + +#define SO_MAX_PACING_RATE 47 + +#define SO_BPF_EXTENSIONS 48 + +#endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h new file mode 100644 index 00000000000..6f60eee7324 --- /dev/null +++ b/arch/s390/include/uapi/asm/sockios.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SOCKIOS_H +#define _ASM_S390_SOCKIOS_H + +#include <asm-generic/sockios.h> + +#endif diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h new file mode 100644 index 00000000000..b4ca97d9146 --- /dev/null +++ b/arch/s390/include/uapi/asm/stat.h @@ -0,0 +1,103 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/stat.h" + */ + +#ifndef _S390_STAT_H +#define _S390_STAT_H + +#ifndef __s390x__ +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct stat { + unsigned short st_dev; + unsigned short __pad1; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned short __pad2; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused4; + unsigned long __unused5; +}; + +/* This matches struct stat64 in glibc2.1, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned long long st_dev; + unsigned int __pad1; +#define STAT64_HAS_BROKEN_ST_INO 1 + unsigned long __st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned long st_uid; + unsigned long st_gid; + unsigned long long st_rdev; + unsigned int __pad3; + long long st_size; + unsigned long st_blksize; + unsigned char __pad4[4]; + unsigned long __pad5; /* future possible st_blocks high bits */ + unsigned long st_blocks; /* Number 512-byte blocks allocated. */ + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; /* will be high 32 bits of ctime someday */ + unsigned long long st_ino; +}; + +#else /* __s390x__ */ + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad1; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; +}; + +#endif /* __s390x__ */ + +#define STAT_HAVE_NSEC 1 + +#endif diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h new file mode 100644 index 00000000000..471eb09184d --- /dev/null +++ b/arch/s390/include/uapi/asm/statfs.h @@ -0,0 +1,50 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/statfs.h" + */ + +#ifndef _S390_STATFS_H +#define _S390_STATFS_H + +/* + * We can't use <asm-generic/statfs.h> because in 64-bit mode + * we mix ints of different sizes in our struct statfs. + */ + +#ifndef __KERNEL_STRICT_NAMES +#include <linux/types.h> +typedef __kernel_fsid_t fsid_t; +#endif + +struct statfs { + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; + __kernel_fsid_t f_fsid; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; +}; + +struct statfs64 { + unsigned int f_type; + unsigned int f_bsize; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; + __kernel_fsid_t f_fsid; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; +}; + +#endif diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h new file mode 100644 index 00000000000..da3bfe5cc16 --- /dev/null +++ b/arch/s390/include/uapi/asm/swab.h @@ -0,0 +1,89 @@ +#ifndef _S390_SWAB_H +#define _S390_SWAB_H + +/* + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/types.h> + +#ifndef __s390x__ +# define __SWAB_64_THRU_32__ +#endif + +#ifdef __s390x__ +static inline __u64 __arch_swab64p(const __u64 *x) +{ + __u64 result; + + asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x)); + return result; +} +#define __arch_swab64p __arch_swab64p + +static inline __u64 __arch_swab64(__u64 x) +{ + __u64 result; + + asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x)); + return result; +} +#define __arch_swab64 __arch_swab64 + +static inline void __arch_swab64s(__u64 *x) +{ + *x = __arch_swab64p(x); +} +#define __arch_swab64s __arch_swab64s +#endif /* __s390x__ */ + +static inline __u32 __arch_swab32p(const __u32 *x) +{ + __u32 result; + + asm volatile( +#ifndef __s390x__ + " icm %0,8,%O1+3(%R1)\n" + " icm %0,4,%O1+2(%R1)\n" + " icm %0,2,%O1+1(%R1)\n" + " ic %0,%1" + : "=&d" (result) : "Q" (*x) : "cc"); +#else /* __s390x__ */ + " lrv %0,%1" + : "=d" (result) : "m" (*x)); +#endif /* __s390x__ */ + return result; +} +#define __arch_swab32p __arch_swab32p + +#ifdef __s390x__ +static inline __u32 __arch_swab32(__u32 x) +{ + __u32 result; + + asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x)); + return result; +} +#define __arch_swab32 __arch_swab32 +#endif /* __s390x__ */ + +static inline __u16 __arch_swab16p(const __u16 *x) +{ + __u16 result; + + asm volatile( +#ifndef __s390x__ + " icm %0,2,%O1+1(%R1)\n" + " ic %0,%1\n" + : "=&d" (result) : "Q" (*x) : "cc"); +#else /* __s390x__ */ + " lrvh %0,%1" + : "=d" (result) : "m" (*x)); +#endif /* __s390x__ */ + return result; +} +#define __arch_swab16p __arch_swab16p + +#endif /* _S390_SWAB_H */ diff --git a/arch/s390/include/uapi/asm/tape390.h b/arch/s390/include/uapi/asm/tape390.h new file mode 100644 index 00000000000..b2bc4bab792 --- /dev/null +++ b/arch/s390/include/uapi/asm/tape390.h @@ -0,0 +1,102 @@ +/************************************************************************* + * + * enables user programs to display messages and control encryption + * on s390 tape devices + * + * Copyright IBM Corp. 2001, 2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + * + *************************************************************************/ + +#ifndef _TAPE390_H +#define _TAPE390_H + +#define TAPE390_DISPLAY _IOW('d', 1, struct display_struct) + +/* + * The TAPE390_DISPLAY ioctl calls the Load Display command + * which transfers 17 bytes of data from the channel to the subsystem: + * - 1 format control byte, and + * - two 8-byte messages + * + * Format control byte: + * 0-2: New Message Overlay + * 3: Alternate Messages + * 4: Blink Message + * 5: Display Low/High Message + * 6: Reserved + * 7: Automatic Load Request + * + */ + +typedef struct display_struct { + char cntrl; + char message1[8]; + char message2[8]; +} display_struct; + +/* + * Tape encryption support + */ + +struct tape390_crypt_info { + char capability; + char status; + char medium_status; +} __attribute__ ((packed)); + + +/* Macros for "capable" field */ +#define TAPE390_CRYPT_SUPPORTED_MASK 0x01 +#define TAPE390_CRYPT_SUPPORTED(x) \ + ((x.capability & TAPE390_CRYPT_SUPPORTED_MASK)) + +/* Macros for "status" field */ +#define TAPE390_CRYPT_ON_MASK 0x01 +#define TAPE390_CRYPT_ON(x) (((x.status) & TAPE390_CRYPT_ON_MASK)) + +/* Macros for "medium status" field */ +#define TAPE390_MEDIUM_LOADED_MASK 0x01 +#define TAPE390_MEDIUM_ENCRYPTED_MASK 0x02 +#define TAPE390_MEDIUM_ENCRYPTED(x) \ + (((x.medium_status) & TAPE390_MEDIUM_ENCRYPTED_MASK)) +#define TAPE390_MEDIUM_LOADED(x) \ + (((x.medium_status) & TAPE390_MEDIUM_LOADED_MASK)) + +/* + * The TAPE390_CRYPT_SET ioctl is used to switch on/off encryption. + * The "encryption_capable" and "tape_status" fields are ignored for this ioctl! + */ +#define TAPE390_CRYPT_SET _IOW('d', 2, struct tape390_crypt_info) + +/* + * The TAPE390_CRYPT_QUERY ioctl is used to query the encryption state. + */ +#define TAPE390_CRYPT_QUERY _IOR('d', 3, struct tape390_crypt_info) + +/* Values for "kekl1/2_type" and "kekl1/2_type_on_tape" fields */ +#define TAPE390_KEKL_TYPE_NONE 0 +#define TAPE390_KEKL_TYPE_LABEL 1 +#define TAPE390_KEKL_TYPE_HASH 2 + +struct tape390_kekl { + unsigned char type; + unsigned char type_on_tape; + char label[65]; +} __attribute__ ((packed)); + +struct tape390_kekl_pair { + struct tape390_kekl kekl[2]; +} __attribute__ ((packed)); + +/* + * The TAPE390_KEKL_SET ioctl is used to set Key Encrypting Key labels. + */ +#define TAPE390_KEKL_SET _IOW('d', 4, struct tape390_kekl_pair) + +/* + * The TAPE390_KEKL_QUERY ioctl is used to query Key Encrypting Key labels. + */ +#define TAPE390_KEKL_QUERY _IOR('d', 5, struct tape390_kekl_pair) + +#endif diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h new file mode 100644 index 00000000000..71bf6ac6a2b --- /dev/null +++ b/arch/s390/include/uapi/asm/termbits.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_TERMBITS_H +#define _ASM_S390_TERMBITS_H + +#include <asm-generic/termbits.h> + +#endif diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h new file mode 100644 index 00000000000..554f973db1e --- /dev/null +++ b/arch/s390/include/uapi/asm/termios.h @@ -0,0 +1,49 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/termios.h" + */ + +#ifndef _UAPI_S390_TERMIOS_H +#define _UAPI_S390_TERMIOS_H + +#include <asm/termbits.h> +#include <asm/ioctls.h> + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + + +#endif /* _UAPI_S390_TERMIOS_H */ diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h new file mode 100644 index 00000000000..038f2b9178a --- /dev/null +++ b/arch/s390/include/uapi/asm/types.h @@ -0,0 +1,22 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/types.h" + */ + +#ifndef _UAPI_S390_TYPES_H +#define _UAPI_S390_TYPES_H + +#include <asm-generic/int-ll64.h> + +#ifndef __ASSEMBLY__ + +/* A address type so that arithmetic can be done on it & it can be upgraded to + 64 bit when necessary +*/ +typedef unsigned long addr_t; +typedef __signed__ long saddr_t; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_S390_TYPES_H */ diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h new file mode 100644 index 00000000000..3e077b2a470 --- /dev/null +++ b/arch/s390/include/uapi/asm/ucontext.h @@ -0,0 +1,37 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/ucontext.h" + */ + +#ifndef _ASM_S390_UCONTEXT_H +#define _ASM_S390_UCONTEXT_H + +#define UC_EXTENDED 0x00000001 + +#ifndef __s390x__ + +struct ucontext_extended { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + _sigregs uc_mcontext; + sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(sigset_t)]; + unsigned long uc_gprs_high[16]; +}; + +#endif + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + _sigregs uc_mcontext; + sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(sigset_t)]; +}; + +#endif /* !_ASM_S390_UCONTEXT_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h new file mode 100644 index 00000000000..3802d2d3a18 --- /dev/null +++ b/arch/s390/include/uapi/asm/unistd.h @@ -0,0 +1,378 @@ +/* + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ + +#ifndef _UAPI_ASM_S390_UNISTD_H_ +#define _UAPI_ASM_S390_UNISTD_H_ + +/* + * This file contains the system call numbers. + */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_restart_syscall 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_brk 45 +#define __NR_signal 48 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 57 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_symlink 83 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_lookup_dcookie 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_getdents 141 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 +#define __NR_putpmsg 189 +#define __NR_vfork 190 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_readahead 222 +#define __NR_setxattr 224 +#define __NR_lsetxattr 225 +#define __NR_fsetxattr 226 +#define __NR_getxattr 227 +#define __NR_lgetxattr 228 +#define __NR_fgetxattr 229 +#define __NR_listxattr 230 +#define __NR_llistxattr 231 +#define __NR_flistxattr 232 +#define __NR_removexattr 233 +#define __NR_lremovexattr 234 +#define __NR_fremovexattr 235 +#define __NR_gettid 236 +#define __NR_tkill 237 +#define __NR_futex 238 +#define __NR_sched_setaffinity 239 +#define __NR_sched_getaffinity 240 +#define __NR_tgkill 241 +/* Number 242 is reserved for tux */ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 +#define __NR_exit_group 248 +#define __NR_epoll_create 249 +#define __NR_epoll_ctl 250 +#define __NR_epoll_wait 251 +#define __NR_set_tid_address 252 +#define __NR_fadvise64 253 +#define __NR_timer_create 254 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +/* Number 263 is reserved for vserver */ +#define __NR_statfs64 265 +#define __NR_fstatfs64 266 +#define __NR_remap_file_pages 267 +/* Number 268 is reserved for new sys_mbind */ +/* Number 269 is reserved for new sys_get_mempolicy */ +/* Number 270 is reserved for new sys_set_mempolicy */ +#define __NR_mq_open 271 +#define __NR_mq_unlink 272 +#define __NR_mq_timedsend 273 +#define __NR_mq_timedreceive 274 +#define __NR_mq_notify 275 +#define __NR_mq_getsetattr 276 +#define __NR_kexec_load 277 +#define __NR_add_key 278 +#define __NR_request_key 279 +#define __NR_keyctl 280 +#define __NR_waitid 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +/* Number 287 is reserved for new sys_migrate_pages */ +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 +#define __NR_futimesat 292 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +/* Number 310 is reserved for new sys_move_pages */ +#define __NR_getcpu 311 +#define __NR_epoll_pwait 312 +#define __NR_utimes 313 +#define __NR_fallocate 314 +#define __NR_utimensat 315 +#define __NR_signalfd 316 +#define __NR_timerfd 317 +#define __NR_eventfd 318 +#define __NR_timerfd_create 319 +#define __NR_timerfd_settime 320 +#define __NR_timerfd_gettime 321 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_event_open 331 +#define __NR_fanotify_init 332 +#define __NR_fanotify_mark 333 +#define __NR_prlimit64 334 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define __NR_setns 339 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define __NR_s390_runtime_instr 342 +#define __NR_kcmp 343 +#define __NR_finit_module 344 +#define __NR_sched_setattr 345 +#define __NR_sched_getattr 346 +#define __NR_renameat2 347 +#define NR_syscalls 348 + +/* + * There are some system calls that are not present on 64 bit, some + * have a different name although they do the same (e.g. __NR_chown32 + * is __NR_chown on 64 bit). + */ +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else + +#define __NR_select 142 +#define __NR_getrlimit 191 /* SuS compliant getrlimit */ +#define __NR_lchown 198 +#define __NR_getuid 199 +#define __NR_getgid 200 +#define __NR_geteuid 201 +#define __NR_getegid 202 +#define __NR_setreuid 203 +#define __NR_setregid 204 +#define __NR_getgroups 205 +#define __NR_setgroups 206 +#define __NR_fchown 207 +#define __NR_setresuid 208 +#define __NR_getresuid 209 +#define __NR_setresgid 210 +#define __NR_getresgid 211 +#define __NR_chown 212 +#define __NR_setuid 213 +#define __NR_setgid 214 +#define __NR_setfsuid 215 +#define __NR_setfsgid 216 +#define __NR_newfstatat 293 + +#endif + +#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 00000000000..a9a4ebf79fa --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -0,0 +1,21 @@ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/arch/s390/include/uapi/asm/vtoc.h b/arch/s390/include/uapi/asm/vtoc.h new file mode 100644 index 00000000000..221419de275 --- /dev/null +++ b/arch/s390/include/uapi/asm/vtoc.h @@ -0,0 +1,213 @@ +/* + * This file contains volume label definitions for DASD devices. + * + * Copyright IBM Corp. 2005 + * + * Author(s): Volker Sameske <sameske@de.ibm.com> + * + */ + +#ifndef _ASM_S390_VTOC_H +#define _ASM_S390_VTOC_H + +#include <linux/types.h> + +struct vtoc_ttr +{ + __u16 tt; + __u8 r; +} __attribute__ ((packed)); + +struct vtoc_cchhb +{ + __u16 cc; + __u16 hh; + __u8 b; +} __attribute__ ((packed)); + +struct vtoc_cchh +{ + __u16 cc; + __u16 hh; +} __attribute__ ((packed)); + +struct vtoc_labeldate +{ + __u8 year; + __u16 day; +} __attribute__ ((packed)); + +struct vtoc_volume_label_cdl +{ + char volkey[4]; /* volume key = volume label */ + char vollbl[4]; /* volume label */ + char volid[6]; /* volume identifier */ + __u8 security; /* security byte */ + struct vtoc_cchhb vtoc; /* VTOC address */ + char res1[5]; /* reserved */ + char cisize[4]; /* CI-size for FBA,... */ + /* ...blanks for CKD */ + char blkperci[4]; /* no of blocks per CI (FBA), blanks for CKD */ + char labperci[4]; /* no of labels per CI (FBA), blanks for CKD */ + char res2[4]; /* reserved */ + char lvtoc[14]; /* owner code for LVTOC */ + char res3[29]; /* reserved */ +} __attribute__ ((packed)); + +struct vtoc_volume_label_ldl { + char vollbl[4]; /* volume label */ + char volid[6]; /* volume identifier */ + char res3[69]; /* reserved */ + char ldl_version; /* version number, valid for ldl format */ + __u64 formatted_blocks; /* valid when ldl_version >= f2 */ +} __attribute__ ((packed)); + +struct vtoc_extent +{ + __u8 typeind; /* extent type indicator */ + __u8 seqno; /* extent sequence number */ + struct vtoc_cchh llimit; /* starting point of this extent */ + struct vtoc_cchh ulimit; /* ending point of this extent */ +} __attribute__ ((packed)); + +struct vtoc_dev_const +{ + __u16 DS4DSCYL; /* number of logical cyls */ + __u16 DS4DSTRK; /* number of tracks in a logical cylinder */ + __u16 DS4DEVTK; /* device track length */ + __u8 DS4DEVI; /* non-last keyed record overhead */ + __u8 DS4DEVL; /* last keyed record overhead */ + __u8 DS4DEVK; /* non-keyed record overhead differential */ + __u8 DS4DEVFG; /* flag byte */ + __u16 DS4DEVTL; /* device tolerance */ + __u8 DS4DEVDT; /* number of DSCB's per track */ + __u8 DS4DEVDB; /* number of directory blocks per track */ +} __attribute__ ((packed)); + +struct vtoc_format1_label +{ + char DS1DSNAM[44]; /* data set name */ + __u8 DS1FMTID; /* format identifier */ + char DS1DSSN[6]; /* data set serial number */ + __u16 DS1VOLSQ; /* volume sequence number */ + struct vtoc_labeldate DS1CREDT; /* creation date: ydd */ + struct vtoc_labeldate DS1EXPDT; /* expiration date */ + __u8 DS1NOEPV; /* number of extents on volume */ + __u8 DS1NOBDB; /* no. of bytes used in last direction blk */ + __u8 DS1FLAG1; /* flag 1 */ + char DS1SYSCD[13]; /* system code */ + struct vtoc_labeldate DS1REFD; /* date last referenced */ + __u8 DS1SMSFG; /* system managed storage indicators */ + __u8 DS1SCXTF; /* sec. space extension flag byte */ + __u16 DS1SCXTV; /* secondary space extension value */ + __u8 DS1DSRG1; /* data set organisation byte 1 */ + __u8 DS1DSRG2; /* data set organisation byte 2 */ + __u8 DS1RECFM; /* record format */ + __u8 DS1OPTCD; /* option code */ + __u16 DS1BLKL; /* block length */ + __u16 DS1LRECL; /* record length */ + __u8 DS1KEYL; /* key length */ + __u16 DS1RKP; /* relative key position */ + __u8 DS1DSIND; /* data set indicators */ + __u8 DS1SCAL1; /* secondary allocation flag byte */ + char DS1SCAL3[3]; /* secondary allocation quantity */ + struct vtoc_ttr DS1LSTAR; /* last used track and block on track */ + __u16 DS1TRBAL; /* space remaining on last used track */ + __u16 res1; /* reserved */ + struct vtoc_extent DS1EXT1; /* first extent description */ + struct vtoc_extent DS1EXT2; /* second extent description */ + struct vtoc_extent DS1EXT3; /* third extent description */ + struct vtoc_cchhb DS1PTRDS; /* possible pointer to f2 or f3 DSCB */ +} __attribute__ ((packed)); + +struct vtoc_format4_label +{ + char DS4KEYCD[44]; /* key code for VTOC labels: 44 times 0x04 */ + __u8 DS4IDFMT; /* format identifier */ + struct vtoc_cchhb DS4HPCHR; /* highest address of a format 1 DSCB */ + __u16 DS4DSREC; /* number of available DSCB's */ + struct vtoc_cchh DS4HCCHH; /* CCHH of next available alternate track */ + __u16 DS4NOATK; /* number of remaining alternate tracks */ + __u8 DS4VTOCI; /* VTOC indicators */ + __u8 DS4NOEXT; /* number of extents in VTOC */ + __u8 DS4SMSFG; /* system managed storage indicators */ + __u8 DS4DEVAC; /* number of alternate cylinders. + * Subtract from first two bytes of + * DS4DEVSZ to get number of usable + * cylinders. can be zero. valid + * only if DS4DEVAV on. */ + struct vtoc_dev_const DS4DEVCT; /* device constants */ + char DS4AMTIM[8]; /* VSAM time stamp */ + char DS4AMCAT[3]; /* VSAM catalog indicator */ + char DS4R2TIM[8]; /* VSAM volume/catalog match time stamp */ + char res1[5]; /* reserved */ + char DS4F6PTR[5]; /* pointer to first format 6 DSCB */ + struct vtoc_extent DS4VTOCE; /* VTOC extent description */ + char res2[10]; /* reserved */ + __u8 DS4EFLVL; /* extended free-space management level */ + struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */ + char res3; /* reserved */ + __u32 DS4DCYL; /* number of logical cyls */ + char res4[2]; /* reserved */ + __u8 DS4DEVF2; /* device flags */ + char res5; /* reserved */ +} __attribute__ ((packed)); + +struct vtoc_ds5ext +{ + __u16 t; /* RTA of the first track of free extent */ + __u16 fc; /* number of whole cylinders in free ext. */ + __u8 ft; /* number of remaining free tracks */ +} __attribute__ ((packed)); + +struct vtoc_format5_label +{ + char DS5KEYID[4]; /* key identifier */ + struct vtoc_ds5ext DS5AVEXT; /* first available (free-space) extent. */ + struct vtoc_ds5ext DS5EXTAV[7]; /* seven available extents */ + __u8 DS5FMTID; /* format identifier */ + struct vtoc_ds5ext DS5MAVET[18]; /* eighteen available extents */ + struct vtoc_cchhb DS5PTRDS; /* pointer to next format5 DSCB */ +} __attribute__ ((packed)); + +struct vtoc_ds7ext +{ + __u32 a; /* starting RTA value */ + __u32 b; /* ending RTA value + 1 */ +} __attribute__ ((packed)); + +struct vtoc_format7_label +{ + char DS7KEYID[4]; /* key identifier */ + struct vtoc_ds7ext DS7EXTNT[5]; /* space for 5 extent descriptions */ + __u8 DS7FMTID; /* format identifier */ + struct vtoc_ds7ext DS7ADEXT[11]; /* space for 11 extent descriptions */ + char res1[2]; /* reserved */ + struct vtoc_cchhb DS7PTRDS; /* pointer to next FMT7 DSCB */ +} __attribute__ ((packed)); + +struct vtoc_cms_label { + __u8 label_id[4]; /* Label identifier */ + __u8 vol_id[6]; /* Volid */ + __u16 version_id; /* Version identifier */ + __u32 block_size; /* Disk block size */ + __u32 origin_ptr; /* Disk origin pointer */ + __u32 usable_count; /* Number of usable cylinders/blocks */ + __u32 formatted_count; /* Maximum number of formatted cylinders/ + * blocks */ + __u32 block_count; /* Disk size in CMS blocks */ + __u32 used_count; /* Number of CMS blocks in use */ + __u32 fst_size; /* File Status Table (FST) size */ + __u32 fst_count; /* Number of FSTs per CMS block */ + __u8 format_date[6]; /* Disk FORMAT date */ + __u8 reserved1[2]; + __u32 disk_offset; /* Disk offset when reserved*/ + __u32 map_block; /* Allocation Map Block with next hole */ + __u32 hblk_disp; /* Displacement into HBLK data of next hole */ + __u32 user_disp; /* Displacement into user part of Allocation + * map */ + __u8 reserved2[4]; + __u8 segment_name[8]; /* Name of shared segment */ +} __attribute__ ((packed)); + +#endif /* _ASM_S390_VTOC_H */ diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h new file mode 100644 index 00000000000..f2b18eacaca --- /dev/null +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -0,0 +1,341 @@ +/* + * include/asm-s390/zcrypt.h + * + * zcrypt 2.1.0 (user-visible header) + * + * Copyright IBM Corp. 2001, 2006 + * Author(s): Robert Burroughs + * Eric Rossman (edrossma@us.ibm.com) + * + * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ASM_S390_ZCRYPT_H +#define __ASM_S390_ZCRYPT_H + +#define ZCRYPT_VERSION 2 +#define ZCRYPT_RELEASE 1 +#define ZCRYPT_VARIANT 1 + +#include <linux/ioctl.h> +#include <linux/compiler.h> + +/** + * struct ica_rsa_modexpo + * + * Requirements: + * - outputdatalength is at least as large as inputdatalength. + * - All key parts are right justified in their fields, padded on + * the left with zeroes. + * - length(b_key) = inputdatalength + * - length(n_modulus) = inputdatalength + */ +struct ica_rsa_modexpo { + char __user * inputdata; + unsigned int inputdatalength; + char __user * outputdata; + unsigned int outputdatalength; + char __user * b_key; + char __user * n_modulus; +}; + +/** + * struct ica_rsa_modexpo_crt + * + * Requirements: + * - inputdatalength is even. + * - outputdatalength is at least as large as inputdatalength. + * - All key parts are right justified in their fields, padded on + * the left with zeroes. + * - length(bp_key) = inputdatalength/2 + 8 + * - length(bq_key) = inputdatalength/2 + * - length(np_key) = inputdatalength/2 + 8 + * - length(nq_key) = inputdatalength/2 + * - length(u_mult_inv) = inputdatalength/2 + 8 + */ +struct ica_rsa_modexpo_crt { + char __user * inputdata; + unsigned int inputdatalength; + char __user * outputdata; + unsigned int outputdatalength; + char __user * bp_key; + char __user * bq_key; + char __user * np_prime; + char __user * nq_prime; + char __user * u_mult_inv; +}; + +/** + * CPRBX + * Note that all shorts and ints are big-endian. + * All pointer fields are 16 bytes long, and mean nothing. + * + * A request CPRB is followed by a request_parameter_block. + * + * The request (or reply) parameter block is organized thus: + * function code + * VUD block + * key block + */ +struct CPRBX { + unsigned short cprb_len; /* CPRB length 220 */ + unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ + unsigned char pad_000[3]; /* Alignment pad bytes */ + unsigned char func_id[2]; /* function id 0x5432 */ + unsigned char cprb_flags[4]; /* Flags */ + unsigned int req_parml; /* request parameter buffer len */ + unsigned int req_datal; /* request data buffer */ + unsigned int rpl_msgbl; /* reply message block length */ + unsigned int rpld_parml; /* replied parameter block len */ + unsigned int rpl_datal; /* reply data block len */ + unsigned int rpld_datal; /* replied data block len */ + unsigned int req_extbl; /* request extension block len */ + unsigned char pad_001[4]; /* reserved */ + unsigned int rpld_extbl; /* replied extension block len */ + unsigned char padx000[16 - sizeof (char *)]; + unsigned char * req_parmb; /* request parm block 'address' */ + unsigned char padx001[16 - sizeof (char *)]; + unsigned char * req_datab; /* request data block 'address' */ + unsigned char padx002[16 - sizeof (char *)]; + unsigned char * rpl_parmb; /* reply parm block 'address' */ + unsigned char padx003[16 - sizeof (char *)]; + unsigned char * rpl_datab; /* reply data block 'address' */ + unsigned char padx004[16 - sizeof (char *)]; + unsigned char * req_extb; /* request extension block 'addr'*/ + unsigned char padx005[16 - sizeof (char *)]; + unsigned char * rpl_extb; /* reply extension block 'address'*/ + unsigned short ccp_rtcode; /* server return code */ + unsigned short ccp_rscode; /* server reason code */ + unsigned int mac_data_len; /* Mac Data Length */ + unsigned char logon_id[8]; /* Logon Identifier */ + unsigned char mac_value[8]; /* Mac Value */ + unsigned char mac_content_flgs;/* Mac content flag byte */ + unsigned char pad_002; /* Alignment */ + unsigned short domain; /* Domain */ + unsigned char usage_domain[4];/* Usage domain */ + unsigned char cntrl_domain[4];/* Control domain */ + unsigned char S390enf_mask[4];/* S/390 enforcement mask */ + unsigned char pad_004[36]; /* reserved */ +} __attribute__((packed)); + +/** + * xcRB + */ +struct ica_xcRB { + unsigned short agent_ID; + unsigned int user_defined; + unsigned short request_ID; + unsigned int request_control_blk_length; + unsigned char padding1[16 - sizeof (char *)]; + char __user * request_control_blk_addr; + unsigned int request_data_length; + char padding2[16 - sizeof (char *)]; + char __user * request_data_address; + unsigned int reply_control_blk_length; + char padding3[16 - sizeof (char *)]; + char __user * reply_control_blk_addr; + unsigned int reply_data_length; + char padding4[16 - sizeof (char *)]; + char __user * reply_data_addr; + unsigned short priority_window; + unsigned int status; +} __attribute__((packed)); + +/** + * struct ep11_cprb - EP11 connectivity programming request block + * @cprb_len: CPRB header length [0x0020] + * @cprb_ver_id: CPRB version id. [0x04] + * @pad_000: Alignment pad bytes + * @flags: Admin cmd [0x80] or functional cmd [0x00] + * @func_id: Function id / subtype [0x5434] + * @source_id: Source id [originator id] + * @target_id: Target id [usage/ctrl domain id] + * @ret_code: Return code + * @reserved1: Reserved + * @reserved2: Reserved + * @payload_len: Payload length + */ +struct ep11_cprb { + uint16_t cprb_len; + unsigned char cprb_ver_id; + unsigned char pad_000[2]; + unsigned char flags; + unsigned char func_id[2]; + uint32_t source_id; + uint32_t target_id; + uint32_t ret_code; + uint32_t reserved1; + uint32_t reserved2; + uint32_t payload_len; +} __attribute__((packed)); + +/** + * struct ep11_target_dev - EP11 target device list + * @ap_id: AP device id + * @dom_id: Usage domain id + */ +struct ep11_target_dev { + uint16_t ap_id; + uint16_t dom_id; +}; + +/** + * struct ep11_urb - EP11 user request block + * @targets_num: Number of target adapters + * @targets: Addr to target adapter list + * @weight: Level of request priority + * @req_no: Request id/number + * @req_len: Request length + * @req: Addr to request block + * @resp_len: Response length + * @resp: Addr to response block + */ +struct ep11_urb { + uint16_t targets_num; + uint64_t targets; + uint64_t weight; + uint64_t req_no; + uint64_t req_len; + uint64_t req; + uint64_t resp_len; + uint64_t resp; +} __attribute__((packed)); + +#define AUTOSELECT ((unsigned int)0xFFFFFFFF) + +#define ZCRYPT_IOCTL_MAGIC 'z' + +/** + * Interface notes: + * + * The ioctl()s which are implemented (along with relevant details) + * are: + * + * ICARSAMODEXPO + * Perform an RSA operation using a Modulus-Exponent pair + * This takes an ica_rsa_modexpo struct as its arg. + * + * NOTE: please refer to the comments preceding this structure + * for the implementation details for the contents of the + * block + * + * ICARSACRT + * Perform an RSA operation using a Chinese-Remainder Theorem key + * This takes an ica_rsa_modexpo_crt struct as its arg. + * + * NOTE: please refer to the comments preceding this structure + * for the implementation details for the contents of the + * block + * + * ZSECSENDCPRB + * Send an arbitrary CPRB to a crypto card. + * + * ZSENDEP11CPRB + * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card. + * + * Z90STAT_STATUS_MASK + * Return an 64 element array of unsigned chars for the status of + * all devices. + * 0x01: PCICA + * 0x02: PCICC + * 0x03: PCIXCC_MCL2 + * 0x04: PCIXCC_MCL3 + * 0x05: CEX2C + * 0x06: CEX2A + * 0x0d: device is disabled via the proc filesystem + * + * Z90STAT_QDEPTH_MASK + * Return an 64 element array of unsigned chars for the queue + * depth of all devices. + * + * Z90STAT_PERDEV_REQCNT + * Return an 64 element array of unsigned integers for the number + * of successfully completed requests per device since the device + * was detected and made available. + * + * Z90STAT_REQUESTQ_COUNT + * Return an integer count of the number of entries waiting to be + * sent to a device. + * + * Z90STAT_PENDINGQ_COUNT + * Return an integer count of the number of entries sent to all + * devices awaiting the reply. + * + * Z90STAT_TOTALOPEN_COUNT + * Return an integer count of the number of open file handles. + * + * Z90STAT_DOMAIN_INDEX + * Return the integer value of the Cryptographic Domain. + * + * The following ioctls are deprecated and should be no longer used: + * + * Z90STAT_TOTALCOUNT + * Return an integer count of all device types together. + * + * Z90STAT_PCICACOUNT + * Return an integer count of all PCICAs. + * + * Z90STAT_PCICCCOUNT + * Return an integer count of all PCICCs. + * + * Z90STAT_PCIXCCMCL2COUNT + * Return an integer count of all MCL2 PCIXCCs. + * + * Z90STAT_PCIXCCMCL3COUNT + * Return an integer count of all MCL3 PCIXCCs. + * + * Z90STAT_CEX2CCOUNT + * Return an integer count of all CEX2Cs. + * + * Z90STAT_CEX2ACOUNT + * Return an integer count of all CEX2As. + * + * ICAZ90STATUS + * Return some device driver status in a ica_z90_status struct + * This takes an ica_z90_status struct as its arg. + * + * Z90STAT_PCIXCCCOUNT + * Return an integer count of all PCIXCCs (MCL2 + MCL3). + * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from + * MCL2 PCIXCCs. + */ + +/** + * Supported ioctl calls + */ +#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) +#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) +#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0) +#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0) + +/* New status calls */ +#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) +#define Z90STAT_PCICACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int) +#define Z90STAT_PCICCCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int) +#define Z90STAT_PCIXCCMCL2COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int) +#define Z90STAT_PCIXCCMCL3COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int) +#define Z90STAT_CEX2CCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int) +#define Z90STAT_CEX2ACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int) +#define Z90STAT_REQUESTQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int) +#define Z90STAT_PENDINGQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int) +#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int) +#define Z90STAT_DOMAIN_INDEX _IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int) +#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64]) +#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64]) +#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64]) + +#endif /* __ASM_S390_ZCRYPT_H */ diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/s390/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9269b5788fa..a95c4ca9961 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,32 +2,69 @@ # Makefile for the linux kernel. # -EXTRA_AFLAGS := -traditional +ifdef CONFIG_FUNCTION_TRACER +# Don't trace early setup code and tracing code +CFLAGS_REMOVE_early.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg +endif -obj-y := bitmap.o traps.o time.o process.o \ - setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - semaphore.o s390_ext.o debug.o profile.o irq.o reipl_diag.o +# +# Passing null pointers is ok for smp code, since we access the lowcore here. +# +CFLAGS_smp.o := -Wno-nonnull + +# +# Disable tailcall optimizations for stack / callchain walking functions +# since this might generate broken code when accessing register 15 and +# passing its content to other functions. +# +CFLAGS_stacktrace.o += -fno-optimize-sibling-calls +CFLAGS_dumpstack.o += -fno-optimize-sibling-calls + +# +# Pass UTS_MACHINE for user_regset definition +# +CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' + +CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w + +obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o +obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y += dumpstack.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) +obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -extra-y += head.o init_task.o vmlinux.lds +extra-y += head.o vmlinux.lds +extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SCHED_BOOK) += topology.o +obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o +obj-$(CONFIG_AUDIT) += audit.o +compat-obj-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o +obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ - compat_wrapper.o compat_exec_domain.o -obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_VIRT_TIMER) += vtime.o +ifdef CONFIG_64BIT +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ + perf_cpum_cf_events.o +obj-y += runtime_instr.o cache.o +endif -# Kexec part -S390_KEXEC_OBJS := machine_kexec.o crash.o -S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) - -# -# This is just to get the dependencies... -# -binfmt_elf32.o: $(TOPDIR)/fs/binfmt_elf.c +# vdso +obj-$(CONFIG_64BIT) += vdso64/ +obj-$(CONFIG_32BIT) += vdso32/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 3f7018e9dbe..afe1715a4eb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -4,46 +4,179 @@ * and format the required data. */ -#include <linux/config.h> -#include <linux/sched.h> - -/* Use marker if you need to separate the values later */ +#define ASM_OFFSETS_C -#define DEFINE(sym, val, marker) \ - asm volatile("\n->" #sym " %0 " #val " " #marker : : "i" (val)) +#include <linux/kbuild.h> +#include <linux/kvm_host.h> +#include <linux/sched.h> +#include <asm/cputime.h> +#include <asm/vdso.h> +#include <asm/pgtable.h> -#define BLANK() asm volatile("\n->" : : ) +/* + * Make sure that the compiler is new enough. We want a compiler that + * is known to work with the "Q" assembler constraint. + */ +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#error Your compiler is too old; please use version 3.3.3 or newer +#endif int main(void) { - DEFINE(__THREAD_info, offsetof(struct task_struct, thread_info),); - DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),); - DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),); - DEFINE(__THREAD_mm_segment, - offsetof(struct task_struct, thread.mm_segment),); - BLANK(); - DEFINE(__TASK_pid, offsetof(struct task_struct, pid),); - BLANK(); - DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),); - DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),); - DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id),); - BLANK(); - DEFINE(__TI_task, offsetof(struct thread_info, task),); - DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain),); - DEFINE(__TI_flags, offsetof(struct thread_info, flags),); - DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),); - DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),); - BLANK(); - DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),); - DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),); - DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),); - DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),); - DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),); - DEFINE(__PT_SIZE, sizeof(struct pt_regs),); - BLANK(); - DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain),); - DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs),); - DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1),); + DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); + DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); + DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment)); + BLANK(); + DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); + BLANK(); + DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); + BLANK(); + DEFINE(__TI_task, offsetof(struct thread_info, task)); + DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); + DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); + DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); + DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); + DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); + DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); + DEFINE(__TI_last_break, offsetof(struct thread_info, last_break)); + BLANK(); + DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); + DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); + DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); + DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); + DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); + DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); + DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags)); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); + BLANK(); + DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); + DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); + DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + BLANK(); + /* timeval/timezone offsets for use by vdso */ + DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); + DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); + DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); + DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift)); + DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time)); + /* constants used by the vdso */ + DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); + DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + BLANK(); + /* idle data offsets */ + DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); + DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit)); + DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter)); + DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit)); + /* lowcore offsets */ + DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); + DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); + DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); + DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); + DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); + DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); + DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); + DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); + DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num)); + DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code)); + DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid)); + DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); + DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id)); + DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); + DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id)); + DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id)); + DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code)); + DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); + DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); + DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); + DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); + DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); + DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); + DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code)); + DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); + DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); + DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); + DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); + DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); + DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); + DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); + DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); + DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); + DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); + DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); + DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + BLANK(); + DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); + DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); + DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); + DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags)); + DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); + DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); + DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); + DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer)); + DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer)); + DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer)); + DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer)); + DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer)); + DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer)); + DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); + DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); + DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); + DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); + DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); + DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); + DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); + DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); + DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data)); + DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source)); + DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); + DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); + DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); + DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); + DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); + DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); + DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + BLANK(); + DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); + DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); + DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); + DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area)); + DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area)); + DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); + DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); + DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); +#ifdef CONFIG_32BIT + DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); +#else /* CONFIG_32BIT */ + DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code)); + DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address)); + DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); + DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); + DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); + DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); + DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); + DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); + DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); + DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); + DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); +#endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c new file mode 100644 index 00000000000..f4932c22ebe --- /dev/null +++ b/arch/s390/kernel/audit.c @@ -0,0 +1,78 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> +#include "audit.h" + +static unsigned dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_COMPAT + if (arch == AUDIT_ARCH_S390) + return 1; +#endif + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ +#ifdef CONFIG_COMPAT + if (abi == AUDIT_ARCH_S390) + return s390_classify_syscall(syscall); +#endif + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 0; + } +} + +static int __init audit_classes_init(void) +{ +#ifdef CONFIG_COMPAT + audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); + audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class); +#endif + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h new file mode 100644 index 00000000000..12b56f4b5a7 --- /dev/null +++ b/arch/s390/kernel/audit.h @@ -0,0 +1,15 @@ +#ifndef __ARCH_S390_KERNEL_AUDIT_H +#define __ARCH_S390_KERNEL_AUDIT_H + +#include <linux/types.h> + +#ifdef CONFIG_COMPAT +extern int s390_classify_syscall(unsigned); +extern __u32 s390_dir_class[]; +extern __u32 s390_write_class[]; +extern __u32 s390_read_class[]; +extern __u32 s390_chattr_class[]; +extern __u32 s390_signal_class[]; +#endif /* CONFIG_COMPAT */ + +#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S new file mode 100644 index 00000000000..797a823a227 --- /dev/null +++ b/arch/s390/kernel/base.S @@ -0,0 +1,206 @@ +/* + * arch/s390/kernel/base.S + * + * Copyright IBM Corp. 2006, 2007 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ptrace.h> +#include <asm/sigp.h> + +#ifdef CONFIG_64BIT + +ENTRY(s390_base_mcck_handler) + basr %r13,0 +0: lg %r15,__LC_PANIC_STACK # load panic stack + aghi %r15,-STACK_FRAME_OVERHEAD + larl %r1,s390_base_mcck_handler_fn + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 +1: la %r1,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + lpswe __LC_MCK_OLD_PSW + + .section .bss + .align 8 + .globl s390_base_mcck_handler_fn +s390_base_mcck_handler_fn: + .quad 0 + .previous + +ENTRY(s390_base_ext_handler) + stmg %r0,%r15,__LC_SAVE_AREA_ASYNC + basr %r13,0 +0: aghi %r15,-STACK_FRAME_OVERHEAD + larl %r1,s390_base_ext_handler_fn + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC + ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit + lpswe __LC_EXT_OLD_PSW + + .section .bss + .align 8 + .globl s390_base_ext_handler_fn +s390_base_ext_handler_fn: + .quad 0 + .previous + +ENTRY(s390_base_pgm_handler) + stmg %r0,%r15,__LC_SAVE_AREA_SYNC + basr %r13,0 +0: aghi %r15,-STACK_FRAME_OVERHEAD + larl %r1,s390_base_pgm_handler_fn + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 1f + basr %r14,%r1 + lmg %r0,%r15,__LC_SAVE_AREA_SYNC + lpswe __LC_PGM_OLD_PSW +1: lpswe disabled_wait_psw-0b(%r13) + + .align 8 +disabled_wait_psw: + .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler + + .section .bss + .align 8 + .globl s390_base_pgm_handler_fn +s390_base_pgm_handler_fn: + .quad 0 + .previous + +# +# Calls diag 308 subcode 1 and continues execution +# +# The following conditions must be ensured before calling this function: +# * Prefix register = 0 +# * Lowcore protection is disabled +# +ENTRY(diag308_reset) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) + larl %r4,.Lcontinue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 + lghi %r3,0 + lg %r4,0(%r4) # Save PSW + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + diag %r1,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,.Lcontinue_psw # Restore PSW flags + lpswe 0(%r4) +.Lcontinue: + br %r14 +.align 16 +.Lrestart_psw: + .long 0x00080000,0x80000000 + .Lrestart_part2 + + .section .data..nosave,"aw",@progbits +.align 8 +.Lcontinue_psw: + .quad 0,.Lcontinue + .previous + + .section .bss +.align 8 +.Lctlregs: + .rept 16 + .quad 0 + .endr +.Lfpctl: + .long 0 + .previous + +#else /* CONFIG_64BIT */ + +ENTRY(s390_base_mcck_handler) + basr %r13,0 +0: l %r15,__LC_PANIC_STACK # load panic stack + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,2f-0b(%r13) + l %r1,0(%r1) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lm %r0,%r15,__LC_GPREGS_SAVE_AREA + lpsw __LC_MCK_OLD_PSW + +2: .long s390_base_mcck_handler_fn + + .section .bss + .align 4 + .globl s390_base_mcck_handler_fn +s390_base_mcck_handler_fn: + .long 0 + .previous + +ENTRY(s390_base_ext_handler) + stm %r0,%r15,__LC_SAVE_AREA_ASYNC + basr %r13,0 +0: ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,2f-0b(%r13) + l %r1,0(%r1) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 +1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC + ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit + lpsw __LC_EXT_OLD_PSW + +2: .long s390_base_ext_handler_fn + + .section .bss + .align 4 + .globl s390_base_ext_handler_fn +s390_base_ext_handler_fn: + .long 0 + .previous + +ENTRY(s390_base_pgm_handler) + stm %r0,%r15,__LC_SAVE_AREA_SYNC + basr %r13,0 +0: ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,2f-0b(%r13) + l %r1,0(%r1) + ltr %r1,%r1 + jz 1f + basr %r14,%r1 + lm %r0,%r15,__LC_SAVE_AREA_SYNC + lpsw __LC_PGM_OLD_PSW + +1: lpsw disabled_wait_psw-0b(%r13) + +2: .long s390_base_pgm_handler_fn + +disabled_wait_psw: + .align 8 + .long 0x000a0000,0x00000000 + s390_base_pgm_handler + + .section .bss + .align 4 + .globl s390_base_pgm_handler_fn +s390_base_pgm_handler_fn: + .long 0 + .previous + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c deleted file mode 100644 index 1f451c2cb07..00000000000 --- a/arch/s390/kernel/binfmt_elf32.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Support for 32-bit Linux for S390 ELF binaries. - * - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * Heavily inspired by the 32-bit Sparc compat code which is - * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#define __ASMS390_ELF_H - -#include <linux/time.h> - -/* - * These are used to set parameters in the core dumps. - */ -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_S390 - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) \ - (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ - && (x)->e_ident[EI_CLASS] == ELF_CLASS) - -/* ELF register definitions */ -#define NUM_GPRS 16 -#define NUM_FPRS 16 -#define NUM_ACRS 16 - -/* For SVR4/S390 the function pointer to be registered with `atexit` is - passed in R14. */ -#define ELF_PLAT_INIT(_r, load_addr) \ - do { \ - _r->gprs[14] = 0; \ - } while(0) - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) - -/* Wow, the "main" arch needs arch dependent functions too.. :) */ - -/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is - now struct_user_regs, they are different) */ - -#define ELF_CORE_COPY_REGS(pr_reg, regs) dump_regs32(regs, &pr_reg); - -#define ELF_CORE_COPY_TASK_REGS(tsk, regs) dump_task_regs32(tsk, regs) - -#define ELF_CORE_COPY_FPREGS(tsk, fpregs) dump_task_fpu(tsk, fpregs) - -/* This yields a mask that user programs can use to figure out what - instruction set this CPU supports. */ - -#define ELF_HWCAP (0) - -/* This yields a string that ld.so will use to load implementation - specific libraries for optimization. This is more specific in - intent than poking at uname or /proc/cpuinfo. - - For the moment, we have only optimizations for the Intel generations, - but that could change... */ - -#define ELF_PLATFORM (NULL) - -#define SET_PERSONALITY(ex, ibcs2) \ -do { \ - if (ibcs2) \ - set_personality(PER_SVR4); \ - else if (current->personality != PER_LINUX32) \ - set_personality(PER_LINUX); \ - set_thread_flag(TIF_31BIT); \ -} while (0) - -#include "compat_linux.h" - -typedef _s390_fp_regs32 elf_fpregset_t; - -typedef struct -{ - - _psw_t32 psw; - __u32 gprs[__NUM_GPRS]; - __u32 acrs[__NUM_ACRS]; - __u32 orig_gpr2; -} s390_regs32; -typedef s390_regs32 elf_gregset_t; - -static inline int dump_regs32(struct pt_regs *ptregs, elf_gregset_t *regs) -{ - int i; - - memcpy(®s->psw.mask, &ptregs->psw.mask, 4); - memcpy(®s->psw.addr, (char *)&ptregs->psw.addr + 4, 4); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = ptregs->gprs[i]; - save_access_regs(regs->acrs); - regs->orig_gpr2 = ptregs->orig_gpr2; - return 1; -} - -static inline int dump_task_regs32(struct task_struct *tsk, elf_gregset_t *regs) -{ - struct pt_regs *ptregs = task_pt_regs(tsk); - int i; - - memcpy(®s->psw.mask, &ptregs->psw.mask, 4); - memcpy(®s->psw.addr, (char *)&ptregs->psw.addr + 4, 4); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = ptregs->gprs[i]; - memcpy(regs->acrs, tsk->thread.acrs, sizeof(regs->acrs)); - regs->orig_gpr2 = ptregs->orig_gpr2; - return 1; -} - -static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) -{ - if (tsk == current) - save_fp_regs((s390_fp_regs *) fpregs); - else - memcpy(fpregs, &tsk->thread.fp_regs, sizeof(elf_fpregset_t)); - return 1; -} - -#include <asm/processor.h> -#include <linux/module.h> -#include <linux/config.h> -#include <linux/elfcore.h> -#include <linux/binfmts.h> -#include <linux/compat.h> - -#define elf_prstatus elf_prstatus32 -struct elf_prstatus32 -{ - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - u32 pr_sigpend; /* Set of pending signals */ - u32 pr_sighold; /* Set of held signals */ - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct compat_timeval pr_utime; /* User time */ - struct compat_timeval pr_stime; /* System time */ - struct compat_timeval pr_cutime; /* Cumulative user time */ - struct compat_timeval pr_cstime; /* Cumulative system time */ - elf_gregset_t pr_reg; /* GP registers */ - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define elf_prpsinfo elf_prpsinfo32 -struct elf_prpsinfo32 -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - u32 pr_flag; /* flags */ - u16 pr_uid; - u16 pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - -#include <linux/highuid.h> - -#undef NEW_TO_OLD_UID -#undef NEW_TO_OLD_GID -#define NEW_TO_OLD_UID(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid) -#define NEW_TO_OLD_GID(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid) - -#define elf_addr_t u32 -/* -#define init_elf_binfmt init_elf32_binfmt -*/ - -#undef start_thread -#define start_thread start_thread31 - -MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries," - " Copyright 2000 IBM Corporation"); -MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>"); - -#undef MODULE_DESCRIPTION -#undef MODULE_AUTHOR - -#undef cputime_to_timeval -#define cputime_to_timeval cputime_to_compat_timeval -static __inline__ void -cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) -{ - value->tv_usec = cputime % 1000000; - value->tv_sec = cputime / 1000000; -} - -#include "../../../fs/binfmt_elf.c" - diff --git a/arch/s390/kernel/bitmap.S b/arch/s390/kernel/bitmap.S deleted file mode 100644 index dfb41f946e2..00000000000 --- a/arch/s390/kernel/bitmap.S +++ /dev/null @@ -1,56 +0,0 @@ -/* - * arch/s390/kernel/bitmap.S - * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... - * See include/asm-s390/{bitops.h|posix_types.h} for details - * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - */ - - .globl _oi_bitmap -_oi_bitmap: - .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 - - .globl _ni_bitmap -_ni_bitmap: - .byte 0xFE,0xFD,0xFB,0xF7,0xEF,0xDF,0xBF,0x7F - - .globl _zb_findmap -_zb_findmap: - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 - - .globl _sb_findmap -_sb_findmap: - .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c new file mode 100644 index 00000000000..c0b03c28d15 --- /dev/null +++ b/arch/s390/kernel/cache.c @@ -0,0 +1,389 @@ +/* + * Extract CPU cache information and expose them via sysfs. + * + * Copyright IBM Corp. 2012 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/notifier.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <asm/facility.h> + +struct cache { + unsigned long size; + unsigned int line_size; + unsigned int associativity; + unsigned int nr_sets; + unsigned int level : 3; + unsigned int type : 2; + unsigned int private : 1; + struct list_head list; +}; + +struct cache_dir { + struct kobject *kobj; + struct cache_index_dir *index; +}; + +struct cache_index_dir { + struct kobject kobj; + int cpu; + struct cache *cache; + struct cache_index_dir *next; +}; + +enum { + CACHE_SCOPE_NOTEXISTS, + CACHE_SCOPE_PRIVATE, + CACHE_SCOPE_SHARED, + CACHE_SCOPE_RESERVED, +}; + +enum { + CACHE_TYPE_SEPARATE, + CACHE_TYPE_DATA, + CACHE_TYPE_INSTRUCTION, + CACHE_TYPE_UNIFIED, +}; + +enum { + EXTRACT_TOPOLOGY, + EXTRACT_LINE_SIZE, + EXTRACT_SIZE, + EXTRACT_ASSOCIATIVITY, +}; + +enum { + CACHE_TI_UNIFIED = 0, + CACHE_TI_DATA = 0, + CACHE_TI_INSTRUCTION, +}; + +struct cache_info { + unsigned char : 4; + unsigned char scope : 2; + unsigned char type : 2; +}; + +#define CACHE_MAX_LEVEL 8 + +union cache_topology { + struct cache_info ci[CACHE_MAX_LEVEL]; + unsigned long long raw; +}; + +static const char * const cache_type_string[] = { + "Data", + "Instruction", + "Unified", +}; + +static struct cache_dir *cache_dir_cpu[NR_CPUS]; +static LIST_HEAD(cache_list); + +void show_cacheinfo(struct seq_file *m) +{ + struct cache *cache; + int index = 0; + + list_for_each_entry(cache, &cache_list, list) { + seq_printf(m, "cache%-11d: ", index); + seq_printf(m, "level=%d ", cache->level); + seq_printf(m, "type=%s ", cache_type_string[cache->type]); + seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); + seq_printf(m, "size=%luK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->line_size); + seq_printf(m, "associativity=%d", cache->associativity); + seq_puts(m, "\n"); + index++; + } +} + +static inline unsigned long ecag(int ai, int li, int ti) +{ + unsigned long cmd, val; + + cmd = ai << 4 | li << 1 | ti; + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (cmd)); + return val; +} + +static int __init cache_add(int level, int private, int type) +{ + struct cache *cache; + int ti; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + if (type == CACHE_TYPE_INSTRUCTION) + ti = CACHE_TI_INSTRUCTION; + else + ti = CACHE_TI_UNIFIED; + cache->size = ecag(EXTRACT_SIZE, level, ti); + cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); + cache->nr_sets = cache->size / cache->associativity; + cache->nr_sets /= cache->line_size; + cache->private = private; + cache->level = level + 1; + cache->type = type - 1; + list_add_tail(&cache->list, &cache_list); + return 0; +} + +static void __init cache_build_info(void) +{ + struct cache *cache, *next; + union cache_topology ct; + int level, private, rc; + + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (level = 0; level < CACHE_MAX_LEVEL; level++) { + switch (ct.ci[level].scope) { + case CACHE_SCOPE_SHARED: + private = 0; + break; + case CACHE_SCOPE_PRIVATE: + private = 1; + break; + default: + return; + } + if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { + rc = cache_add(level, private, CACHE_TYPE_DATA); + rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); + } else { + rc = cache_add(level, private, ct.ci[level].type); + } + if (rc) + goto error; + } + return; +error: + list_for_each_entry_safe(cache, next, &cache_list, list) { + list_del(&cache->list); + kfree(cache); + } +} + +static struct cache_dir *cache_create_cache_dir(int cpu) +{ + struct cache_dir *cache_dir; + struct kobject *kobj = NULL; + struct device *dev; + + dev = get_cpu_device(cpu); + if (!dev) + goto out; + kobj = kobject_create_and_add("cache", &dev->kobj); + if (!kobj) + goto out; + cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); + if (!cache_dir) + goto out; + cache_dir->kobj = kobj; + cache_dir_cpu[cpu] = cache_dir; + return cache_dir; +out: + kobject_put(kobj); + return NULL; +} + +static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj) +{ + return container_of(kobj, struct cache_index_dir, kobj); +} + +static void cache_index_release(struct kobject *kobj) +{ + struct cache_index_dir *index; + + index = kobj_to_cache_index_dir(kobj); + kfree(index); +} + +static ssize_t cache_index_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(kobj, kobj_attr, buf); +} + +#define DEFINE_CACHE_ATTR(_name, _format, _value) \ +static ssize_t cache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct cache_index_dir *index; \ + \ + index = kobj_to_cache_index_dir(kobj); \ + return sprintf(buf, _format, _value); \ +} \ +static struct kobj_attribute cache_##_name##_attr = \ + __ATTR(_name, 0444, cache_##_name##_show, NULL); + +DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); +DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); +DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); +DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); +DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); +DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); + +static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) +{ + struct cache_index_dir *index; + int len; + + index = kobj_to_cache_index_dir(kobj); + len = type ? + cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) : + cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)); + len += sprintf(&buf[len], "\n"); + return len; +} + +static ssize_t shared_cpu_map_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 0, buf); +} +static struct kobj_attribute cache_shared_cpu_map_attr = + __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); + +static ssize_t shared_cpu_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 1, buf); +} +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + +static struct attribute *cache_index_default_attrs[] = { + &cache_type_attr.attr, + &cache_size_attr.attr, + &cache_number_of_sets_attr.attr, + &cache_ways_of_associativity_attr.attr, + &cache_level_attr.attr, + &cache_coherency_line_size_attr.attr, + &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, + NULL, +}; + +static const struct sysfs_ops cache_index_ops = { + .show = cache_index_show, +}; + +static struct kobj_type cache_index_type = { + .sysfs_ops = &cache_index_ops, + .release = cache_index_release, + .default_attrs = cache_index_default_attrs, +}; + +static int cache_create_index_dir(struct cache_dir *cache_dir, + struct cache *cache, int index, int cpu) +{ + struct cache_index_dir *index_dir; + int rc; + + index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); + if (!index_dir) + return -ENOMEM; + index_dir->cache = cache; + index_dir->cpu = cpu; + rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, + cache_dir->kobj, "index%d", index); + if (rc) + goto out; + index_dir->next = cache_dir->index; + cache_dir->index = index_dir; + return 0; +out: + kfree(index_dir); + return rc; +} + +static int cache_add_cpu(int cpu) +{ + struct cache_dir *cache_dir; + struct cache *cache; + int rc, index = 0; + + if (list_empty(&cache_list)) + return 0; + cache_dir = cache_create_cache_dir(cpu); + if (!cache_dir) + return -ENOMEM; + list_for_each_entry(cache, &cache_list, list) { + if (!cache->private) + break; + rc = cache_create_index_dir(cache_dir, cache, index, cpu); + if (rc) + return rc; + index++; + } + return 0; +} + +static void cache_remove_cpu(int cpu) +{ + struct cache_index_dir *index, *next; + struct cache_dir *cache_dir; + + cache_dir = cache_dir_cpu[cpu]; + if (!cache_dir) + return; + index = cache_dir->index; + while (index) { + next = index->next; + kobject_put(&index->kobj); + index = next; + } + kobject_put(cache_dir->kobj); + kfree(cache_dir); + cache_dir_cpu[cpu] = NULL; +} + +static int cache_hotplug(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + int rc = 0; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cache_add_cpu(cpu); + if (rc) + cache_remove_cpu(cpu); + break; + case CPU_DEAD: + cache_remove_cpu(cpu); + break; + } + return rc ? NOTIFY_BAD : NOTIFY_OK; +} + +static int __init cache_init(void) +{ + int cpu; + + if (!test_facility(34)) + return 0; + cache_build_info(); + + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) + cache_add_cpu(cpu); + __hotcpu_notifier(cache_hotplug, 0); + cpu_notifier_register_done(); + return 0; +} +device_initcall(cache_init); diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c new file mode 100644 index 00000000000..d6487bf879e --- /dev/null +++ b/arch/s390/kernel/compat_audit.c @@ -0,0 +1,44 @@ +#undef __s390x__ +#include <asm/unistd.h> +#include "audit.h" + +unsigned s390_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned s390_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned s390_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned s390_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned s390_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int s390_classify_syscall(unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c deleted file mode 100644 index 71d27c49356..00000000000 --- a/arch/s390/kernel/compat_exec_domain.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Support for 32-bit Linux for S390 personality. - * - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/personality.h> -#include <linux/sched.h> - -struct exec_domain s390_exec_domain; - -static int __init -s390_init (void) -{ - s390_exec_domain.name = "Linux/s390"; - s390_exec_domain.handler = NULL; - s390_exec_domain.pers_low = PER_LINUX32; - s390_exec_domain.pers_high = PER_LINUX32; - s390_exec_domain.signal_map = default_exec_domain.signal_map; - s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap; - register_exec_domain(&s390_exec_domain); - return 0; -} - -__initcall(s390_init); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index bf9a7a361b3..ca38139423a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -1,8 +1,6 @@ /* - * arch/s390x/kernel/linux32.c - * * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Gerhard Tonn (ton@de.ibm.com) * Thomas Spatzier (tspat@de.ibm.com) @@ -16,7 +14,6 @@ */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/fs.h> @@ -25,23 +22,13 @@ #include <linux/signal.h> #include <linux/resource.h> #include <linux/times.h> -#include <linux/utsname.h> -#include <linux/timex.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/sem.h> #include <linux/msg.h> #include <linux/shm.h> -#include <linux/slab.h> #include <linux/uio.h> -#include <linux/nfs_fs.h> #include <linux/quota.h> #include <linux/module.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> @@ -60,18 +47,17 @@ #include <linux/vfs.h> #include <linux/ptrace.h> #include <linux/fadvise.h> +#include <linux/ipc.h> +#include <linux/slab.h> #include <asm/types.h> -#include <asm/ipc.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <net/scm.h> #include <net/sock.h> #include "compat_linux.h" - /* For this source file, we want overflow handling. */ #undef high2lowuid @@ -100,92 +86,111 @@ #define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) #define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) -asmlinkage long sys32_chown16(const char * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename, + u16, user, u16, group) { return sys_chown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_lchown16(const char * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, + filename, u16, user, u16, group) { return sys_lchown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group) { return sys_fchown(fd, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_setregid16(u16 rgid, u16 egid) +COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) { return sys_setregid(low2highgid(rgid), low2highgid(egid)); } -asmlinkage long sys32_setgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) { return sys_setgid((gid_t)gid); } -asmlinkage long sys32_setreuid16(u16 ruid, u16 euid) +COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) { return sys_setreuid(low2highuid(ruid), low2highuid(euid)); } -asmlinkage long sys32_setuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) { return sys_setuid((uid_t)uid); } -asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) +COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) { return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); + low2highuid(suid)); } -asmlinkage long sys32_getresuid16(u16 *ruid, u16 *euid, u16 *suid) +COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp, + u16 __user *, euidp, u16 __user *, suidp) { + const struct cred *cred = current_cred(); int retval; + u16 ruid, euid, suid; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); + euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); + suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); + + if (!(retval = put_user(ruid, ruidp)) && + !(retval = put_user(euid, euidp))) + retval = put_user(suid, suidp); return retval; } -asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) +COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid) { return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); + low2highgid(sgid)); } -asmlinkage long sys32_getresgid16(u16 *rgid, u16 *egid, u16 *sgid) +COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, + u16 __user *, egidp, u16 __user *, sgidp) { + const struct cred *cred = current_cred(); int retval; + u16 rgid, egid, sgid; + + rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); + egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); + sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(rgid, rgidp)) && + !(retval = put_user(egid, egidp))) + retval = put_user(sgid, sgidp); return retval; } -asmlinkage long sys32_setfsuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) { return sys_setfsuid((uid_t)uid); } -asmlinkage long sys32_setfsgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) { return sys_setfsgid((gid_t)gid); } -static int groups16_to_user(u16 *grouplist, struct group_info *group_info) +static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - group = (u16)GROUP_AT(group_info, i); + kgid = GROUP_AT(group_info, i); + group = (u16)from_kgid_munged(user_ns, kgid); if (put_user(group, grouplist+i)) return -EFAULT; } @@ -193,45 +198,53 @@ static int groups16_to_user(u16 *grouplist, struct group_info *group_info) return 0; } -static int groups16_from_user(struct group_info *group_info, u16 *grouplist) +static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { if (get_user(group, grouplist+i)) return -EFAULT; - GROUP_AT(group_info, i) = (gid_t)group; + + kgid = make_kgid(user_ns, (gid_t)group); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; } -asmlinkage long sys32_getgroups16(int gidsetsize, u16 *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(cred->group_info); + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(cred->group_info); return i; } -asmlinkage long sys32_setgroups16(int gidsetsize, u16 *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist) { struct group_info *group_info; int retval; @@ -256,578 +269,67 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 *grouplist) return retval; } -asmlinkage long sys32_getuid16(void) -{ - return high2lowuid(current->uid); -} - -asmlinkage long sys32_geteuid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getuid16) { - return high2lowuid(current->euid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); } -asmlinkage long sys32_getgid16(void) +COMPAT_SYSCALL_DEFINE0(s390_geteuid16) { - return high2lowgid(current->gid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); } -asmlinkage long sys32_getegid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getgid16) { - return high2lowgid(current->egid); + return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); } -/* 32-bit timeval and related flotsam. */ - -static inline long get_tv32(struct timeval *o, struct compat_timeval *i) +COMPAT_SYSCALL_DEFINE0(s390_getegid16) { - return (!access_ok(VERIFY_READ, o, sizeof(*o)) || - (__get_user(o->tv_sec, &i->tv_sec) || - __get_user(o->tv_usec, &i->tv_usec))); + return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } -static inline long put_tv32(struct compat_timeval *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) || - __put_user(i->tv_usec, &o->tv_usec))); -} - -/* - * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. - * - * This is really horribly ugly. - */ -asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) +#ifdef CONFIG_SYSVIPC +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, + compat_ulong_t, third, compat_uptr_t, ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - - call &= 0xffff; - - switch (call) { - case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), - second, compat_ptr(third)); - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), - second, NULL); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: - return compat_sys_semctl(first, second, third, - compat_ptr(ptr)); - case MSGSND: - return compat_sys_msgsnd(first, second, third, - compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, 0, third, - 0, compat_ptr(ptr)); - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: - return compat_sys_shmat(first, second, third, - 0, compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - return sys_shmget(first, (unsigned)second, third); - case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); - } - - return -ENOSYS; -} - -asmlinkage long sys32_truncate64(const char * path, unsigned long high, unsigned long low) -{ - if ((int)high < 0) - return -EINVAL; - else - return sys_truncate(path, (high << 32) | low); -} - -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) -{ - if ((int)high < 0) - return -EINVAL; - else - return sys_ftruncate(fd, (high << 32) | low); -} - -int cp_compat_stat(struct kstat *stat, struct compat_stat *statbuf) -{ - int err; - - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - err = put_user(old_encode_dev(stat->dev), &statbuf->st_dev); - err |= put_user(stat->ino, &statbuf->st_ino); - err |= put_user(stat->mode, &statbuf->st_mode); - err |= put_user(stat->nlink, &statbuf->st_nlink); - err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid); - err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid); - err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev); - err |= put_user(stat->size, &statbuf->st_size); - err |= put_user(stat->atime.tv_sec, &statbuf->st_atime); - err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); - err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime); - err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); - err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime); - err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); - err |= put_user(stat->blksize, &statbuf->st_blksize); - err |= put_user(stat->blocks, &statbuf->st_blocks); -/* fixme - err |= put_user(0, &statbuf->__unused4[0]); - err |= put_user(0, &statbuf->__unused4[1]); -*/ - return err; -} - -struct sysinfo32 { - s32 uptime; - u32 loads[3]; - u32 totalram; - u32 freeram; - u32 sharedram; - u32 bufferram; - u32 totalswap; - u32 freeswap; - unsigned short procs; - unsigned short pads; - u32 totalhigh; - u32 freehigh; - unsigned int mem_unit; - char _f[8]; -}; - -asmlinkage long sys32_sysinfo(struct sysinfo32 __user *info) -{ - struct sysinfo s; - int ret, err; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); - ret = sys_sysinfo(&s); - set_fs (old_fs); - err = put_user (s.uptime, &info->uptime); - err |= __put_user (s.loads[0], &info->loads[0]); - err |= __put_user (s.loads[1], &info->loads[1]); - err |= __put_user (s.loads[2], &info->loads[2]); - err |= __put_user (s.totalram, &info->totalram); - err |= __put_user (s.freeram, &info->freeram); - err |= __put_user (s.sharedram, &info->sharedram); - err |= __put_user (s.bufferram, &info->bufferram); - err |= __put_user (s.totalswap, &info->totalswap); - err |= __put_user (s.freeswap, &info->freeswap); - err |= __put_user (s.procs, &info->procs); - err |= __put_user (s.totalhigh, &info->totalhigh); - err |= __put_user (s.freehigh, &info->freehigh); - err |= __put_user (s.mem_unit, &info->mem_unit); - if (err) - return -EFAULT; - return ret; -} - -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, &t); - set_fs (old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - -asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) - return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } - } - set_fs (KERNEL_DS); - ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL, sigsetsize); - set_fs (old_fs); - if (ret) return ret; - if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return 0; -} - -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, - size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); - ret = sys_rt_sigpending(&s, sigsetsize); - set_fs (old_fs); - if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return ret; -} - -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - if (copy_siginfo_from_user32(&info, uinfo)) - return -EFAULT; - set_fs (KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, &info); - set_fs (old_fs); - return ret; -} - -/* - * sys32_execve() executes a new program after the asm stub has set - * things up for us. This should basically do what I want it to. - */ -asmlinkage long -sys32_execve(struct pt_regs regs) -{ - int error; - char * filename; - - filename = getname(compat_ptr(regs.orig_gpr2)); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = compat_do_execve(filename, compat_ptr(regs.gprs[3]), - compat_ptr(regs.gprs[4]), ®s); - if (error == 0) - { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - current->thread.fp_regs.fpc=0; - __asm__ __volatile__ - ("sr 0,0\n\t" - "sfpc 0,0\n\t" - : : :"0"); - } - putname(filename); -out: - return error; -} - - -#ifdef CONFIG_MODULES - -asmlinkage long -sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs) -{ - return sys_init_module(umod, len, uargs); -} - -asmlinkage long -sys32_delete_module(const char __user *name_user, unsigned int flags) -{ - return sys_delete_module(name_user, flags); -} - -#else /* CONFIG_MODULES */ - -asmlinkage long -sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs) -{ - return -ENOSYS; -} - -asmlinkage long -sys32_delete_module(const char __user *name_user, unsigned int flags) -{ - return -ENOSYS; -} - -#endif /* CONFIG_MODULES */ - -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long sys32_gettimeofday(struct compat_timeval *tv, struct timezone *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; + return compat_sys_ipc(call, first, second, third, ptr, third); } +#endif -asmlinkage long sys32_settimeofday(struct compat_timeval *tv, struct timezone *tz) +COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) { - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); + return sys_truncate(path, (unsigned long)high << 32 | low); } -/* These are here just in case some old sparc32 binary calls it. */ -asmlinkage long sys32_pause(void) +COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - return -ERESTARTNOHAND; + return sys_ftruncate(fd, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pread64(unsigned int fd, char *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); + return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pwrite64(unsigned int fd, const char *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); -} - -asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) -{ - return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); -} - -asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; + return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage long sys32_sendfile64(int out_fd, int in_fd, - compat_loff_t *offset, s32 count) +COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) { - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, offset ? &lof : NULL, count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; + return sys_readahead(fd, (unsigned long)high << 32 | low, count); } -/* Handle adjtimex compatibility. */ - -struct timex32 { - u32 modes; - s32 offset, freq, maxerror, esterror; - s32 status, constant, precision, tolerance; - struct compat_timeval time; - s32 tick; - s32 ppsfreq, jitter, shift, stabil; - s32 jitcnt, calcnt, errcnt, stbcnt; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; -}; - -extern int do_adjtimex(struct timex *); - -asmlinkage long sys32_adjtimex(struct timex32 *utp) -{ - struct timex txc; - int ret; - - memset(&txc, 0, sizeof(struct timex)); - - if(get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, &utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; - - ret = do_adjtimex(&txc); - - if(put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) - ret = -EFAULT; - - return ret; -} - -#ifdef CONFIG_SYSCTL -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long sys32_sysctl(struct __sysctl_args32 *args) -{ - struct __sysctl_args32 tmp; - int error; - size_t oldlen, *oldlenp = NULL; - unsigned long addr = (((long)&args->__unused[0]) + 7) & ~7; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - if (get_user(oldlen, (u32 *)A(tmp.oldlenp)) || - put_user(oldlen, (size_t *)addr)) - return -EFAULT; - oldlenp = (size_t *)addr; - } - - lock_kernel(); - error = do_sysctl((int *)A(tmp.name), tmp.nlen, (void *)A(tmp.oldval), - oldlenp, (void *)A(tmp.newval), tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, (size_t *)addr) || - put_user(oldlen, (u32 *)A(tmp.oldlenp))) - error = -EFAULT; - } - copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); - } - return error; -} -#endif - struct stat64_emu31 { unsigned long long st_dev; unsigned int __pad1; @@ -853,7 +355,7 @@ struct stat64_emu31 { unsigned long st_ino; }; -static int cp_stat64(struct stat64_emu31 *ubuf, struct kstat *stat) +static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) { struct stat64_emu31 tmp; @@ -864,8 +366,8 @@ static int cp_stat64(struct stat64_emu31 *ubuf, struct kstat *stat) tmp.__st_ino = (u32)stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blksize = (u32)stat->blksize; @@ -877,7 +379,7 @@ static int cp_stat64(struct stat64_emu31 *ubuf, struct kstat *stat) return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long sys32_stat64(char * filename, struct stat64_emu31 * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); @@ -886,7 +388,7 @@ asmlinkage long sys32_stat64(char * filename, struct stat64_emu31 * statbuf) return ret; } -asmlinkage long sys32_lstat64(char * filename, struct stat64_emu31 * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -895,7 +397,7 @@ asmlinkage long sys32_lstat64(char * filename, struct stat64_emu31 * statbuf) return ret; } -asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -904,6 +406,18 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 * statbuf) return ret; } +COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, + struct stat64_emu31 __user *, statbuf, int, flag) +{ + struct kstat stat; + int error; + + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); +} + /* * Linux/i386 didn't use to be able to handle more than * 4 system call parameters, so these system calls used a memory @@ -911,78 +425,36 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 * statbuf) */ struct mmap_arg_struct_emu31 { - u32 addr; - u32 len; - u32 prot; - u32 flags; - u32 fd; - u32 offset; + compat_ulong_t addr; + compat_ulong_t len; + compat_ulong_t prot; + compat_ulong_t flags; + compat_ulong_t fd; + compat_ulong_t offset; }; -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct file * file = NULL; - unsigned long error = -EBADF; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - if (!IS_ERR((void *) error) && error + len >= 0x80000000ULL) { - /* Result is out of bounds. */ - do_munmap(current->mm, addr, len); - error = -ENOMEM; - } - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - - -asmlinkage unsigned long -old32_mmap(struct mmap_arg_struct_emu31 *arg) +COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; - int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; + return -EFAULT; if (a.offset & ~PAGE_MASK) - goto out; - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); -out: - return error; + return -EINVAL; + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } -asmlinkage long -sys32_mmap2(struct mmap_arg_struct_emu31 *arg) +COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; - int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; + return -EFAULT; + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } -asmlinkage long sys32_read(unsigned int fd, char * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -990,7 +462,7 @@ asmlinkage long sys32_read(unsigned int fd, char * buf, size_t count) return sys_read(fd, buf, count); } -asmlinkage long sys32_write(unsigned int fd, char * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -998,36 +470,19 @@ asmlinkage long sys32_write(unsigned int fd, char * buf, size_t count) return sys_write(fd, buf, count); } -asmlinkage long sys32_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int *parent_tidptr, *child_tidptr; - - clone_flags = regs.gprs[3] & 0xffffffffUL; - newsp = regs.orig_gpr2 & 0x7fffffffUL; - parent_tidptr = (int *) (regs.gprs[4] & 0x7fffffffUL); - child_tidptr = (int *) (regs.gprs[5] & 0x7fffffffUL); - if (!newsp) - newsp = regs.gprs[15]; - return do_fork(clone_flags, newsp, ®s, 0, - parent_tidptr, child_tidptr); -} - /* * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} * because the 31 bit values differ from the 64 bit values. */ -asmlinkage long -sys32_fadvise64(int fd, loff_t offset, size_t len, int advise) +COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) { if (advise == 4) advise = POSIX_FADV_DONTNEED; else if (advise == 5) advise = POSIX_FADV_NOREUSE; - return sys_fadvise64(fd, offset, len, advise); + return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise); } struct fadvise64_64_args { @@ -1037,8 +492,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -sys32_fadvise64_64(struct fadvise64_64_args __user *args) +COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; @@ -1050,3 +504,17 @@ sys32_fadvise64_64(struct fadvise64_64_args __user *args) a.advice = POSIX_FADV_NOREUSE; return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } + +COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, + u32, nhigh, u32, nlow, unsigned int, flags) +{ + return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, + ((u64)nhigh << 32) + nlow, flags); +} + +COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, + u32, lenhigh, u32, lenlow) +{ + return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, + ((u64)lenhigh << 32) + lenlow); +} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 3898f66d0b2..70d4b7c4bea 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -1,14 +1,9 @@ #ifndef _ASM_S390X_S390_H #define _ASM_S390X_S390_H -#include <linux/config.h> #include <linux/compat.h> #include <linux/socket.h> #include <linux/syscalls.h> -#include <linux/nfs_fs.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/export.h> /* Macro that masks the high order bit of an 32 bit pointer and converts it*/ /* to a 64 bit pointer */ @@ -22,81 +17,6 @@ struct ipc_kludge_32 { __s32 msgtyp; }; -struct old_sigaction32 { - __u32 sa_handler; /* Really a pointer, but need to deal with 32 bits */ - compat_old_sigset_t sa_mask; /* A 32 bit mask */ - __u32 sa_flags; - __u32 sa_restorer; /* Another 32 bit pointer */ -}; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[((128/sizeof(int)) - 3)]; - - /* kill() */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - } _timer; - - /* POSIX.1b signals */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - pid_t _pid; /* which child */ - uid_t _uid; /* sender's uid */ - int _status;/* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - __u32 _addr; /* faulting insn/memory ref. - pointer */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} compat_siginfo_t; - -/* - * How these fields are to be accessed. - */ -#define si_pid _sifields._kill._pid -#define si_uid _sifields._kill._uid -#define si_status _sifields._sigchld._status -#define si_utime _sifields._sigchld._utime -#define si_stime _sifields._sigchld._stime -#define si_value _sifields._rt._sigval -#define si_int _sifields._rt._sigval.sival_int -#define si_ptr _sifields._rt._sigval.sival_ptr -#define si_addr _sifields._sigfault._addr -#define si_band _sifields._sigpoll._band -#define si_fd _sifields._sigpoll._fd -#define si_tid _sifields._timer._tid -#define si_overrun _sifields._timer._overrun - /* asm/sigcontext.h */ typedef union { @@ -107,6 +27,7 @@ typedef union typedef struct { unsigned int fpc; + unsigned int pad; freg_t32 fprs[__NUM_FPRS]; } _s390_fp_regs32; @@ -116,37 +37,6 @@ typedef struct __u32 addr; } _psw_t32 __attribute__ ((aligned(8))); -#define PSW32_MASK_PER 0x40000000UL -#define PSW32_MASK_DAT 0x04000000UL -#define PSW32_MASK_IO 0x02000000UL -#define PSW32_MASK_EXT 0x01000000UL -#define PSW32_MASK_KEY 0x00F00000UL -#define PSW32_MASK_MCHECK 0x00040000UL -#define PSW32_MASK_WAIT 0x00020000UL -#define PSW32_MASK_PSTATE 0x00010000UL -#define PSW32_MASK_ASC 0x0000C000UL -#define PSW32_MASK_CC 0x00003000UL -#define PSW32_MASK_PM 0x00000f00UL - -#define PSW32_ADDR_AMODE31 0x80000000UL -#define PSW32_ADDR_INSN 0x7FFFFFFFUL - -#define PSW32_BASE_BITS 0x00080000UL - -#define PSW32_ASC_PRIMARY 0x00000000UL -#define PSW32_ASC_ACCREG 0x00004000UL -#define PSW32_ASC_SECONDARY 0x00008000UL -#define PSW32_ASC_HOME 0x0000C000UL - -#define PSW32_USER_BITS (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | \ - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | \ - PSW32_MASK_PSTATE) - -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - - typedef struct { _psw_t32 psw; @@ -172,26 +62,59 @@ struct sigcontext32 }; /* asm/signal.h */ -struct sigaction32 { - __u32 sa_handler; /* pointer */ - __u32 sa_flags; - __u32 sa_restorer; /* pointer */ - compat_sigset_t sa_mask; /* mask last for extensibility */ -}; - -typedef struct { - __u32 ss_sp; /* pointer */ - int ss_flags; - compat_size_t ss_size; -} stack_t32; /* asm/ucontext.h */ struct ucontext32 { __u32 uc_flags; __u32 uc_link; /* pointer */ - stack_t32 uc_stack; + compat_stack_t uc_stack; _sigregs32 uc_mcontext; - compat_sigset_t uc_sigmask; /* mask last for extensibility */ + compat_sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(compat_sigset_t)]; }; +struct stat64_emu31; +struct mmap_arg_struct_emu31; +struct fadvise64_64_args; + +long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group); +long compat_sys_s390_setregid16(u16 rgid, u16 egid); +long compat_sys_s390_setgid16(u16 gid); +long compat_sys_s390_setreuid16(u16 ruid, u16 euid); +long compat_sys_s390_setuid16(u16 uid); +long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid); +long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); +long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid); +long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); +long compat_sys_s390_setfsuid16(u16 uid); +long compat_sys_s390_setfsgid16(u16 gid); +long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_getuid16(void); +long compat_sys_s390_geteuid16(void); +long compat_sys_s390_getgid16(void); +long compat_sys_s390_getegid16(void); +long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); +long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); +long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); +long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); +long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count); +long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count); +long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); +long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); +long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); +long compat_sys_sigreturn(void); +long compat_sys_rt_sigreturn(void); + #endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index 419aef913ee..12b82383351 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -1,62 +1,42 @@ #ifndef _PTRACE32_H #define _PTRACE32_H -#include "compat_linux.h" /* needed for _psw_t32 */ - -typedef struct { - __u32 cr[3]; -} per_cr_words32; - -typedef struct { - __u16 perc_atmid; /* 0x096 */ - __u32 address; /* 0x098 */ - __u8 access_id; /* 0x0a1 */ -} per_lowcore_words32; - -typedef struct { - union { - per_cr_words32 words; - } control_regs; - /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing - */ - unsigned single_step : 1; - unsigned instruction_fetch : 1; - unsigned : 30; - /* - * These addresses are copied into cr10 & cr11 if single - * stepping is switched off - */ - __u32 starting_addr; - __u32 ending_addr; - union { - per_lowcore_words32 words; - } lowcore; -} per_struct32; +#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ +#include "compat_linux.h" /* needed for psw_compat_t */ + +struct compat_per_struct_kernel { + __u32 cr9; /* PER control bits */ + __u32 cr10; /* PER starting address */ + __u32 cr11; /* PER ending address */ + __u32 bits; /* Obsolete software bits */ + __u32 starting_addr; /* User specified start address */ + __u32 ending_addr; /* User specified end address */ + __u16 perc_atmid; /* PER trap ATMID */ + __u32 address; /* PER trap instruction address */ + __u8 access_id; /* PER trap access identification */ +}; -struct user_regs_struct32 +struct compat_user_regs_struct { - _psw_t32 psw; + psw_compat_t psw; u32 gprs[NUM_GPRS]; u32 acrs[NUM_ACRS]; u32 orig_gpr2; + /* nb: there's a 4-byte hole here */ s390_fp_regs fp_regs; /* * These per registers are in here so that gdb can modify them * itself as there is no "official" ptrace interface for hardware * watchpoints. This is the way intel does it. */ - per_struct32 per_info; - u32 ieee_instruction_pointer; - /* Used to give failing instruction back to user for ieee exceptions */ + struct compat_per_struct_kernel per_info; + u32 ieee_instruction_pointer; /* obsolete, always 0 */ }; -struct user32 { +struct compat_user { /* We start with the registers, to mimic the way that "memory" is returned from the ptrace(3,...) function. */ - struct user_regs_struct32 regs; /* Where the registers are actually stored */ + struct compat_user_regs_struct regs; /* The rest of this junk is to help gdb figure out what goes where */ u32 u_tsize; /* Text segment size (pages). */ u32 u_dsize; /* Data segment size (pages). */ @@ -78,6 +58,6 @@ typedef struct __u32 len; __u32 kernel_addr; __u32 process_addr; -} ptrace_area_emu31; +} compat_ptrace_area; #endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index fa2b3bc22f2..f204d692036 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -1,8 +1,5 @@ /* - * arch/s390/kernel/signal32.c - * - * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * Gerhard Tonn (ton@de.ibm.com) * @@ -11,12 +8,10 @@ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson */ -#include <linux/config.h> #include <linux/compat.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/errno.h> @@ -30,10 +25,10 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/switch_to.h> #include "compat_linux.h" #include "compat_ptrace.h" - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#include "entry.h" typedef struct { @@ -41,6 +36,7 @@ typedef struct struct sigcontext32 sc; _sigregs32 sregs; int signo; + __u32 gprs_high[NUM_GPRS]; __u8 retcode[S390_SYSCALL_SIZE]; } sigframe32; @@ -50,17 +46,13 @@ typedef struct __u8 retcode[S390_SYSCALL_SIZE]; compat_siginfo_t info; struct ucontext32 uc; + __u32 gprs_high[NUM_GPRS]; } rt_sigframe32; -asmlinkage int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); - -int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - /* If you change siginfo_t structure, please be sure this code is fixed accordingly. It should never copy any pad contained in the structure @@ -107,7 +99,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) break; } } - return err; + return err ? -EFAULT : 0; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) @@ -115,9 +107,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) int err; u32 tmp; - if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); @@ -143,7 +132,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -158,324 +148,137 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; } } - return err; + return err ? -EFAULT : 0; } -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys32_sigsuspend(struct pt_regs * regs,int history0, int history1, old_sigset_t mask) -{ - sigset_t saveset; - - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - regs->gprs[2] = -EINTR; - - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage int -sys32_rt_sigsuspend(struct pt_regs * regs, compat_sigset_t __user *unewset, - size_t sigsetsize) +static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) { - sigset_t saveset, newset; - compat_sigset_t set32; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&set32, unewset, sizeof(set32))) + _sigregs32 user_sregs; + int i; + + user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32); + user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI; + user_sregs.regs.psw.mask |= PSW32_USER_BITS; + user_sregs.regs.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); + for (i = 0; i < NUM_GPRS; i++) + user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; + save_access_regs(current->thread.acrs); + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(user_sregs.regs.acrs)); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); + memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, + sizeof(user_sregs.fpregs)); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) return -EFAULT; - switch (_NSIG_WORDS) { - case 4: newset.sig[3] = set32.sig[6] + (((long)set32.sig[7]) << 32); - case 3: newset.sig[2] = set32.sig[4] + (((long)set32.sig[5]) << 32); - case 2: newset.sig[1] = set32.sig[2] + (((long)set32.sig[3]) << 32); - case 1: newset.sig[0] = set32.sig[0] + (((long)set32.sig[1]) << 32); - } - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - regs->gprs[2] = -EINTR; - - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } + return 0; } -asmlinkage long -sys32_sigaction(int sig, const struct old_sigaction32 __user *act, - struct old_sigaction32 __user *oact) +static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) { - struct k_sigaction new_ka, old_ka; - unsigned long sa_handler, sa_restorer; - int ret; - - if (act) { - compat_old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(sa_handler, &act->sa_handler) || - __get_user(sa_restorer, &act->sa_restorer)) - return -EFAULT; - new_ka.sa.sa_handler = (__sighandler_t) sa_handler; - new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - sa_handler = (unsigned long) old_ka.sa.sa_handler; - sa_restorer = (unsigned long) old_ka.sa.sa_restorer; - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(sa_handler, &oact->sa_handler) || - __put_user(sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } + _sigregs32 user_sregs; + int i; - return ret; -} - -int -do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact); + /* Alwys make any pending restarted system call return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; -asmlinkage long -sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, - struct sigaction32 __user *oact, size_t sigsetsize) -{ - struct k_sigaction new_ka, old_ka; - unsigned long sa_handler; - int ret; - compat_sigset_t set32; + if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) + return -EFAULT; - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(compat_sigset_t)) + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI)) return -EINVAL; - if (act) { - ret = get_user(sa_handler, &act->sa_handler); - ret |= __copy_from_user(&set32, &act->sa_mask, - sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - - if (ret) - return -EFAULT; - new_ka.sa.sa_handler = (__sighandler_t) sa_handler; - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } - ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); - ret |= __copy_to_user(&oact->sa_mask, &set32, - sizeof(compat_sigset_t)); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - } + /* Loading the floating-point-control word can fail. Do that first. */ + if (restore_fp_ctl(&user_sregs.fpregs.fpc)) + return -EINVAL; - return ret; -} + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 | + (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN); + for (i = 0; i < NUM_GPRS; i++) + regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(current->thread.acrs)); + restore_access_regs(current->thread.acrs); -asmlinkage long -sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss, - struct pt_regs *regs) -{ - stack_t kss, koss; - unsigned long ss_sp; - int ret, err = 0; - mm_segment_t old_fs = get_fs(); - - if (uss) { - if (!access_ok(VERIFY_READ, uss, sizeof(*uss))) - return -EFAULT; - err |= __get_user(ss_sp, &uss->ss_sp); - err |= __get_user(kss.ss_size, &uss->ss_size); - err |= __get_user(kss.ss_flags, &uss->ss_flags); - if (err) - return -EFAULT; - kss.ss_sp = (void __user *) ss_sp; - } + memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, + sizeof(current->thread.fp_regs)); - set_fs (KERNEL_DS); - ret = do_sigaltstack((stack_t __user *) (uss ? &kss : NULL), - (stack_t __user *) (uoss ? &koss : NULL), - regs->gprs[15]); - set_fs (old_fs); - - if (!ret && uoss) { - if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss))) - return -EFAULT; - ss_sp = (unsigned long) koss.ss_sp; - err |= __put_user(ss_sp, &uoss->ss_sp); - err |= __put_user(koss.ss_size, &uoss->ss_size); - err |= __put_user(koss.ss_flags, &uoss->ss_flags); - if (err) - return -EFAULT; - } - return ret; + restore_fp_regs(current->thread.fp_regs.fprs); + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + return 0; } -static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) +static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) { - _s390_regs_common32 regs32; - int err, i; + __u32 gprs_high[NUM_GPRS]; + int i; - regs32.psw.mask = PSW32_MASK_MERGE(PSW32_USER_BITS, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; for (i = 0; i < NUM_GPRS; i++) - regs32.gprs[i] = (__u32) regs->gprs[i]; - save_access_regs(current->thread.acrs); - memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs)); - err = __copy_to_user(&sregs->regs, ®s32, sizeof(regs32)); - if (err) - return err; - save_fp_regs(¤t->thread.fp_regs); - /* s390_fp_regs and _s390_fp_regs32 are the same ! */ - return __copy_to_user(&sregs->fpregs, ¤t->thread.fp_regs, - sizeof(_s390_fp_regs32)); + gprs_high[i] = regs->gprs[i] >> 32; + if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high))) + return -EFAULT; + return 0; } -static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) +static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) { - _s390_regs_common32 regs32; - int err, i; + __u32 gprs_high[NUM_GPRS]; + int i; - /* Alwys make any pending restarted system call return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - - err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); - if (err) - return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); - regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); + if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high))) + return -EFAULT; for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = (__u64) regs32.gprs[i]; - memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); - - err = __copy_from_user(¤t->thread.fp_regs, &sregs->fpregs, - sizeof(_s390_fp_regs32)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; - if (err) - return err; - - restore_fp_regs(¤t->thread.fp_regs); - regs->trap = -1; /* disable syscall checks */ + *(__u32 *)®s->gprs[i] = gprs_high[i]; return 0; } -asmlinkage long sys32_sigreturn(struct pt_regs *regs) +COMPAT_SYSCALL_DEFINE0(sigreturn) { + struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; - + if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; } -asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; sigset_t set; - stack_t st; - __u32 ss_sp; - int err; - mm_segment_t old_fs = get_fs(); - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; - - err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); - st.ss_sp = compat_ptr(ss_sp); - err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); - err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); - if (err) + if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + goto badframe; + if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; - - set_fs (KERNEL_DS); - do_sigaltstack((stack_t __user *)&st, NULL, regs->gprs[15]); - set_fs (old_fs); - return regs->gprs[2]; - badframe: - force_sig(SIGSEGV, current); - return 0; + force_sig(SIGSEGV, current); + return 0; } /* @@ -494,19 +297,16 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) /* Default to using normal stack */ sp = (unsigned long) A(regs->gprs[15]); + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { - if (! on_sig_stack(sp)) + if (! sas_ss_flags(sp)) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -520,11 +320,12 @@ static inline int map_signal(int sig) return sig; } -static void setup_frame32(int sig, struct k_sigaction *ka, +static int setup_frame32(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) @@ -532,17 +333,19 @@ static void setup_frame32(int sig, struct k_sigaction *ka, if (save_sigregs32(regs, &frame->sregs)) goto give_sigsegv; + if (save_sigregs_gprs_high(regs, frame->gprs_high)) + goto give_sigsegv; if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -551,45 +354,54 @@ static void setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; - return; + return 0; give_sigsegv: force_sigsegv(sig, current); + return -EFAULT; } -static void setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { int err = 0; rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (copy_siginfo_to_user32(&frame->info, info)) goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gprs[15]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); err |= save_sigregs32(regs, &frame->uc.uc_mcontext); + err |= save_sigregs_gprs_high(regs, frame->gprs_high); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; @@ -597,49 +409,54 @@ static void setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __force __user *)(frame->retcode))) + goto give_sigsegv; } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64 __force) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; - return; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; + return 0; give_sigsegv: force_sigsegv(sig, current); + return -EFAULT; } /* * OK, we're invoking a handler */ -void -handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +void handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + int ret; + /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame32(sig, ka, info, oldset, regs); + ret = setup_rt_frame32(sig, ka, info, oldset, regs); else - setup_frame32(sig, ka, oldset, regs); - - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + ret = setup_frame32(sig, ka, oldset, regs); + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S deleted file mode 100644 index cfde1905d07..00000000000 --- a/arch/s390/kernel/compat_wrapper.S +++ /dev/null @@ -1,1477 +0,0 @@ -/* -* arch/s390/kernel/sys_wrapper31.S -* wrapper for 31 bit compatible system calls. -* -* S390 version -* Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation -* Author(s): Gerhard Tonn (ton@de.ibm.com), -* Thomas Spatzier (tspat@de.ibm.com) -*/ - - .globl sys32_exit_wrapper -sys32_exit_wrapper: - lgfr %r2,%r2 # int - jg sys_exit # branch to sys_exit - - .globl sys32_read_wrapper -sys32_read_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys32_read # branch to sys_read - - .globl sys32_write_wrapper -sys32_write_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - jg sys32_write # branch to system call - - .globl sys32_open_wrapper -sys32_open_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_open # branch to system call - - .globl sys32_close_wrapper -sys32_close_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_close # branch to system call - - .globl sys32_creat_wrapper -sys32_creat_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_creat # branch to system call - - .globl sys32_link_wrapper -sys32_link_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_link # branch to system call - - .globl sys32_unlink_wrapper -sys32_unlink_wrapper: - llgtr %r2,%r2 # const char * - jg sys_unlink # branch to system call - - .globl sys32_chdir_wrapper -sys32_chdir_wrapper: - llgtr %r2,%r2 # const char * - jg sys_chdir # branch to system call - - .globl sys32_time_wrapper -sys32_time_wrapper: - llgtr %r2,%r2 # int * - jg compat_sys_time # branch to system call - - .globl sys32_mknod_wrapper -sys32_mknod_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # dev - jg sys_mknod # branch to system call - - .globl sys32_chmod_wrapper -sys32_chmod_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # mode_t - jg sys_chmod # branch to system call - - .globl sys32_lchown16_wrapper -sys32_lchown16_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_lchown16 # branch to system call - - .globl sys32_lseek_wrapper -sys32_lseek_wrapper: - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # off_t - llgfr %r4,%r4 # unsigned int - jg sys_lseek # branch to system call - -#sys32_getpid_wrapper # void - - .globl sys32_mount_wrapper -sys32_mount_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # char * - llgfr %r5,%r5 # unsigned long - llgtr %r6,%r6 # void * - jg compat_sys_mount # branch to system call - - .globl sys32_oldumount_wrapper -sys32_oldumount_wrapper: - llgtr %r2,%r2 # char * - jg sys_oldumount # branch to system call - - .globl sys32_setuid16_wrapper -sys32_setuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setuid16 # branch to system call - -#sys32_getuid16_wrapper # void - - .globl sys32_ptrace_wrapper -sys32_ptrace_wrapper: - lgfr %r2,%r2 # long - lgfr %r3,%r3 # long - llgtr %r4,%r4 # long - llgfr %r5,%r5 # long - jg sys_ptrace # branch to system call - - .globl sys32_alarm_wrapper -sys32_alarm_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_alarm # branch to system call - -#sys32_pause_wrapper # void - - .globl compat_sys_utime_wrapper -compat_sys_utime_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_utimbuf * - jg compat_sys_utime # branch to system call - - .globl sys32_access_wrapper -sys32_access_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_access # branch to system call - - .globl sys32_nice_wrapper -sys32_nice_wrapper: - lgfr %r2,%r2 # int - jg sys_nice # branch to system call - -#sys32_sync_wrapper # void - - .globl sys32_kill_wrapper -sys32_kill_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_kill # branch to system call - - .globl sys32_rename_wrapper -sys32_rename_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_rename # branch to system call - - .globl sys32_mkdir_wrapper -sys32_mkdir_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_mkdir # branch to system call - - .globl sys32_rmdir_wrapper -sys32_rmdir_wrapper: - llgtr %r2,%r2 # const char * - jg sys_rmdir # branch to system call - - .globl sys32_dup_wrapper -sys32_dup_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_dup # branch to system call - - .globl sys32_pipe_wrapper -sys32_pipe_wrapper: - llgtr %r2,%r2 # u32 * - jg sys_pipe # branch to system call - - .globl compat_sys_times_wrapper -compat_sys_times_wrapper: - llgtr %r2,%r2 # struct compat_tms * - jg compat_sys_times # branch to system call - - .globl sys32_brk_wrapper -sys32_brk_wrapper: - llgtr %r2,%r2 # unsigned long - jg sys_brk # branch to system call - - .globl sys32_setgid16_wrapper -sys32_setgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setgid16 # branch to system call - -#sys32_getgid16_wrapper # void - - .globl sys32_signal_wrapper -sys32_signal_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __sighandler_t - jg sys_signal - -#sys32_geteuid16_wrapper # void - -#sys32_getegid16_wrapper # void - - .globl sys32_acct_wrapper -sys32_acct_wrapper: - llgtr %r2,%r2 # char * - jg sys_acct # branch to system call - - .globl sys32_umount_wrapper -sys32_umount_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_umount # branch to system call - - .globl compat_sys_ioctl_wrapper -compat_sys_ioctl_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned int - jg compat_sys_ioctl # branch to system call - - .globl compat_sys_fcntl_wrapper -compat_sys_fcntl_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl # branch to system call - - .globl sys32_setpgid_wrapper -sys32_setpgid_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - jg sys_setpgid # branch to system call - - .globl sys32_umask_wrapper -sys32_umask_wrapper: - lgfr %r2,%r2 # int - jg sys_umask # branch to system call - - .globl sys32_chroot_wrapper -sys32_chroot_wrapper: - llgtr %r2,%r2 # char * - jg sys_chroot # branch to system call - - .globl sys32_ustat_wrapper -sys32_ustat_wrapper: - llgfr %r2,%r2 # dev_t - llgtr %r3,%r3 # struct ustat * - jg sys_ustat - - .globl sys32_dup2_wrapper -sys32_dup2_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_dup2 # branch to system call - -#sys32_getppid_wrapper # void - -#sys32_getpgrp_wrapper # void - -#sys32_setsid_wrapper # void - - .globl sys32_sigaction_wrapper -sys32_sigaction_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct old_sigaction * - llgtr %r4,%r4 # struct old_sigaction32 * - jg sys32_sigaction # branch to system call - - .globl sys32_setreuid16_wrapper -sys32_setreuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - jg sys32_setreuid16 # branch to system call - - .globl sys32_setregid16_wrapper -sys32_setregid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - jg sys32_setregid16 # branch to system call - -#sys32_sigsuspend_wrapper # done in sigsuspend_glue - - .globl compat_sys_sigpending_wrapper -compat_sys_sigpending_wrapper: - llgtr %r2,%r2 # compat_old_sigset_t * - jg compat_sys_sigpending # branch to system call - - .globl sys32_sethostname_wrapper -sys32_sethostname_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_sethostname # branch to system call - - .globl compat_sys_setrlimit_wrapper -compat_sys_setrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_setrlimit # branch to system call - - .globl compat_sys_old_getrlimit_wrapper -compat_sys_old_getrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_old_getrlimit # branch to system call - - .globl compat_sys_getrlimit_wrapper -compat_sys_getrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_getrlimit # branch to system call - - .globl sys32_mmap2_wrapper -sys32_mmap2_wrapper: - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg sys32_mmap2 # branch to system call - - .globl compat_sys_getrusage_wrapper -compat_sys_getrusage_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct rusage_emu31 * - jg compat_sys_getrusage # branch to system call - - .globl sys32_gettimeofday_wrapper -sys32_gettimeofday_wrapper: - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg sys32_gettimeofday # branch to system call - - .globl sys32_settimeofday_wrapper -sys32_settimeofday_wrapper: - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg sys32_settimeofday # branch to system call - - .globl sys32_getgroups16_wrapper -sys32_getgroups16_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_getgroups16 # branch to system call - - .globl sys32_setgroups16_wrapper -sys32_setgroups16_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_setgroups16 # branch to system call - - .globl sys32_symlink_wrapper -sys32_symlink_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_symlink # branch to system call - - .globl sys32_readlink_wrapper -sys32_readlink_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_readlink # branch to system call - - .globl sys32_uselib_wrapper -sys32_uselib_wrapper: - llgtr %r2,%r2 # const char * - jg sys_uselib # branch to system call - - .globl sys32_swapon_wrapper -sys32_swapon_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_swapon # branch to system call - - .globl sys32_reboot_wrapper -sys32_reboot_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # unsigned int - llgtr %r5,%r5 # void * - jg sys_reboot # branch to system call - - .globl old32_readdir_wrapper -old32_readdir_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_old_readdir # branch to system call - - .globl old32_mmap_wrapper -old32_mmap_wrapper: - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg old32_mmap # branch to system call - - .globl sys32_munmap_wrapper -sys32_munmap_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munmap # branch to system call - - .globl sys32_truncate_wrapper -sys32_truncate_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - jg sys_truncate # branch to system call - - .globl sys32_ftruncate_wrapper -sys32_ftruncate_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - jg sys_ftruncate # branch to system call - - .globl sys32_fchmod_wrapper -sys32_fchmod_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # mode_t - jg sys_fchmod # branch to system call - - .globl sys32_fchown16_wrapper -sys32_fchown16_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # compat_uid_t - llgfr %r4,%r4 # compat_uid_t - jg sys32_fchown16 # branch to system call - - .globl sys32_getpriority_wrapper -sys32_getpriority_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_getpriority # branch to system call - - .globl sys32_setpriority_wrapper -sys32_setpriority_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_setpriority # branch to system call - - .globl compat_sys_statfs_wrapper -compat_sys_statfs_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_statfs # branch to system call - - .globl compat_sys_fstatfs_wrapper -compat_sys_fstatfs_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_fstatfs # branch to system call - - .globl compat_sys_socketcall_wrapper -compat_sys_socketcall_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # u32 * - jg compat_sys_socketcall # branch to system call - - .globl sys32_syslog_wrapper -sys32_syslog_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_syslog # branch to system call - - .globl compat_sys_setitimer_wrapper -compat_sys_setitimer_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct itimerval_emu31 * - llgtr %r4,%r4 # struct itimerval_emu31 * - jg compat_sys_setitimer # branch to system call - - .globl compat_sys_getitimer_wrapper -compat_sys_getitimer_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct itimerval_emu31 * - jg compat_sys_getitimer # branch to system call - - .globl compat_sys_newstat_wrapper -compat_sys_newstat_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newstat # branch to system call - - .globl compat_sys_newlstat_wrapper -compat_sys_newlstat_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newlstat # branch to system call - - .globl compat_sys_newfstat_wrapper -compat_sys_newfstat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newfstat # branch to system call - -#sys32_vhangup_wrapper # void - - .globl compat_sys_wait4_wrapper -compat_sys_wait4_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # unsigned int * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct rusage * - jg compat_sys_wait4 # branch to system call - - .globl sys32_swapoff_wrapper -sys32_swapoff_wrapper: - llgtr %r2,%r2 # const char * - jg sys_swapoff # branch to system call - - .globl sys32_sysinfo_wrapper -sys32_sysinfo_wrapper: - llgtr %r2,%r2 # struct sysinfo_emu31 * - jg sys32_sysinfo # branch to system call - - .globl sys32_ipc_wrapper -sys32_ipc_wrapper: - llgfr %r2,%r2 # uint - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # u32 - jg sys32_ipc # branch to system call - - .globl sys32_fsync_wrapper -sys32_fsync_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fsync # branch to system call - -#sys32_sigreturn_wrapper # done in sigreturn_glue - -#sys32_clone_wrapper # done in clone_glue - - .globl sys32_setdomainname_wrapper -sys32_setdomainname_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_setdomainname # branch to system call - - .globl sys32_newuname_wrapper -sys32_newuname_wrapper: - llgtr %r2,%r2 # struct new_utsname * - jg s390x_newuname # branch to system call - - .globl sys32_adjtimex_wrapper -sys32_adjtimex_wrapper: - llgtr %r2,%r2 # struct timex_emu31 * - jg sys32_adjtimex # branch to system call - - .globl sys32_mprotect_wrapper -sys32_mprotect_wrapper: - llgtr %r2,%r2 # unsigned long (actually pointer - llgfr %r3,%r3 # size_t - llgfr %r4,%r4 # unsigned long - jg sys_mprotect # branch to system call - - .globl compat_sys_sigprocmask_wrapper -compat_sys_sigprocmask_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_old_sigset_t * - llgtr %r4,%r4 # compat_old_sigset_t * - jg compat_sys_sigprocmask # branch to system call - - .globl sys32_init_module_wrapper -sys32_init_module_wrapper: - llgtr %r2,%r2 # void * - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # char * - jg sys32_init_module # branch to system call - - .globl sys32_delete_module_wrapper -sys32_delete_module_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned int - jg sys32_delete_module # branch to system call - - .globl sys32_quotactl_wrapper -sys32_quotactl_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # qid_t - llgtr %r5,%r5 # caddr_t - jg sys_quotactl # branch to system call - - .globl sys32_getpgid_wrapper -sys32_getpgid_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_getpgid # branch to system call - - .globl sys32_fchdir_wrapper -sys32_fchdir_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fchdir # branch to system call - - .globl sys32_bdflush_wrapper -sys32_bdflush_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # long - jg sys_bdflush # branch to system call - - .globl sys32_sysfs_wrapper -sys32_sysfs_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys_sysfs # branch to system call - - .globl sys32_personality_wrapper -sys32_personality_wrapper: - llgfr %r2,%r2 # unsigned long - jg s390x_personality # branch to system call - - .globl sys32_setfsuid16_wrapper -sys32_setfsuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setfsuid16 # branch to system call - - .globl sys32_setfsgid16_wrapper -sys32_setfsgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setfsgid16 # branch to system call - - .globl sys32_llseek_wrapper -sys32_llseek_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # unsigned int - jg sys_llseek # branch to system call - - .globl sys32_getdents_wrapper -sys32_getdents_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_getdents # branch to system call - - .globl compat_sys_select_wrapper -compat_sys_select_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_fd_set * - llgtr %r4,%r4 # compat_fd_set * - llgtr %r5,%r5 # compat_fd_set * - llgtr %r6,%r6 # struct compat_timeval * - jg compat_sys_select # branch to system call - - .globl sys32_flock_wrapper -sys32_flock_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_flock # branch to system call - - .globl sys32_msync_wrapper -sys32_msync_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_msync # branch to system call - - .globl compat_sys_readv_wrapper -compat_sys_readv_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_readv # branch to system call - - .globl compat_sys_writev_wrapper -compat_sys_writev_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_writev # branch to system call - - .globl sys32_getsid_wrapper -sys32_getsid_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_getsid # branch to system call - - .globl sys32_fdatasync_wrapper -sys32_fdatasync_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fdatasync # branch to system call - -#sys32_sysctl_wrapper # tbd - - .globl sys32_mlock_wrapper -sys32_mlock_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_mlock # branch to system call - - .globl sys32_munlock_wrapper -sys32_munlock_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munlock # branch to system call - - .globl sys32_mlockall_wrapper -sys32_mlockall_wrapper: - lgfr %r2,%r2 # int - jg sys_mlockall # branch to system call - -#sys32_munlockall_wrapper # void - - .globl sys32_sched_setparam_wrapper -sys32_sched_setparam_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_setparam # branch to system call - - .globl sys32_sched_getparam_wrapper -sys32_sched_getparam_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_getparam # branch to system call - - .globl sys32_sched_setscheduler_wrapper -sys32_sched_setscheduler_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct sched_param * - jg sys_sched_setscheduler # branch to system call - - .globl sys32_sched_getscheduler_wrapper -sys32_sched_getscheduler_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_sched_getscheduler # branch to system call - -#sys32_sched_yield_wrapper # void - - .globl sys32_sched_get_priority_max_wrapper -sys32_sched_get_priority_max_wrapper: - lgfr %r2,%r2 # int - jg sys_sched_get_priority_max # branch to system call - - .globl sys32_sched_get_priority_min_wrapper -sys32_sched_get_priority_min_wrapper: - lgfr %r2,%r2 # int - jg sys_sched_get_priority_min # branch to system call - - .globl sys32_sched_rr_get_interval_wrapper -sys32_sched_rr_get_interval_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct compat_timespec * - jg sys32_sched_rr_get_interval # branch to system call - - .globl compat_sys_nanosleep_wrapper -compat_sys_nanosleep_wrapper: - llgtr %r2,%r2 # struct compat_timespec * - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_nanosleep # branch to system call - - .globl sys32_mremap_wrapper -sys32_mremap_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_mremap # branch to system call - - .globl sys32_setresuid16_wrapper -sys32_setresuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_setresuid16 # branch to system call - - .globl sys32_getresuid16_wrapper -sys32_getresuid16_wrapper: - llgtr %r2,%r2 # __kernel_old_uid_emu31_t * - llgtr %r3,%r3 # __kernel_old_uid_emu31_t * - llgtr %r4,%r4 # __kernel_old_uid_emu31_t * - jg sys32_getresuid16 # branch to system call - - .globl sys32_poll_wrapper -sys32_poll_wrapper: - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # long - jg sys_poll # branch to system call - - .globl compat_sys_nfsservctl_wrapper -compat_sys_nfsservctl_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_nfsctl_arg* - llgtr %r4,%r4 # union compat_nfsctl_res* - jg compat_sys_nfsservctl # branch to system call - - .globl sys32_setresgid16_wrapper -sys32_setresgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_setresgid16 # branch to system call - - .globl sys32_getresgid16_wrapper -sys32_getresgid16_wrapper: - llgtr %r2,%r2 # __kernel_old_gid_emu31_t * - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - llgtr %r4,%r4 # __kernel_old_gid_emu31_t * - jg sys32_getresgid16 # branch to system call - - .globl sys32_prctl_wrapper -sys32_prctl_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_prctl # branch to system call - -#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue - - .globl sys32_rt_sigaction_wrapper -sys32_rt_sigaction_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct sigaction_emu31 * - llgtr %r4,%r4 # const struct sigaction_emu31 * - llgfr %r5,%r5 # size_t - jg sys32_rt_sigaction # branch to system call - - .globl sys32_rt_sigprocmask_wrapper -sys32_rt_sigprocmask_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # old_sigset_emu31 * - llgtr %r4,%r4 # old_sigset_emu31 * - llgfr %r5,%r5 # size_t - jg sys32_rt_sigprocmask # branch to system call - - .globl sys32_rt_sigpending_wrapper -sys32_rt_sigpending_wrapper: - llgtr %r2,%r2 # sigset_emu31 * - llgfr %r3,%r3 # size_t - jg sys32_rt_sigpending # branch to system call - - .globl compat_sys_rt_sigtimedwait_wrapper -compat_sys_rt_sigtimedwait_wrapper: - llgtr %r2,%r2 # const sigset_emu31_t * - llgtr %r3,%r3 # siginfo_emu31_t * - llgtr %r4,%r4 # const struct compat_timespec * - llgfr %r5,%r5 # size_t - jg compat_sys_rt_sigtimedwait # branch to system call - - .globl sys32_rt_sigqueueinfo_wrapper -sys32_rt_sigqueueinfo_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # siginfo_emu31_t * - jg sys32_rt_sigqueueinfo # branch to system call - -#sys32_rt_sigsuspend_wrapper # done in rt_sigsuspend_glue - - .globl sys32_pread64_wrapper -sys32_pread64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pread64 # branch to system call - - .globl sys32_pwrite64_wrapper -sys32_pwrite64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pwrite64 # branch to system call - - .globl sys32_chown16_wrapper -sys32_chown16_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_chown16 # branch to system call - - .globl sys32_getcwd_wrapper -sys32_getcwd_wrapper: - llgtr %r2,%r2 # char * - llgfr %r3,%r3 # unsigned long - jg sys_getcwd # branch to system call - - .globl sys32_capget_wrapper -sys32_capget_wrapper: - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # cap_user_data_t - jg sys_capget # branch to system call - - .globl sys32_capset_wrapper -sys32_capset_wrapper: - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # const cap_user_data_t - jg sys_capset # branch to system call - - .globl sys32_sigaltstack_wrapper -sys32_sigaltstack_wrapper: - llgtr %r2,%r2 # const stack_emu31_t * - llgtr %r3,%r3 # stack_emu31_t * - jg sys32_sigaltstack - - .globl sys32_sendfile_wrapper -sys32_sendfile_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # __kernel_off_emu31_t * - llgfr %r5,%r5 # size_t - jg sys32_sendfile # branch to system call - -#sys32_vfork_wrapper # done in vfork_glue - - .globl sys32_truncate64_wrapper -sys32_truncate64_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_truncate64 # branch to system call - - .globl sys32_ftruncate64_wrapper -sys32_ftruncate64_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_ftruncate64 # branch to system call - - .globl sys32_lchown_wrapper -sys32_lchown_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_lchown # branch to system call - -#sys32_getuid_wrapper # void -#sys32_getgid_wrapper # void -#sys32_geteuid_wrapper # void -#sys32_getegid_wrapper # void - - .globl sys32_setreuid_wrapper -sys32_setreuid_wrapper: - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - jg sys_setreuid # branch to system call - - .globl sys32_setregid_wrapper -sys32_setregid_wrapper: - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - jg sys_setregid # branch to system call - - .globl sys32_getgroups_wrapper -sys32_getgroups_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_getgroups # branch to system call - - .globl sys32_setgroups_wrapper -sys32_setgroups_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_setgroups # branch to system call - - .globl sys32_fchown_wrapper -sys32_fchown_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_fchown # branch to system call - - .globl sys32_setresuid_wrapper -sys32_setresuid_wrapper: - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # uid_t - jg sys_setresuid # branch to system call - - .globl sys32_getresuid_wrapper -sys32_getresuid_wrapper: - llgtr %r2,%r2 # uid_t * - llgtr %r3,%r3 # uid_t * - llgtr %r4,%r4 # uid_t * - jg sys_getresuid # branch to system call - - .globl sys32_setresgid_wrapper -sys32_setresgid_wrapper: - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - llgfr %r4,%r4 # gid_t - jg sys_setresgid # branch to system call - - .globl sys32_getresgid_wrapper -sys32_getresgid_wrapper: - llgtr %r2,%r2 # gid_t * - llgtr %r3,%r3 # gid_t * - llgtr %r4,%r4 # gid_t * - jg sys_getresgid # branch to system call - - .globl sys32_chown_wrapper -sys32_chown_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_chown # branch to system call - - .globl sys32_setuid_wrapper -sys32_setuid_wrapper: - llgfr %r2,%r2 # uid_t - jg sys_setuid # branch to system call - - .globl sys32_setgid_wrapper -sys32_setgid_wrapper: - llgfr %r2,%r2 # gid_t - jg sys_setgid # branch to system call - - .globl sys32_setfsuid_wrapper -sys32_setfsuid_wrapper: - llgfr %r2,%r2 # uid_t - jg sys_setfsuid # branch to system call - - .globl sys32_setfsgid_wrapper -sys32_setfsgid_wrapper: - llgfr %r2,%r2 # gid_t - jg sys_setfsgid # branch to system call - - .globl sys32_pivot_root_wrapper -sys32_pivot_root_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_pivot_root # branch to system call - - .globl sys32_mincore_wrapper -sys32_mincore_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - llgtr %r4,%r4 # unsigned char * - jg sys_mincore # branch to system call - - .globl sys32_madvise_wrapper -sys32_madvise_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_madvise # branch to system call - - .globl sys32_getdents64_wrapper -sys32_getdents64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg sys_getdents64 # branch to system call - - .globl compat_sys_fcntl64_wrapper -compat_sys_fcntl64_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl64 # branch to system call - - .globl sys32_stat64_wrapper -sys32_stat64_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_stat64 # branch to system call - - .globl sys32_lstat64_wrapper -sys32_lstat64_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_lstat64 # branch to system call - - .globl sys32_stime_wrapper -sys32_stime_wrapper: - llgtr %r2,%r2 # long * - jg compat_sys_stime # branch to system call - - .globl sys32_sysctl_wrapper -sys32_sysctl_wrapper: - llgtr %r2,%r2 # struct __sysctl_args32 * - jg sys32_sysctl - - .globl sys32_fstat64_wrapper -sys32_fstat64_wrapper: - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # struct stat64 * - jg sys32_fstat64 # branch to system call - - .globl compat_sys_futex_wrapper -compat_sys_futex_wrapper: - llgtr %r2,%r2 # u32 * - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct compat_timespec * - llgtr %r6,%r6 # u32 * - lgf %r0,164(%r15) # int - stg %r0,160(%r15) - jg compat_sys_futex # branch to system call - - .globl sys32_setxattr_wrapper -sys32_setxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_setxattr - - .globl sys32_lsetxattr_wrapper -sys32_lsetxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_lsetxattr - - .globl sys32_fsetxattr_wrapper -sys32_fsetxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_fsetxattr - - .globl sys32_getxattr_wrapper -sys32_getxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_getxattr - - .globl sys32_lgetxattr_wrapper -sys32_lgetxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_lgetxattr - - .globl sys32_fgetxattr_wrapper -sys32_fgetxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_fgetxattr - - .globl sys32_listxattr_wrapper -sys32_listxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_listxattr - - .globl sys32_llistxattr_wrapper -sys32_llistxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_llistxattr - - .globl sys32_flistxattr_wrapper -sys32_flistxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_flistxattr - - .globl sys32_removexattr_wrapper -sys32_removexattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_removexattr - - .globl sys32_lremovexattr_wrapper -sys32_lremovexattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_lremovexattr - - .globl sys32_fremovexattr_wrapper -sys32_fremovexattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - jg sys_fremovexattr - - .globl sys32_sched_setaffinity_wrapper -sys32_sched_setaffinity_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_setaffinity - - .globl sys32_sched_getaffinity_wrapper -sys32_sched_getaffinity_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_getaffinity - - .globl sys32_exit_group_wrapper -sys32_exit_group_wrapper: - lgfr %r2,%r2 # int - jg sys_exit_group # branch to system call - - .globl sys32_set_tid_address_wrapper -sys32_set_tid_address_wrapper: - llgtr %r2,%r2 # int * - jg sys_set_tid_address # branch to system call - - .globl sys_epoll_create_wrapper -sys_epoll_create_wrapper: - lgfr %r2,%r2 # int - jg sys_epoll_create # branch to system call - - .globl sys_epoll_ctl_wrapper -sys_epoll_ctl_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct epoll_event * - jg sys_epoll_ctl # branch to system call - - .globl sys_epoll_wait_wrapper -sys_epoll_wait_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg sys_epoll_wait # branch to system call - - .globl sys32_lookup_dcookie_wrapper -sys32_lookup_dcookie_wrapper: - sllg %r2,%r2,32 # get high word of 64bit dcookie - or %r2,%r3 # get low word of 64bit dcookie - llgtr %r3,%r4 # char * - llgfr %r4,%r5 # size_t - jg sys_lookup_dcookie - - .globl sys32_fadvise64_wrapper -sys32_fadvise64_wrapper: - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - llgfr %r4,%r5 # size_t (unsigned long) - lgfr %r5,%r6 # int - jg sys32_fadvise64 - - .globl sys32_fadvise64_64_wrapper -sys32_fadvise64_64_wrapper: - llgtr %r2,%r2 # struct fadvise64_64_args * - jg sys32_fadvise64_64 - - .globl sys32_clock_settime_wrapper -sys32_clock_settime_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_settime - - .globl sys32_clock_gettime_wrapper -sys32_clock_gettime_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_gettime - - .globl sys32_clock_getres_wrapper -sys32_clock_getres_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_getres - - .globl sys32_clock_nanosleep_wrapper -sys32_clock_nanosleep_wrapper: - lgfr %r2,%r2 # clockid_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_timespec * - llgtr %r5,%r5 # struct compat_timespec * - jg compat_sys_clock_nanosleep - - .globl sys32_timer_create_wrapper -sys32_timer_create_wrapper: - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_sigevent * - llgtr %r4,%r4 # timer_t * - jg compat_sys_timer_create - - .globl sys32_timer_settime_wrapper -sys32_timer_settime_wrapper: - lgfr %r2,%r2 # timer_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timer_settime - - .globl sys32_timer_gettime_wrapper -sys32_timer_gettime_wrapper: - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timer_gettime - - .globl sys32_timer_getoverrun_wrapper -sys32_timer_getoverrun_wrapper: - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_getoverrun - - .globl sys32_timer_delete_wrapper -sys32_timer_delete_wrapper: - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_delete - - .globl sys32_io_setup_wrapper -sys32_io_setup_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # u32 * - jg compat_sys_io_setup - - .globl sys32_io_destroy_wrapper -sys32_io_destroy_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - jg sys_io_destroy - - .globl sys32_io_getevents_wrapper -sys32_io_getevents_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - lgfr %r4,%r4 # long - llgtr %r5,%r5 # struct io_event * - llgtr %r6,%r6 # struct compat_timespec * - jg compat_sys_io_getevents - - .globl sys32_io_submit_wrapper -sys32_io_submit_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - llgtr %r4,%r4 # struct iocb ** - jg compat_sys_io_submit - - .globl sys32_io_cancel_wrapper -sys32_io_cancel_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - llgtr %r3,%r3 # struct iocb * - llgtr %r4,%r4 # struct io_event * - jg sys_io_cancel - - .globl compat_sys_statfs64_wrapper -compat_sys_statfs64_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_statfs64 - - .globl compat_sys_fstatfs64_wrapper -compat_sys_fstatfs64_wrapper: - llgfr %r2,%r2 # unsigned int fd - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_fstatfs64 - - .globl compat_sys_mq_open_wrapper -compat_sys_mq_open_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # mode_t - llgtr %r5,%r5 # struct compat_mq_attr * - jg compat_sys_mq_open - - .globl sys32_mq_unlink_wrapper -sys32_mq_unlink_wrapper: - llgtr %r2,%r2 # const char * - jg sys_mq_unlink - - .globl compat_sys_mq_timedsend_wrapper -compat_sys_mq_timedsend_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedsend - - .globl compat_sys_mq_timedreceive_wrapper -compat_sys_mq_timedreceive_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgtr %r5,%r5 # unsigned int * - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedreceive - - .globl compat_sys_mq_notify_wrapper -compat_sys_mq_notify_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_sigevent * - jg compat_sys_mq_notify - - .globl compat_sys_mq_getsetattr_wrapper -compat_sys_mq_getsetattr_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_mq_attr * - llgtr %r4,%r4 # struct compat_mq_attr * - jg compat_sys_mq_getsetattr - - .globl compat_sys_add_key_wrapper -compat_sys_add_key_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # size_t - llgfr %r6,%r6 # (key_serial_t) u32 - jg sys_add_key - - .globl compat_sys_request_key_wrapper -compat_sys_request_key_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # (key_serial_t) u32 - jg sys_request_key - - .globl sys32_remap_file_pages_wrapper -sys32_remap_file_pages_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_remap_file_pages - - .globl compat_sys_waitid_wrapper -compat_sys_waitid_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # pid_t - llgtr %r4,%r4 # siginfo_emu31_t * - lgfr %r5,%r5 # int - llgtr %r6,%r6 # struct rusage_emu31 * - jg compat_sys_waitid - - .globl compat_sys_kexec_load_wrapper -compat_sys_kexec_load_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # struct kexec_segment * - llgfr %r5,%r5 # unsigned long - jg compat_sys_kexec_load - - .globl sys_ioprio_set_wrapper -sys_ioprio_set_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_ioprio_set - - .globl sys_ioprio_get_wrapper -sys_ioprio_get_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_ioprio_get - - .globl sys_inotify_add_watch_wrapper -sys_inotify_add_watch_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # u32 - jg sys_inotify_add_watch - - .globl sys_inotify_rm_watch_wrapper -sys_inotify_rm_watch_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - jg sys_inotify_rm_watch diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c new file mode 100644 index 00000000000..45cdb37aa6f --- /dev/null +++ b/arch/s390/kernel/compat_wrapper.c @@ -0,0 +1,216 @@ +/* + * Compat system call wrappers. + * + * Copyright IBM Corp. 2014 + */ + +#include <linux/syscalls.h> +#include <linux/compat.h> +#include "entry.h" + +#define COMPAT_SYSCALL_WRAP1(name, ...) \ + COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP2(name, ...) \ + COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP3(name, ...) \ + COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP4(name, ...) \ + COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP5(name, ...) \ + COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP6(name, ...) \ + COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) + +#define __SC_COMPAT_TYPE(t, a) \ + __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a + +#define __SC_COMPAT_CAST(t, a) \ +({ \ + long __ReS = a; \ + \ + BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ + !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ + if (__TYPE_IS_L(t)) \ + __ReS = (s32)a; \ + if (__TYPE_IS_UL(t)) \ + __ReS = (u32)a; \ + if (__TYPE_IS_PTR(t)) \ + __ReS = a & 0x7fffffff; \ + (t)__ReS; \ +}) + +/* + * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by + * compat tasks. These wrappers will only be used for system calls where only + * the system call arguments need sign or zero extension or zeroing of the upper + * 33 bits of pointers. + * Note: since the wrapper function will afterwards call a system call which + * again performs zero and sign extension for all system call arguments with + * a size of less than eight bytes, these compat wrappers only touch those + * system call arguments with a size of eight bytes ((unsigned) long and + * pointers). Zero and sign extension for e.g. int parameters will be done by + * the regular system call wrappers. + */ +#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ + { \ + return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ + } + +COMPAT_SYSCALL_WRAP1(exit, int, error_code); +COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); +COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); +COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); +COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(nice, int, increment); +COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); +COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); +COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); +COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); +COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); +COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); +COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); +COMPAT_SYSCALL_WRAP1(umask, int, mask); +COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); +COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); +COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); +COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); +COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); +COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); +COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); +COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); +COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); +COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); +COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); +COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); +COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); +COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); +COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); +COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); +COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); +COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); +COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); +COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); +COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); +COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); +COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); +COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP1(mlockall, int, flags); +COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); +COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); +COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); +COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); +COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); +COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); +COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); +COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); +COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); +COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); +COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); +COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); +COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); +COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); +COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); +COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); +COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); +COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); +COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); +COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); +COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); +COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); +COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); +COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); +COMPAT_SYSCALL_WRAP1(epoll_create, int, size); +COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); +COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); +COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); +COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); +COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); +COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); +COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); +COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); +COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); +COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); +COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); +COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); +COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); +COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); +COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); +COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); +COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); +COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); +COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); +COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); +COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); +COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); +COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); +COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); +COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); +COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); +COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); +COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); +COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); +COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); +COMPAT_SYSCALL_WRAP1(syncfs, int, fd); +COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); +COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); +COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); +COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); +COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 4ef44e536b2..d7b0c4d2788 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -1,12 +1,13 @@ /* - * arch/s390/kernel/cpcmd.c - * * S390 version - * Copyright (C) 1999,2005 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2007 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ +#define KMSG_COMPONENT "cpcmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -15,118 +16,108 @@ #include <linux/string.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> -#include <asm/system.h> +#include <asm/io.h> static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; +static int diag8_noresponse(int cmdlen) +{ + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = cmdlen; + + asm volatile( +#ifndef CONFIG_64BIT + " diag %1,%0,0x8\n" +#else /* CONFIG_64BIT */ + " sam31\n" + " diag %1,%0,0x8\n" + " sam64\n" +#endif /* CONFIG_64BIT */ + : "+d" (reg3) : "d" (reg2) : "cc"); + return reg3; +} + +static int diag8_response(int cmdlen, char *response, int *rlen) +{ + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = (addr_t) response; + register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; + register unsigned long reg5 asm ("5") = *rlen; + + asm volatile( +#ifndef CONFIG_64BIT + " diag %2,%0,0x8\n" + " brc 8,1f\n" + " ar %1,%4\n" +#else /* CONFIG_64BIT */ + " sam31\n" + " diag %2,%0,0x8\n" + " sam64\n" + " brc 8,1f\n" + " agr %1,%4\n" +#endif /* CONFIG_64BIT */ + "1:\n" + : "+d" (reg4), "+d" (reg5) + : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc"); + *rlen = reg5; + return reg4; +} + /* - * the caller of __cpcmd has to ensure that the response buffer is below 2 GB + * __cpcmd has some restrictions over cpcmd + * - the response buffer must reside below 2GB (if any) + * - __cpcmd is unlocked and therefore not SMP-safe */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { - const int mask = 0x40000000L; - unsigned long flags; - int return_code; - int return_len; int cmdlen; + int rc; + int response_len; - spin_lock_irqsave(&cpcmd_lock, flags); cmdlen = strlen(cmd); BUG_ON(cmdlen > 240); memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); - if (response != NULL && rlen > 0) { + if (response) { memset(response, 0, rlen); -#ifndef CONFIG_64BIT - asm volatile ( "lra 2,0(%2)\n" - "lr 4,%3\n" - "o 4,%6\n" - "lra 3,0(%4)\n" - "lr 5,%5\n" - "diag 2,4,0x8\n" - "brc 8, 1f\n" - "ar 5, %5\n" - "1: \n" - "lr %0,4\n" - "lr %1,5\n" - : "=d" (return_code), "=d" (return_len) - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); -#else /* CONFIG_64BIT */ - asm volatile ( "lrag 2,0(%2)\n" - "lgr 4,%3\n" - "o 4,%6\n" - "lrag 3,0(%4)\n" - "lgr 5,%5\n" - "sam31\n" - "diag 2,4,0x8\n" - "sam64\n" - "brc 8, 1f\n" - "agr 5, %5\n" - "1: \n" - "lgr %0,4\n" - "lgr %1,5\n" - : "=d" (return_code), "=d" (return_len) - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); -#endif /* CONFIG_64BIT */ - EBCASC(response, rlen); + response_len = rlen; + rc = diag8_response(cmdlen, response, &rlen); + EBCASC(response, response_len); } else { - return_len = 0; -#ifndef CONFIG_64BIT - asm volatile ( "lra 2,0(%1)\n" - "lr 3,%2\n" - "diag 2,3,0x8\n" - "lr %0,3\n" - : "=d" (return_code) - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); -#else /* CONFIG_64BIT */ - asm volatile ( "lrag 2,0(%1)\n" - "lgr 3,%2\n" - "sam31\n" - "diag 2,3,0x8\n" - "sam64\n" - "lgr %0,3\n" - : "=d" (return_code) - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); -#endif /* CONFIG_64BIT */ + rc = diag8_noresponse(cmdlen); } - spin_unlock_irqrestore(&cpcmd_lock, flags); - if (response_code != NULL) - *response_code = return_code; - return return_len; + if (response_code) + *response_code = rc; + return rlen; } - EXPORT_SYMBOL(__cpcmd); -#ifdef CONFIG_64BIT int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { char *lowbuf; int len; + unsigned long flags; - if ((rlen == 0) || (response == NULL) - || !((unsigned long)response >> 31)) - len = __cpcmd(cmd, response, rlen, response_code); - else { + if ((virt_to_phys(response) != (unsigned long) response) || + (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - printk(KERN_WARNING - "cpcmd: could not allocate response buffer\n"); + pr_warning("The cpcmd kernel function failed to " + "allocate a response buffer\n"); return -ENOMEM; } + spin_lock_irqsave(&cpcmd_lock, flags); len = __cpcmd(cmd, lowbuf, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); memcpy(response, lowbuf, rlen); kfree(lowbuf); + } else { + spin_lock_irqsave(&cpcmd_lock, flags); + len = __cpcmd(cmd, response, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); } return len; } - EXPORT_SYMBOL(cpcmd); -#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c deleted file mode 100644 index 926cceeae0f..00000000000 --- a/arch/s390/kernel/crash.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * arch/s390/kernel/crash.c - * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/threads.h> -#include <linux/kexec.h> - -void machine_crash_shutdown(struct pt_regs *regs) -{ -} diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000000..a3b9150e680 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,647 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <linux/memblock.h> +#include <asm/os_info.h> +#include <asm/elf.h> +#include <asm/ipl.h> +#include <asm/sclp.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +static struct memblock_region oldmem_region; + +static struct memblock_type oldmem_type = { + .cnt = 1, + .max = 1, + .total_size = 0, + .regions = &oldmem_region, +}; + +#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ + for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, p_start, \ + p_end, p_nid); \ + i != (u64)ULLONG_MAX; \ + __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, \ + p_start, p_end, p_nid)) + +struct dump_save_areas dump_save_areas; + +/* + * Allocate and add a save area for a CPU + */ +struct save_area *dump_save_area_create(int cpu) +{ + struct save_area **save_areas, *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + return NULL; + if (cpu + 1 > dump_save_areas.count) { + dump_save_areas.count = cpu + 1; + save_areas = krealloc(dump_save_areas.areas, + dump_save_areas.count * sizeof(void *), + GFP_KERNEL | __GFP_ZERO); + if (!save_areas) { + kfree(save_area); + return NULL; + } + dump_save_areas.areas = save_areas; + } + dump_save_areas.areas[cpu] = save_area; + return save_area; +} + +/* + * Return physical address for virtual address + */ +static inline void *load_real_addr(void *addr) +{ + unsigned long real_addr; + + asm volatile( + " lra %0,0(%1)\n" + " jz 0f\n" + " la %0,0\n" + "0:" + : "=a" (real_addr) : "a" (addr) : "cc"); + return (void *)real_addr; +} + +/* + * Copy real to virtual or real memory + */ +static int copy_from_realmem(void *dest, void *src, size_t count) +{ + unsigned long size; + + if (!count) + return 0; + if (!is_vmalloc_or_module_addr(dest)) + return memcpy_real(dest, src, count); + do { + size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); + if (memcpy_real(load_real_addr(dest), src, size)) + return -EFAULT; + count -= size; + dest += size; + src += size; + } while (count); + return 0; +} + +/* + * Pointer to ELF header in new kernel + */ +static void *elfcorehdr_newmem; + +/* + * Copy one page from zfcpdump "oldmem" + * + * For pages below HSA size memory from the HSA is copied. Otherwise + * real memory copy is used. + */ +static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, + unsigned long src, int userbuf) +{ + int rc; + + if (src < sclp_get_hsa_size()) { + rc = memcpy_hsa(buf, src, csize, userbuf); + } else { + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = memcpy_real(buf, (void *) src, csize); + } + return rc ? rc : csize; +} + +/* + * Copy one page from kdump "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, + unsigned long src, int userbuf) + +{ + int rc; + + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = copy_from_realmem(buf, (void *) src, csize); + return (rc == 0) ? rc : csize; +} + +/* + * Copy one page from "oldmem" + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + src = (pfn << PAGE_SHIFT) + offset; + if (OLDMEM_BASE) + return copy_oldmem_page_kdump(buf, csize, src, userbuf); + else + return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); +} + +/* + * Remap "oldmem" for kdump + * + * For the kdump reserved memory this functions performs a swap operation: + * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long size_old; + int rc; + + if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { + size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); + rc = remap_pfn_range(vma, from, + pfn + (OLDMEM_BASE >> PAGE_SHIFT), + size_old, prot); + if (rc || size == size_old) + return rc; + size -= size_old; + from += size_old; + pfn += size_old >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for zfcpdump + * + * We only map available memory above HSA size. Memory below HSA size + * is read on demand using the copy_oldmem_page() function. + */ +static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, + unsigned long from, + unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long hsa_end = sclp_get_hsa_size(); + unsigned long size_hsa; + + if (pfn < hsa_end >> PAGE_SHIFT) { + size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); + if (size == size_hsa) + return 0; + size -= size_hsa; + from += size_hsa; + pfn += size_hsa >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for kdump or zfcpdump + */ +int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + if (OLDMEM_BASE) + return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); + else + return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, + prot); +} + +/* + * Copy memory from old kernel + */ +int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if (OLDMEM_BASE) { + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + } else { + unsigned long hsa_end = sclp_get_hsa_size(); + if ((unsigned long) src < hsa_end) { + copied = min(count, hsa_end - (unsigned long) src); + rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); + if (rc) + return rc; + } + } + return copy_from_realmem(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) + */ +static void *get_vmcoreinfo_old(unsigned long *size) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return NULL; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return NULL; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return NULL; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return NULL; + vmcoreinfo = kzalloc_panic(note.n_descsz); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return NULL; + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (!vmcoreinfo) + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return ptr; + return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; i < dump_save_areas.count; i++) { + if (dump_save_areas.areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + int cnt = 0; + u64 idx; + + for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) + cnt++; + return cnt; +} + +/* + * Initialize ELF loads (new kernel) + */ +static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + phys_addr_t start, end; + u64 idx; + + for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { + phdr->p_filesz = end - start; + phdr->p_type = PT_LOAD; + phdr->p_offset = start; + phdr->p_vaddr = start; + phdr->p_paddr = start; + phdr->p_memsz = end - start; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; i < dump_save_areas.count; i++) { + sa = dump_save_areas.areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = notes_offset; + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + /* If we are not in kdump or zfcpdump mode return */ + if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) + return 0; + /* If elfcorehdr= has been passed via cmdline, we use that one */ + if (elfcorehdr_addr != ELFCORE_ADDR_MAX) + return 0; + /* If we cannot get HSA size for zfcpdump return error */ + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size()) + return -ENODEV; + + /* For kdump, exclude previous crashkernel memory */ + if (OLDMEM_BASE) { + oldmem_region.base = OLDMEM_BASE; + oldmem_region.size = OLDMEM_SIZE; + oldmem_type.total_size = OLDMEM_SIZE; + } + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, hdr_off); + *addr = (unsigned long long) hdr; + elfcorehdr_newmem = hdr; + *size = (unsigned long long) hdr_off; + BUG_ON(elfcorehdr_size > alloc_size); + return 0; +} + +/* + * Free ELF core header (new kernel) + */ +void elfcorehdr_free(unsigned long long addr) +{ + if (!elfcorehdr_newmem) + return; + kfree((void *)(unsigned long)addr); +} + +/* + * Read from ELF header + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; + memcpy(buf, src, count); + *ppos += count; + return count; +} + +/* + * Read from ELF notes data + */ +ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + int rc; + + if (elfcorehdr_newmem) { + memcpy(buf, src, count); + } else { + rc = copy_from_oldmem(buf, src, count); + if (rc) + return rc; + } + *ppos += count; + return count; +} diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 896d39d0e4c..ee8390da6ea 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1,24 +1,25 @@ /* - * arch/s390/kernel/debug.c * S/390 debug facility * - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 1999, 2012 + * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) * * Bugreports to: <Linux390@de.ibm.com> */ -#include <linux/config.h> +#define KMSG_COMPONENT "s390dbf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/ctype.h> +#include <linux/string.h> #include <linux/sysctl.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> @@ -62,8 +63,6 @@ typedef struct } debug_sprintf_entry_t; -extern void tod_to_timeval(uint64_t todval, struct timespec *xtime); - /* internal function prototyes */ static int debug_init(void); @@ -73,8 +72,8 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, size_t user_len, loff_t * offset); static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); -static debug_info_t* debug_info_create(char *name, int pages_per_area, - int nr_areas, int buf_size); +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, @@ -111,6 +110,7 @@ struct debug_view debug_raw_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_raw_view); struct debug_view debug_hex_ascii_view = { "hex_ascii", @@ -120,8 +120,9 @@ struct debug_view debug_hex_ascii_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_hex_ascii_view); -struct debug_view debug_level_view = { +static struct debug_view debug_level_view = { "level", &debug_prolog_level_fn, NULL, @@ -130,7 +131,7 @@ struct debug_view debug_level_view = { NULL }; -struct debug_view debug_pages_view = { +static struct debug_view debug_pages_view = { "pages", &debug_prolog_pages_fn, NULL, @@ -139,7 +140,7 @@ struct debug_view debug_pages_view = { NULL }; -struct debug_view debug_flush_view = { +static struct debug_view debug_flush_view = { "flush", NULL, NULL, @@ -156,24 +157,27 @@ struct debug_view debug_sprintf_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_sprintf_view); - -unsigned int debug_feature_version = __DEBUG_FEATURE_VERSION; +/* used by dump analysis tools to determine version of debug feature */ +static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; /* static globals */ static debug_info_t *debug_area_first = NULL; static debug_info_t *debug_area_last = NULL; -DECLARE_MUTEX(debug_lock); +static DEFINE_MUTEX(debug_mutex); static int initialized; +static int debug_critical; -static struct file_operations debug_file_ops = { +static const struct file_operations debug_file_ops = { .owner = THIS_MODULE, .read = debug_output, .write = debug_input, .open = debug_open, .release = debug_close, + .llseek = no_llseek, }; static struct dentry *debug_debugfs_root_entry; @@ -192,28 +196,25 @@ debug_areas_alloc(int pages_per_area, int nr_areas) debug_entry_t*** areas; int i,j; - areas = (debug_entry_t ***) kmalloc(nr_areas * + areas = kmalloc(nr_areas * sizeof(debug_entry_t**), GFP_KERNEL); if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { - areas[i] = (debug_entry_t**) kmalloc(pages_per_area * + areas[i] = kmalloc(pages_per_area * sizeof(debug_entry_t*),GFP_KERNEL); if (!areas[i]) { goto fail_malloc_areas2; } for(j = 0; j < pages_per_area; j++) { - areas[i][j] = (debug_entry_t*)kmalloc(PAGE_SIZE, - GFP_KERNEL); + areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL); if(!areas[i][j]) { for(j--; j >=0 ; j--) { kfree(areas[i][j]); } kfree(areas[i]); goto fail_malloc_areas2; - } else { - memset(areas[i][j],0,PAGE_SIZE); } } } @@ -239,24 +240,22 @@ fail_malloc_areas: */ static debug_info_t* -debug_info_alloc(char *name, int pages_per_area, int nr_areas, int buf_size, - int level, int mode) +debug_info_alloc(const char *name, int pages_per_area, int nr_areas, + int buf_size, int level, int mode) { debug_info_t* rc; /* alloc everything */ - rc = (debug_info_t*) kmalloc(sizeof(debug_info_t), GFP_KERNEL); + rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL); if(!rc) goto fail_malloc_rc; - rc->active_entries = (int*)kmalloc(nr_areas * sizeof(int), GFP_KERNEL); + rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); if(!rc->active_entries) goto fail_malloc_active_entries; - memset(rc->active_entries, 0, nr_areas * sizeof(int)); - rc->active_pages = (int*)kmalloc(nr_areas * sizeof(int), GFP_KERNEL); + rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); if(!rc->active_pages) goto fail_malloc_active_pages; - memset(rc->active_pages, 0, nr_areas * sizeof(int)); if((mode == ALL_AREAS) && (pages_per_area != 0)){ rc->areas = debug_areas_alloc(pages_per_area, nr_areas); if(!rc->areas) @@ -274,7 +273,7 @@ debug_info_alloc(char *name, int pages_per_area, int nr_areas, int buf_size, rc->level = level; rc->buf_size = buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strlcpy(rc->name, name, sizeof(rc->name)-1); + strlcpy(rc->name, name, sizeof(rc->name)); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * sizeof(struct dentry*)); @@ -333,7 +332,8 @@ debug_info_free(debug_info_t* db_info){ */ static debug_info_t* -debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size) +debug_info_create(const char *name, int pages_per_area, int nr_areas, + int buf_size, umode_t mode) { debug_info_t* rc; @@ -342,6 +342,8 @@ debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size) if(!rc) goto out; + rc->mode = mode & ~S_IFMT; + /* create root directory */ rc->debugfs_root_entry = debugfs_create_dir(rc->name, debug_debugfs_root_entry); @@ -392,7 +394,7 @@ debug_info_copy(debug_info_t* in, int mode) debug_info_free(rc); } while (1); - if(!rc || (mode == NO_AREAS)) + if (mode == NO_AREAS) goto out; for(i = 0; i < in->nr_areas; i++){ @@ -582,7 +584,7 @@ debug_input(struct file *file, const char __user *user_buf, size_t length, int rc = 0; file_private_info_t *p_info; - down(&debug_lock); + mutex_lock(&debug_mutex); p_info = ((file_private_info_t *) file->private_data); if (p_info->view->input_proc) rc = p_info->view->input_proc(p_info->debug_info_org, @@ -590,7 +592,7 @@ debug_input(struct file *file, const char __user *user_buf, size_t length, length, offset); else rc = -EPERM; - up(&debug_lock); + mutex_unlock(&debug_mutex); return rc; /* number of input characters */ } @@ -604,18 +606,18 @@ debug_input(struct file *file, const char __user *user_buf, size_t length, static int debug_open(struct inode *inode, struct file *file) { - int i = 0, rc = 0; + int i, rc = 0; file_private_info_t *p_info; debug_info_t *debug_info, *debug_info_snapshot; - down(&debug_lock); - debug_info = (struct debug_info*)file->f_dentry->d_inode->u.generic_ip; + mutex_lock(&debug_mutex); + debug_info = file_inode(file)->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) continue; else if (debug_info->debugfs_entries[i] == - file->f_dentry) { + file->f_path.dentry) { goto found; /* found view ! */ } } @@ -640,11 +642,10 @@ found: rc = -ENOMEM; goto out; } - p_info = (file_private_info_t *) kmalloc(sizeof(file_private_info_t), + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); if(!p_info){ - if(debug_info_snapshot) - debug_info_free(debug_info_snapshot); + debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } @@ -658,8 +659,9 @@ found: p_info->act_entry_offset = 0; file->private_data = p_info; debug_info_get(debug_info); + nonseekable_open(inode, file); out: - up(&debug_lock); + mutex_unlock(&debug_mutex); return rc; } @@ -682,23 +684,29 @@ debug_close(struct inode *inode, struct file *file) } /* - * debug_register: - * - creates and initializes debug area for the caller - * - returns handle for debug area + * debug_register_mode: + * - Creates and initializes debug area for the caller + * The mode parameter allows to specify access rights for the s390dbf files + * - Returns handle for debug area */ -debug_info_t* -debug_register (char *name, int pages_per_area, int nr_areas, int buf_size) +debug_info_t *debug_register_mode(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode, + uid_t uid, gid_t gid) { debug_info_t *rc = NULL; - if (!initialized) - BUG(); - down(&debug_lock); + /* Since debugfs currently does not support uid/gid other than root, */ + /* we do not allow gid/uid != 0 until we get support for that. */ + if ((uid != 0) || (gid != 0)) + pr_warning("Root becomes the owner of all s390dbf files " + "in sysfs\n"); + BUG_ON(!initialized); + mutex_lock(&debug_mutex); /* create new debug_info */ - rc = debug_info_create(name, pages_per_area, nr_areas, buf_size); + rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); if(!rc) goto out; debug_register_view(rc, &debug_level_view); @@ -706,11 +714,26 @@ debug_register (char *name, int pages_per_area, int nr_areas, int buf_size) debug_register_view(rc, &debug_pages_view); out: if (!rc){ - printk(KERN_ERR "debug: debug_register failed for %s\n",name); + pr_err("Registering debug feature %s failed\n", name); } - up(&debug_lock); + mutex_unlock(&debug_mutex); return rc; } +EXPORT_SYMBOL(debug_register_mode); + +/* + * debug_register: + * - creates and initializes debug area for the caller + * - returns handle for debug area + */ + +debug_info_t *debug_register(const char *name, int pages_per_area, + int nr_areas, int buf_size) +{ + return debug_register_mode(name, pages_per_area, nr_areas, buf_size, + S_IRUSR | S_IWUSR, 0, 0); +} +EXPORT_SYMBOL(debug_register); /* * debug_unregister: @@ -722,13 +745,14 @@ debug_unregister(debug_info_t * id) { if (!id) goto out; - down(&debug_lock); + mutex_lock(&debug_mutex); debug_info_put(id); - up(&debug_lock); + mutex_unlock(&debug_mutex); out: return; } +EXPORT_SYMBOL(debug_unregister); /* * debug_set_size: @@ -746,8 +770,8 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) if(pages_per_area > 0){ new_areas = debug_areas_alloc(pages_per_area, nr_areas); if(!new_areas) { - printk(KERN_WARNING "debug: could not allocate memory "\ - "for pagenumber: %i\n",pages_per_area); + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); rc = -ENOMEM; goto out; } @@ -763,8 +787,7 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) memset(id->active_entries,0,sizeof(int)*id->nr_areas); memset(id->active_pages, 0, sizeof(int)*id->nr_areas); spin_unlock_irqrestore(&id->lock,flags); - printk(KERN_INFO "debug: %s: set new size (%i pages)\n"\ - ,id->name, pages_per_area); + pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area); out: return rc; } @@ -783,17 +806,16 @@ debug_set_level(debug_info_t* id, int new_level) spin_lock_irqsave(&id->lock,flags); if(new_level == DEBUG_OFF_LEVEL){ id->level = DEBUG_OFF_LEVEL; - printk(KERN_INFO "debug: %s: switched off\n",id->name); + pr_info("%s: switched off\n",id->name); } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { - printk(KERN_INFO - "debug: %s: level %i is out of range (%i - %i)\n", + pr_info("%s: level %i is out of range (%i - %i)\n", id->name, new_level, 0, DEBUG_MAX_LEVEL); } else { id->level = new_level; } spin_unlock_irqrestore(&id->lock,flags); } - +EXPORT_SYMBOL(debug_set_level); /* * proceed_active_entry: @@ -845,7 +867,7 @@ static inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, int exception) { - active->id.stck = get_clock(); + active->id.stck = get_tod_clock_fast(); active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; @@ -858,7 +880,6 @@ debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, static int debug_stoppable=1; static int debug_active=1; -#define CTL_S390DBF 5677 #define CTL_S390DBF_STOPPABLE 5678 #define CTL_S390DBF_ACTIVE 5679 @@ -868,11 +889,11 @@ static int debug_active=1; * if debug_active is already off */ static int -s390dbf_procactive(ctl_table *table, int write, struct file *filp, +s390dbf_procactive(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); else return 0; } @@ -880,38 +901,33 @@ s390dbf_procactive(ctl_table *table, int write, struct file *filp, static struct ctl_table s390dbf_table[] = { { - .ctl_name = CTL_S390DBF_STOPPABLE, .procname = "debug_stoppable", .data = &debug_stoppable, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_S390DBF_ACTIVE, .procname = "debug_active", .data = &debug_active, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = &s390dbf_procactive, - .strategy = &sysctl_intvec, + .proc_handler = s390dbf_procactive, }, - { .ctl_name = 0 } + { } }; static struct ctl_table s390dbf_dir_table[] = { { - .ctl_name = CTL_S390DBF, .procname = "s390dbf", .maxlen = 0, .mode = S_IRUGO | S_IXUGO, .child = s390dbf_table, }, - { .ctl_name = 0 } + { } }; -struct ctl_table_header *s390dbf_sysctl_header; +static struct ctl_table_header *s390dbf_sysctl_header; void debug_stop_all(void) @@ -919,7 +935,12 @@ debug_stop_all(void) if (debug_stoppable) debug_active = 0; } +EXPORT_SYMBOL(debug_stop_all); +void debug_set_critical(void) +{ + debug_critical = 1; +} /* * debug_event_common: @@ -934,7 +955,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -943,6 +968,7 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) return active; } +EXPORT_SYMBOL(debug_event_common); /* * debug_exception_common: @@ -957,7 +983,11 @@ debug_entry_t if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -966,6 +996,7 @@ debug_entry_t return active; } +EXPORT_SYMBOL(debug_exception_common); /* * counts arguments in format string for sprintf view @@ -1002,7 +1033,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return NULL; numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); va_start(ap,string); @@ -1015,6 +1050,7 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return active; } +EXPORT_SYMBOL(debug_sprintf_event); /* * debug_sprintf_exception: @@ -1036,7 +1072,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); va_start(ap,string); @@ -1049,26 +1089,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) return active; } - -/* - * debug_init: - * - is called exactly once to initialize the debug feature - */ - -static int -__init debug_init(void) -{ - int rc = 0; - - s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table, 1); - down(&debug_lock); - debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL); - printk(KERN_INFO "debug: Initialization complete\n"); - initialized = 1; - up(&debug_lock); - - return rc; -} +EXPORT_SYMBOL(debug_sprintf_exception); /* * debug_register_view: @@ -1080,20 +1101,21 @@ debug_register_view(debug_info_t * id, struct debug_view *view) int rc = 0; int i; unsigned long flags; - mode_t mode = S_IFREG; + umode_t mode; struct dentry *pde; if (!id) goto out; - if (view->prolog_proc || view->format_proc || view->header_proc) - mode |= S_IRUSR; - if (view->input_proc) - mode |= S_IWUSR; + mode = (id->mode | S_IFREG) & ~S_IXUGO; + if (!(view->prolog_proc || view->format_proc || view->header_proc)) + mode &= ~(S_IRUSR | S_IRGRP | S_IROTH); + if (!view->input_proc) + mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, id , &debug_file_ops); if (!pde){ - printk(KERN_WARNING "debug: debugfs_create_file() failed!"\ - " Cannot register view %s/%s\n", id->name,view->name); + pr_err("Registering view %s/%s failed due to out of " + "memory\n", id->name,view->name); rc = -1; goto out; } @@ -1103,20 +1125,20 @@ debug_register_view(debug_info_t * id, struct debug_view *view) break; } if (i == DEBUG_MAX_VIEWS) { - printk(KERN_WARNING "debug: cannot register view %s/%s\n", - id->name,view->name); - printk(KERN_WARNING - "debug: maximum number of views reached (%i)!\n", i); - debugfs_remove(pde); + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); rc = -1; } else { id->views[i] = view; id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); + if (rc) + debugfs_remove(pde); out: return rc; } +EXPORT_SYMBOL(debug_register_view); /* * debug_unregister_view: @@ -1125,9 +1147,9 @@ out: int debug_unregister_view(debug_info_t * id, struct debug_view *view) { - int rc = 0; - int i; + struct dentry *dentry = NULL; unsigned long flags; + int i, rc = 0; if (!id) goto out; @@ -1139,14 +1161,16 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view) if (i == DEBUG_MAX_VIEWS) rc = -1; else { - debugfs_remove(id->debugfs_entries[i]); + dentry = id->debugfs_entries[i]; id->views[i] = NULL; - rc = 0; + id->debugfs_entries[i] = NULL; } spin_unlock_irqrestore(&id->lock, flags); + debugfs_remove(dentry); out: return rc; } +EXPORT_SYMBOL(debug_unregister_view); static inline char * debug_get_user_string(const char __user *user_buf, size_t user_len) @@ -1173,10 +1197,9 @@ debug_get_uint(char *buf) { int rc; - for(; isspace(*buf); buf++); + buf = skip_spaces(buf); rc = simple_strtoul(buf, &buf, 10); if(*buf){ - printk("debug: no integer specified!\n"); rc = -EINVAL; } return rc; @@ -1288,7 +1311,8 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - printk(KERN_INFO "debug: level `%s` is not valid\n", str); + pr_warning("%s is not a valid level for a debug " + "feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); @@ -1306,8 +1330,7 @@ out: * flushes debug areas */ -void -debug_flush(debug_info_t* id, int area) +static void debug_flush(debug_info_t* id, int area) { unsigned long flags; int i,j; @@ -1324,19 +1347,12 @@ debug_flush(debug_info_t* id, int area) memset(id->areas[i][j], 0, PAGE_SIZE); } } - printk(KERN_INFO "debug: %s: all areas flushed\n",id->name); } else if(area >= 0 && area < id->nr_areas) { id->active_entries[area] = 0; id->active_pages[area] = 0; for(i = 0; i < id->pages_per_area; i++) { memset(id->areas[area][i],0,PAGE_SIZE); } - printk(KERN_INFO "debug: %s: area %i has been flushed\n", - id->name, area); - } else { - printk(KERN_INFO - "debug: %s: area %i cannot be flushed (range: %i - %i)\n", - id->name, area, 0, id->nr_areas-1); } spin_unlock_irqrestore(&id->lock,flags); } @@ -1373,7 +1389,8 @@ debug_input_flush_fn(debug_info_t * id, struct debug_view *view, goto out; } - printk(KERN_INFO "debug: area `%c` is not valid\n", input_buf[0]); + pr_info("Flushing debug data failed because %c is not a valid " + "area\n", input_buf[0]); out: *offset += user_len; @@ -1427,10 +1444,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, rc += sprintf(out_buf + rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; - if (!isprint(c)) - rc += sprintf(out_buf + rc, "."); - else + if (isascii(c) && isprint(c)) rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); } rc += sprintf(out_buf + rc, "\n"); return rc; @@ -1445,17 +1462,13 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { struct timespec time_spec; - unsigned long long time; char *except_str; unsigned long caller; int rc = 0; unsigned int level; level = entry->id.fields.level; - time = entry->id.stck; - /* adjust todclock to 1970 */ - time -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096); - tod_to_timeval(time, &time_spec); + stck_to_timespec(entry->id.stck, &time_spec); if (entry->id.fields.exception) except_str = "*"; @@ -1467,6 +1480,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, except_str, entry->id.fields.cpuid, (void *) caller); return rc; } +EXPORT_SYMBOL(debug_dflt_header_fn); /* * prints debug data sprintf-formated: @@ -1515,34 +1529,16 @@ out: } /* - * clean up module + * debug_init: + * - is called exactly once to initialize the debug feature */ -void -__exit debug_exit(void) +static int __init debug_init(void) { - debugfs_remove(debug_debugfs_root_entry); - unregister_sysctl_table(s390dbf_sysctl_header); - return; + s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); + mutex_lock(&debug_mutex); + debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); + initialized = 1; + mutex_unlock(&debug_mutex); + return 0; } - -/* - * module definitions - */ postcore_initcall(debug_init); -module_exit(debug_exit); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(debug_register); -EXPORT_SYMBOL(debug_unregister); -EXPORT_SYMBOL(debug_set_level); -EXPORT_SYMBOL(debug_stop_all); -EXPORT_SYMBOL(debug_register_view); -EXPORT_SYMBOL(debug_unregister_view); -EXPORT_SYMBOL(debug_event_common); -EXPORT_SYMBOL(debug_exception_common); -EXPORT_SYMBOL(debug_hex_ascii_view); -EXPORT_SYMBOL(debug_raw_view); -EXPORT_SYMBOL(debug_dflt_header_fn); -EXPORT_SYMBOL(debug_sprintf_view); -EXPORT_SYMBOL(debug_sprintf_exception); -EXPORT_SYMBOL(debug_sprintf_event); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c new file mode 100644 index 00000000000..8237fc07ac7 --- /dev/null +++ b/arch/s390/kernel/diag.c @@ -0,0 +1,81 @@ +/* + * Implementation of s390 diagnose codes + * + * Copyright IBM Corp. 2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/module.h> +#include <asm/diag.h> + +/* + * Diagnose 14: Input spool file manipulation + */ +int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +{ + register unsigned long _ry1 asm("2") = ry1; + register unsigned long _ry2 asm("3") = subcode; + int rc = 0; + + asm volatile( +#ifdef CONFIG_64BIT + " sam31\n" + " diag %2,2,0x14\n" + " sam64\n" +#else + " diag %2,2,0x14\n" +#endif + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+d" (_ry2) + : "d" (rx), "d" (_ry1) + : "cc"); + + return rc; +} +EXPORT_SYMBOL(diag14); + +/* + * Diagnose 210: Get information about a virtual device + */ +int diag210(struct diag210 *addr) +{ + /* + * diag 210 needs its data below the 2GB border, so we + * use a static data area to be sure + */ + static struct diag210 diag210_tmp; + static DEFINE_SPINLOCK(diag210_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag210_lock, flags); + diag210_tmp = *addr; + +#ifdef CONFIG_64BIT + asm volatile( + " lhi %0,-1\n" + " sam31\n" + " diag %1,0,0x210\n" + "0: ipm %0\n" + " srl %0,28\n" + "1: sam64\n" + EX_TABLE(0b, 1b) + : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); +#else + asm volatile( + " lhi %0,-1\n" + " diag %1,0,0x210\n" + "0: ipm %0\n" + " srl %0,28\n" + "1:\n" + EX_TABLE(0b, 1b) + : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory"); +#endif + + *addr = diag210_tmp; + spin_unlock_irqrestore(&diag210_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag210); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c new file mode 100644 index 00000000000..993efe6a887 --- /dev/null +++ b/arch/s390/kernel/dis.c @@ -0,0 +1,1853 @@ +/* + * Disassemble s390 instructions. + * + * Copyright IBM Corp. 2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/reboot.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> + +#include <asm/uaccess.h> +#include <asm/dis.h> +#include <asm/io.h> +#include <linux/atomic.h> +#include <asm/mathemu.h> +#include <asm/cpcmd.h> +#include <asm/lowcore.h> +#include <asm/debug.h> +#include <asm/irq.h> + +#ifndef CONFIG_64BIT +#define ONELONG "%08lx: " +#else /* CONFIG_64BIT */ +#define ONELONG "%016lx: " +#endif /* CONFIG_64BIT */ + +enum { + UNUSED, /* Indicates the end of the operand list */ + R_8, /* GPR starting at position 8 */ + R_12, /* GPR starting at position 12 */ + R_16, /* GPR starting at position 16 */ + R_20, /* GPR starting at position 20 */ + R_24, /* GPR starting at position 24 */ + R_28, /* GPR starting at position 28 */ + R_32, /* GPR starting at position 32 */ + F_8, /* FPR starting at position 8 */ + F_12, /* FPR starting at position 12 */ + F_16, /* FPR starting at position 16 */ + F_20, /* FPR starting at position 16 */ + F_24, /* FPR starting at position 24 */ + F_28, /* FPR starting at position 28 */ + F_32, /* FPR starting at position 32 */ + A_8, /* Access reg. starting at position 8 */ + A_12, /* Access reg. starting at position 12 */ + A_24, /* Access reg. starting at position 24 */ + A_28, /* Access reg. starting at position 28 */ + C_8, /* Control reg. starting at position 8 */ + C_12, /* Control reg. starting at position 12 */ + B_16, /* Base register starting at position 16 */ + B_32, /* Base register starting at position 32 */ + X_12, /* Index register starting at position 12 */ + D_20, /* Displacement starting at position 20 */ + D_36, /* Displacement starting at position 36 */ + D20_20, /* 20 bit displacement starting at 20 */ + L4_8, /* 4 bit length starting at position 8 */ + L4_12, /* 4 bit length starting at position 12 */ + L8_8, /* 8 bit length starting at position 8 */ + U4_8, /* 4 bit unsigned value starting at 8 */ + U4_12, /* 4 bit unsigned value starting at 12 */ + U4_16, /* 4 bit unsigned value starting at 16 */ + U4_20, /* 4 bit unsigned value starting at 20 */ + U4_24, /* 4 bit unsigned value starting at 24 */ + U4_28, /* 4 bit unsigned value starting at 28 */ + U4_32, /* 4 bit unsigned value starting at 32 */ + U4_36, /* 4 bit unsigned value starting at 36 */ + U8_8, /* 8 bit unsigned value starting at 8 */ + U8_16, /* 8 bit unsigned value starting at 16 */ + U8_24, /* 8 bit unsigned value starting at 24 */ + U8_32, /* 8 bit unsigned value starting at 32 */ + I8_8, /* 8 bit signed value starting at 8 */ + I8_32, /* 8 bit signed value starting at 32 */ + J12_12, /* PC relative offset at 12 */ + I16_16, /* 16 bit signed value starting at 16 */ + I16_32, /* 32 bit signed value starting at 16 */ + U16_16, /* 16 bit unsigned value starting at 16 */ + U16_32, /* 32 bit unsigned value starting at 16 */ + J16_16, /* PC relative jump offset at 16 */ + J16_32, /* PC relative offset at 16 */ + I24_24, /* 24 bit signed value starting at 24 */ + J32_16, /* PC relative long offset at 16 */ + I32_16, /* 32 bit signed value starting at 16 */ + U32_16, /* 32 bit unsigned value starting at 16 */ + M_16, /* 4 bit optional mask starting at 16 */ + M_20, /* 4 bit optional mask starting at 20 */ + RO_28, /* optional GPR starting at position 28 */ +}; + +/* + * Enumeration of the different instruction formats. + * For details consult the principles of operation. + */ +enum { + INSTR_INVALID, + INSTR_E, + INSTR_IE_UU, + INSTR_MII_UPI, + INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, + INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, + INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, + INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, + INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, + INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0, + INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, + INSTR_RRE_RR, INSTR_RRE_RR_OPT, + INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, + INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR, + INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR, + INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF, + INSTR_RRF_UUFR, INSTR_RRF_UURF, + INSTR_RRR_F0FF, INSTR_RRS_RRRDU, + INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, + INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, + INSTR_RSI_RRP, + INSTR_RSL_LRDFU, INSTR_RSL_R0RD, + INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, + INSTR_RSY_RDRM, + INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, + INSTR_RS_RURD, + INSTR_RXE_FRRD, INSTR_RXE_RRRD, + INSTR_RXF_FRRDF, + INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, + INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, + INSTR_SIL_RDI, INSTR_SIL_RDU, + INSTR_SIY_IRD, INSTR_SIY_URD, + INSTR_SI_URD, + INSTR_SMI_U0RDP, + INSTR_SSE_RDRD, + INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, + INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, + INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, + INSTR_S_00, INSTR_S_RD, +}; + +static const struct s390_operand operands[] = +{ + [UNUSED] = { 0, 0, 0 }, + [R_8] = { 4, 8, OPERAND_GPR }, + [R_12] = { 4, 12, OPERAND_GPR }, + [R_16] = { 4, 16, OPERAND_GPR }, + [R_20] = { 4, 20, OPERAND_GPR }, + [R_24] = { 4, 24, OPERAND_GPR }, + [R_28] = { 4, 28, OPERAND_GPR }, + [R_32] = { 4, 32, OPERAND_GPR }, + [F_8] = { 4, 8, OPERAND_FPR }, + [F_12] = { 4, 12, OPERAND_FPR }, + [F_16] = { 4, 16, OPERAND_FPR }, + [F_20] = { 4, 16, OPERAND_FPR }, + [F_24] = { 4, 24, OPERAND_FPR }, + [F_28] = { 4, 28, OPERAND_FPR }, + [F_32] = { 4, 32, OPERAND_FPR }, + [A_8] = { 4, 8, OPERAND_AR }, + [A_12] = { 4, 12, OPERAND_AR }, + [A_24] = { 4, 24, OPERAND_AR }, + [A_28] = { 4, 28, OPERAND_AR }, + [C_8] = { 4, 8, OPERAND_CR }, + [C_12] = { 4, 12, OPERAND_CR }, + [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, + [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, + [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, + [D_20] = { 12, 20, OPERAND_DISP }, + [D_36] = { 12, 36, OPERAND_DISP }, + [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, + [L4_8] = { 4, 8, OPERAND_LENGTH }, + [L4_12] = { 4, 12, OPERAND_LENGTH }, + [L8_8] = { 8, 8, OPERAND_LENGTH }, + [U4_8] = { 4, 8, 0 }, + [U4_12] = { 4, 12, 0 }, + [U4_16] = { 4, 16, 0 }, + [U4_20] = { 4, 20, 0 }, + [U4_24] = { 4, 24, 0 }, + [U4_28] = { 4, 28, 0 }, + [U4_32] = { 4, 32, 0 }, + [U4_36] = { 4, 36, 0 }, + [U8_8] = { 8, 8, 0 }, + [U8_16] = { 8, 16, 0 }, + [U8_24] = { 8, 24, 0 }, + [U8_32] = { 8, 32, 0 }, + [J12_12] = { 12, 12, OPERAND_PCREL }, + [I16_16] = { 16, 16, OPERAND_SIGNED }, + [U16_16] = { 16, 16, 0 }, + [U16_32] = { 16, 32, 0 }, + [J16_16] = { 16, 16, OPERAND_PCREL }, + [J16_32] = { 16, 32, OPERAND_PCREL }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, + [I24_24] = { 24, 24, OPERAND_SIGNED }, + [J32_16] = { 32, 16, OPERAND_PCREL }, + [I32_16] = { 32, 16, OPERAND_SIGNED }, + [U32_16] = { 32, 16, 0 }, + [M_16] = { 4, 16, 0 }, + [M_20] = { 4, 20, 0 }, + [RO_28] = { 4, 28, OPERAND_GPR } +}; + +static const unsigned char formats[][7] = { + [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 }, + [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 }, + [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 }, + [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, + [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 }, + [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, + [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, + [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 }, + [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, + [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, + [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, + [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, + [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 }, + [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 }, + [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 }, + [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, + [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, + [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, + [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, + [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, + [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, + [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, + [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, + [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, + [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 }, + [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, + [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, + [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, + [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, + [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, + [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 }, + [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 }, + [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, + [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 }, + [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 }, + [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, + [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 }, + [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, + [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, + [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 }, + [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, + [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, + [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 }, + [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, + [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, + [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, + [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, + [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, + [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, + [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, + [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 }, + [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, + [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, + [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, + [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, + [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, + [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, + [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, + [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, + [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 }, + [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 }, + [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, + [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, + [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, + [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 }, + [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, + [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 }, + [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, + [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, + [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, + [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 }, + [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 }, + [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, + [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, +}; + +enum { + LONG_INSN_ALGHSIK, + LONG_INSN_ALHHHR, + LONG_INSN_ALHHLR, + LONG_INSN_ALHSIK, + LONG_INSN_ALSIHN, + LONG_INSN_CDFBRA, + LONG_INSN_CDGBRA, + LONG_INSN_CDGTRA, + LONG_INSN_CDLFBR, + LONG_INSN_CDLFTR, + LONG_INSN_CDLGBR, + LONG_INSN_CDLGTR, + LONG_INSN_CEFBRA, + LONG_INSN_CEGBRA, + LONG_INSN_CELFBR, + LONG_INSN_CELGBR, + LONG_INSN_CFDBRA, + LONG_INSN_CFEBRA, + LONG_INSN_CFXBRA, + LONG_INSN_CGDBRA, + LONG_INSN_CGDTRA, + LONG_INSN_CGEBRA, + LONG_INSN_CGXBRA, + LONG_INSN_CGXTRA, + LONG_INSN_CLFDBR, + LONG_INSN_CLFDTR, + LONG_INSN_CLFEBR, + LONG_INSN_CLFHSI, + LONG_INSN_CLFXBR, + LONG_INSN_CLFXTR, + LONG_INSN_CLGDBR, + LONG_INSN_CLGDTR, + LONG_INSN_CLGEBR, + LONG_INSN_CLGFRL, + LONG_INSN_CLGHRL, + LONG_INSN_CLGHSI, + LONG_INSN_CLGXBR, + LONG_INSN_CLGXTR, + LONG_INSN_CLHHSI, + LONG_INSN_CXFBRA, + LONG_INSN_CXGBRA, + LONG_INSN_CXGTRA, + LONG_INSN_CXLFBR, + LONG_INSN_CXLFTR, + LONG_INSN_CXLGBR, + LONG_INSN_CXLGTR, + LONG_INSN_FIDBRA, + LONG_INSN_FIEBRA, + LONG_INSN_FIXBRA, + LONG_INSN_LDXBRA, + LONG_INSN_LEDBRA, + LONG_INSN_LEXBRA, + LONG_INSN_LLGFAT, + LONG_INSN_LLGFRL, + LONG_INSN_LLGHRL, + LONG_INSN_LLGTAT, + LONG_INSN_POPCNT, + LONG_INSN_RIEMIT, + LONG_INSN_RINEXT, + LONG_INSN_RISBGN, + LONG_INSN_RISBHG, + LONG_INSN_RISBLG, + LONG_INSN_SLHHHR, + LONG_INSN_SLHHLR, + LONG_INSN_TABORT, + LONG_INSN_TBEGIN, + LONG_INSN_TBEGINC, + LONG_INSN_PCISTG, + LONG_INSN_MPCIFC, + LONG_INSN_STPCIFC, + LONG_INSN_PCISTB, +}; + +static char *long_insn_name[] = { + [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHHHR] = "alhhhr", + [LONG_INSN_ALHHLR] = "alhhlr", + [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_ALSIHN] = "alsihn", + [LONG_INSN_CDFBRA] = "cdfbra", + [LONG_INSN_CDGBRA] = "cdgbra", + [LONG_INSN_CDGTRA] = "cdgtra", + [LONG_INSN_CDLFBR] = "cdlfbr", + [LONG_INSN_CDLFTR] = "cdlftr", + [LONG_INSN_CDLGBR] = "cdlgbr", + [LONG_INSN_CDLGTR] = "cdlgtr", + [LONG_INSN_CEFBRA] = "cefbra", + [LONG_INSN_CEGBRA] = "cegbra", + [LONG_INSN_CELFBR] = "celfbr", + [LONG_INSN_CELGBR] = "celgbr", + [LONG_INSN_CFDBRA] = "cfdbra", + [LONG_INSN_CFEBRA] = "cfebra", + [LONG_INSN_CFXBRA] = "cfxbra", + [LONG_INSN_CGDBRA] = "cgdbra", + [LONG_INSN_CGDTRA] = "cgdtra", + [LONG_INSN_CGEBRA] = "cgebra", + [LONG_INSN_CGXBRA] = "cgxbra", + [LONG_INSN_CGXTRA] = "cgxtra", + [LONG_INSN_CLFDBR] = "clfdbr", + [LONG_INSN_CLFDTR] = "clfdtr", + [LONG_INSN_CLFEBR] = "clfebr", + [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLFXBR] = "clfxbr", + [LONG_INSN_CLFXTR] = "clfxtr", + [LONG_INSN_CLGDBR] = "clgdbr", + [LONG_INSN_CLGDTR] = "clgdtr", + [LONG_INSN_CLGEBR] = "clgebr", + [LONG_INSN_CLGFRL] = "clgfrl", + [LONG_INSN_CLGHRL] = "clghrl", + [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLGXBR] = "clgxbr", + [LONG_INSN_CLGXTR] = "clgxtr", + [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_CXFBRA] = "cxfbra", + [LONG_INSN_CXGBRA] = "cxgbra", + [LONG_INSN_CXGTRA] = "cxgtra", + [LONG_INSN_CXLFBR] = "cxlfbr", + [LONG_INSN_CXLFTR] = "cxlftr", + [LONG_INSN_CXLGBR] = "cxlgbr", + [LONG_INSN_CXLGTR] = "cxlgtr", + [LONG_INSN_FIDBRA] = "fidbra", + [LONG_INSN_FIEBRA] = "fiebra", + [LONG_INSN_FIXBRA] = "fixbra", + [LONG_INSN_LDXBRA] = "ldxbra", + [LONG_INSN_LEDBRA] = "ledbra", + [LONG_INSN_LEXBRA] = "lexbra", + [LONG_INSN_LLGFAT] = "llgfat", + [LONG_INSN_LLGFRL] = "llgfrl", + [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_LLGTAT] = "llgtat", + [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RIEMIT] = "riemit", + [LONG_INSN_RINEXT] = "rinext", + [LONG_INSN_RISBGN] = "risbgn", + [LONG_INSN_RISBHG] = "risbhg", + [LONG_INSN_RISBLG] = "risblg", + [LONG_INSN_SLHHHR] = "slhhhr", + [LONG_INSN_SLHHLR] = "slhhlr", + [LONG_INSN_TABORT] = "tabort", + [LONG_INSN_TBEGIN] = "tbegin", + [LONG_INSN_TBEGINC] = "tbeginc", + [LONG_INSN_PCISTG] = "pcistg", + [LONG_INSN_MPCIFC] = "mpcifc", + [LONG_INSN_STPCIFC] = "stpcifc", + [LONG_INSN_PCISTB] = "pcistb", +}; + +static struct s390_insn opcode[] = { +#ifdef CONFIG_64BIT + { "bprp", 0xc5, INSTR_MII_UPI }, + { "bpp", 0xc7, INSTR_SMI_U0RDP }, + { "trtr", 0xd0, INSTR_SS_L0RDRD }, + { "lmd", 0xef, INSTR_SS_RRRDRD3 }, +#endif + { "spm", 0x04, INSTR_RR_R0 }, + { "balr", 0x05, INSTR_RR_RR }, + { "bctr", 0x06, INSTR_RR_RR }, + { "bcr", 0x07, INSTR_RR_UR }, + { "svc", 0x0a, INSTR_RR_U0 }, + { "bsm", 0x0b, INSTR_RR_RR }, + { "bassm", 0x0c, INSTR_RR_RR }, + { "basr", 0x0d, INSTR_RR_RR }, + { "mvcl", 0x0e, INSTR_RR_RR }, + { "clcl", 0x0f, INSTR_RR_RR }, + { "lpr", 0x10, INSTR_RR_RR }, + { "lnr", 0x11, INSTR_RR_RR }, + { "ltr", 0x12, INSTR_RR_RR }, + { "lcr", 0x13, INSTR_RR_RR }, + { "nr", 0x14, INSTR_RR_RR }, + { "clr", 0x15, INSTR_RR_RR }, + { "or", 0x16, INSTR_RR_RR }, + { "xr", 0x17, INSTR_RR_RR }, + { "lr", 0x18, INSTR_RR_RR }, + { "cr", 0x19, INSTR_RR_RR }, + { "ar", 0x1a, INSTR_RR_RR }, + { "sr", 0x1b, INSTR_RR_RR }, + { "mr", 0x1c, INSTR_RR_RR }, + { "dr", 0x1d, INSTR_RR_RR }, + { "alr", 0x1e, INSTR_RR_RR }, + { "slr", 0x1f, INSTR_RR_RR }, + { "lpdr", 0x20, INSTR_RR_FF }, + { "lndr", 0x21, INSTR_RR_FF }, + { "ltdr", 0x22, INSTR_RR_FF }, + { "lcdr", 0x23, INSTR_RR_FF }, + { "hdr", 0x24, INSTR_RR_FF }, + { "ldxr", 0x25, INSTR_RR_FF }, + { "mxr", 0x26, INSTR_RR_FF }, + { "mxdr", 0x27, INSTR_RR_FF }, + { "ldr", 0x28, INSTR_RR_FF }, + { "cdr", 0x29, INSTR_RR_FF }, + { "adr", 0x2a, INSTR_RR_FF }, + { "sdr", 0x2b, INSTR_RR_FF }, + { "mdr", 0x2c, INSTR_RR_FF }, + { "ddr", 0x2d, INSTR_RR_FF }, + { "awr", 0x2e, INSTR_RR_FF }, + { "swr", 0x2f, INSTR_RR_FF }, + { "lper", 0x30, INSTR_RR_FF }, + { "lner", 0x31, INSTR_RR_FF }, + { "lter", 0x32, INSTR_RR_FF }, + { "lcer", 0x33, INSTR_RR_FF }, + { "her", 0x34, INSTR_RR_FF }, + { "ledr", 0x35, INSTR_RR_FF }, + { "axr", 0x36, INSTR_RR_FF }, + { "sxr", 0x37, INSTR_RR_FF }, + { "ler", 0x38, INSTR_RR_FF }, + { "cer", 0x39, INSTR_RR_FF }, + { "aer", 0x3a, INSTR_RR_FF }, + { "ser", 0x3b, INSTR_RR_FF }, + { "mder", 0x3c, INSTR_RR_FF }, + { "der", 0x3d, INSTR_RR_FF }, + { "aur", 0x3e, INSTR_RR_FF }, + { "sur", 0x3f, INSTR_RR_FF }, + { "sth", 0x40, INSTR_RX_RRRD }, + { "la", 0x41, INSTR_RX_RRRD }, + { "stc", 0x42, INSTR_RX_RRRD }, + { "ic", 0x43, INSTR_RX_RRRD }, + { "ex", 0x44, INSTR_RX_RRRD }, + { "bal", 0x45, INSTR_RX_RRRD }, + { "bct", 0x46, INSTR_RX_RRRD }, + { "bc", 0x47, INSTR_RX_URRD }, + { "lh", 0x48, INSTR_RX_RRRD }, + { "ch", 0x49, INSTR_RX_RRRD }, + { "ah", 0x4a, INSTR_RX_RRRD }, + { "sh", 0x4b, INSTR_RX_RRRD }, + { "mh", 0x4c, INSTR_RX_RRRD }, + { "bas", 0x4d, INSTR_RX_RRRD }, + { "cvd", 0x4e, INSTR_RX_RRRD }, + { "cvb", 0x4f, INSTR_RX_RRRD }, + { "st", 0x50, INSTR_RX_RRRD }, + { "lae", 0x51, INSTR_RX_RRRD }, + { "n", 0x54, INSTR_RX_RRRD }, + { "cl", 0x55, INSTR_RX_RRRD }, + { "o", 0x56, INSTR_RX_RRRD }, + { "x", 0x57, INSTR_RX_RRRD }, + { "l", 0x58, INSTR_RX_RRRD }, + { "c", 0x59, INSTR_RX_RRRD }, + { "a", 0x5a, INSTR_RX_RRRD }, + { "s", 0x5b, INSTR_RX_RRRD }, + { "m", 0x5c, INSTR_RX_RRRD }, + { "d", 0x5d, INSTR_RX_RRRD }, + { "al", 0x5e, INSTR_RX_RRRD }, + { "sl", 0x5f, INSTR_RX_RRRD }, + { "std", 0x60, INSTR_RX_FRRD }, + { "mxd", 0x67, INSTR_RX_FRRD }, + { "ld", 0x68, INSTR_RX_FRRD }, + { "cd", 0x69, INSTR_RX_FRRD }, + { "ad", 0x6a, INSTR_RX_FRRD }, + { "sd", 0x6b, INSTR_RX_FRRD }, + { "md", 0x6c, INSTR_RX_FRRD }, + { "dd", 0x6d, INSTR_RX_FRRD }, + { "aw", 0x6e, INSTR_RX_FRRD }, + { "sw", 0x6f, INSTR_RX_FRRD }, + { "ste", 0x70, INSTR_RX_FRRD }, + { "ms", 0x71, INSTR_RX_RRRD }, + { "le", 0x78, INSTR_RX_FRRD }, + { "ce", 0x79, INSTR_RX_FRRD }, + { "ae", 0x7a, INSTR_RX_FRRD }, + { "se", 0x7b, INSTR_RX_FRRD }, + { "mde", 0x7c, INSTR_RX_FRRD }, + { "de", 0x7d, INSTR_RX_FRRD }, + { "au", 0x7e, INSTR_RX_FRRD }, + { "su", 0x7f, INSTR_RX_FRRD }, + { "ssm", 0x80, INSTR_S_RD }, + { "lpsw", 0x82, INSTR_S_RD }, + { "diag", 0x83, INSTR_RS_RRRD }, + { "brxh", 0x84, INSTR_RSI_RRP }, + { "brxle", 0x85, INSTR_RSI_RRP }, + { "bxh", 0x86, INSTR_RS_RRRD }, + { "bxle", 0x87, INSTR_RS_RRRD }, + { "srl", 0x88, INSTR_RS_R0RD }, + { "sll", 0x89, INSTR_RS_R0RD }, + { "sra", 0x8a, INSTR_RS_R0RD }, + { "sla", 0x8b, INSTR_RS_R0RD }, + { "srdl", 0x8c, INSTR_RS_R0RD }, + { "sldl", 0x8d, INSTR_RS_R0RD }, + { "srda", 0x8e, INSTR_RS_R0RD }, + { "slda", 0x8f, INSTR_RS_R0RD }, + { "stm", 0x90, INSTR_RS_RRRD }, + { "tm", 0x91, INSTR_SI_URD }, + { "mvi", 0x92, INSTR_SI_URD }, + { "ts", 0x93, INSTR_S_RD }, + { "ni", 0x94, INSTR_SI_URD }, + { "cli", 0x95, INSTR_SI_URD }, + { "oi", 0x96, INSTR_SI_URD }, + { "xi", 0x97, INSTR_SI_URD }, + { "lm", 0x98, INSTR_RS_RRRD }, + { "trace", 0x99, INSTR_RS_RRRD }, + { "lam", 0x9a, INSTR_RS_AARD }, + { "stam", 0x9b, INSTR_RS_AARD }, + { "mvcle", 0xa8, INSTR_RS_RRRD }, + { "clcle", 0xa9, INSTR_RS_RRRD }, + { "stnsm", 0xac, INSTR_SI_URD }, + { "stosm", 0xad, INSTR_SI_URD }, + { "sigp", 0xae, INSTR_RS_RRRD }, + { "mc", 0xaf, INSTR_SI_URD }, + { "lra", 0xb1, INSTR_RX_RRRD }, + { "stctl", 0xb6, INSTR_RS_CCRD }, + { "lctl", 0xb7, INSTR_RS_CCRD }, + { "cs", 0xba, INSTR_RS_RRRD }, + { "cds", 0xbb, INSTR_RS_RRRD }, + { "clm", 0xbd, INSTR_RS_RURD }, + { "stcm", 0xbe, INSTR_RS_RURD }, + { "icm", 0xbf, INSTR_RS_RURD }, + { "mvn", 0xd1, INSTR_SS_L0RDRD }, + { "mvc", 0xd2, INSTR_SS_L0RDRD }, + { "mvz", 0xd3, INSTR_SS_L0RDRD }, + { "nc", 0xd4, INSTR_SS_L0RDRD }, + { "clc", 0xd5, INSTR_SS_L0RDRD }, + { "oc", 0xd6, INSTR_SS_L0RDRD }, + { "xc", 0xd7, INSTR_SS_L0RDRD }, + { "mvck", 0xd9, INSTR_SS_RRRDRD }, + { "mvcp", 0xda, INSTR_SS_RRRDRD }, + { "mvcs", 0xdb, INSTR_SS_RRRDRD }, + { "tr", 0xdc, INSTR_SS_L0RDRD }, + { "trt", 0xdd, INSTR_SS_L0RDRD }, + { "ed", 0xde, INSTR_SS_L0RDRD }, + { "edmk", 0xdf, INSTR_SS_L0RDRD }, + { "pku", 0xe1, INSTR_SS_L0RDRD }, + { "unpku", 0xe2, INSTR_SS_L0RDRD }, + { "mvcin", 0xe8, INSTR_SS_L0RDRD }, + { "pka", 0xe9, INSTR_SS_L0RDRD }, + { "unpka", 0xea, INSTR_SS_L0RDRD }, + { "plo", 0xee, INSTR_SS_RRRDRD2 }, + { "srp", 0xf0, INSTR_SS_LIRDRD }, + { "mvo", 0xf1, INSTR_SS_LLRDRD }, + { "pack", 0xf2, INSTR_SS_LLRDRD }, + { "unpk", 0xf3, INSTR_SS_LLRDRD }, + { "zap", 0xf8, INSTR_SS_LLRDRD }, + { "cp", 0xf9, INSTR_SS_LLRDRD }, + { "ap", 0xfa, INSTR_SS_LLRDRD }, + { "sp", 0xfb, INSTR_SS_LLRDRD }, + { "mp", 0xfc, INSTR_SS_LLRDRD }, + { "dp", 0xfd, INSTR_SS_LLRDRD }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_01[] = { +#ifdef CONFIG_64BIT + { "ptff", 0x04, INSTR_E }, + { "pfpo", 0x0a, INSTR_E }, + { "sam64", 0x0e, INSTR_E }, +#endif + { "pr", 0x01, INSTR_E }, + { "upt", 0x02, INSTR_E }, + { "sckpf", 0x07, INSTR_E }, + { "tam", 0x0b, INSTR_E }, + { "sam24", 0x0c, INSTR_E }, + { "sam31", 0x0d, INSTR_E }, + { "trap2", 0xff, INSTR_E }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_a5[] = { +#ifdef CONFIG_64BIT + { "iihh", 0x00, INSTR_RI_RU }, + { "iihl", 0x01, INSTR_RI_RU }, + { "iilh", 0x02, INSTR_RI_RU }, + { "iill", 0x03, INSTR_RI_RU }, + { "nihh", 0x04, INSTR_RI_RU }, + { "nihl", 0x05, INSTR_RI_RU }, + { "nilh", 0x06, INSTR_RI_RU }, + { "nill", 0x07, INSTR_RI_RU }, + { "oihh", 0x08, INSTR_RI_RU }, + { "oihl", 0x09, INSTR_RI_RU }, + { "oilh", 0x0a, INSTR_RI_RU }, + { "oill", 0x0b, INSTR_RI_RU }, + { "llihh", 0x0c, INSTR_RI_RU }, + { "llihl", 0x0d, INSTR_RI_RU }, + { "llilh", 0x0e, INSTR_RI_RU }, + { "llill", 0x0f, INSTR_RI_RU }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_a7[] = { +#ifdef CONFIG_64BIT + { "tmhh", 0x02, INSTR_RI_RU }, + { "tmhl", 0x03, INSTR_RI_RU }, + { "brctg", 0x07, INSTR_RI_RP }, + { "lghi", 0x09, INSTR_RI_RI }, + { "aghi", 0x0b, INSTR_RI_RI }, + { "mghi", 0x0d, INSTR_RI_RI }, + { "cghi", 0x0f, INSTR_RI_RI }, +#endif + { "tmlh", 0x00, INSTR_RI_RU }, + { "tmll", 0x01, INSTR_RI_RU }, + { "brc", 0x04, INSTR_RI_UP }, + { "bras", 0x05, INSTR_RI_RP }, + { "brct", 0x06, INSTR_RI_RP }, + { "lhi", 0x08, INSTR_RI_RI }, + { "ahi", 0x0a, INSTR_RI_RI }, + { "mhi", 0x0c, INSTR_RI_RI }, + { "chi", 0x0e, INSTR_RI_RI }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_aa[] = { +#ifdef CONFIG_64BIT + { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, + { "rion", 0x01, INSTR_RI_RI }, + { "tric", 0x02, INSTR_RI_RI }, + { "rioff", 0x03, INSTR_RI_RI }, + { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_b2[] = { +#ifdef CONFIG_64BIT + { "stckf", 0x7c, INSTR_S_RD }, + { "lpp", 0x80, INSTR_S_RD }, + { "lcctl", 0x84, INSTR_S_RD }, + { "lpctl", 0x85, INSTR_S_RD }, + { "qsi", 0x86, INSTR_S_RD }, + { "lsctl", 0x87, INSTR_S_RD }, + { "qctri", 0x8e, INSTR_S_RD }, + { "stfle", 0xb0, INSTR_S_RD }, + { "lpswe", 0xb2, INSTR_S_RD }, + { "srnmb", 0xb8, INSTR_S_RD }, + { "srnmt", 0xb9, INSTR_S_RD }, + { "lfas", 0xbd, INSTR_S_RD }, + { "scctr", 0xe0, INSTR_RRE_RR }, + { "spctr", 0xe1, INSTR_RRE_RR }, + { "ecctr", 0xe4, INSTR_RRE_RR }, + { "epctr", 0xe5, INSTR_RRE_RR }, + { "ppa", 0xe8, INSTR_RRF_U0RR }, + { "etnd", 0xec, INSTR_RRE_R0 }, + { "ecpga", 0xed, INSTR_RRE_RR }, + { "tend", 0xf8, INSTR_S_00 }, + { "niai", 0xfa, INSTR_IE_UU }, + { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, +#endif + { "stidp", 0x02, INSTR_S_RD }, + { "sck", 0x04, INSTR_S_RD }, + { "stck", 0x05, INSTR_S_RD }, + { "sckc", 0x06, INSTR_S_RD }, + { "stckc", 0x07, INSTR_S_RD }, + { "spt", 0x08, INSTR_S_RD }, + { "stpt", 0x09, INSTR_S_RD }, + { "spka", 0x0a, INSTR_S_RD }, + { "ipk", 0x0b, INSTR_S_00 }, + { "ptlb", 0x0d, INSTR_S_00 }, + { "spx", 0x10, INSTR_S_RD }, + { "stpx", 0x11, INSTR_S_RD }, + { "stap", 0x12, INSTR_S_RD }, + { "sie", 0x14, INSTR_S_RD }, + { "pc", 0x18, INSTR_S_RD }, + { "sac", 0x19, INSTR_S_RD }, + { "cfc", 0x1a, INSTR_S_RD }, + { "servc", 0x20, INSTR_RRE_RR }, + { "ipte", 0x21, INSTR_RRE_RR }, + { "ipm", 0x22, INSTR_RRE_R0 }, + { "ivsk", 0x23, INSTR_RRE_RR }, + { "iac", 0x24, INSTR_RRE_R0 }, + { "ssar", 0x25, INSTR_RRE_R0 }, + { "epar", 0x26, INSTR_RRE_R0 }, + { "esar", 0x27, INSTR_RRE_R0 }, + { "pt", 0x28, INSTR_RRE_RR }, + { "iske", 0x29, INSTR_RRE_RR }, + { "rrbe", 0x2a, INSTR_RRE_RR }, + { "sske", 0x2b, INSTR_RRF_M0RR }, + { "tb", 0x2c, INSTR_RRE_0R }, + { "dxr", 0x2d, INSTR_RRE_FF }, + { "pgin", 0x2e, INSTR_RRE_RR }, + { "pgout", 0x2f, INSTR_RRE_RR }, + { "csch", 0x30, INSTR_S_00 }, + { "hsch", 0x31, INSTR_S_00 }, + { "msch", 0x32, INSTR_S_RD }, + { "ssch", 0x33, INSTR_S_RD }, + { "stsch", 0x34, INSTR_S_RD }, + { "tsch", 0x35, INSTR_S_RD }, + { "tpi", 0x36, INSTR_S_RD }, + { "sal", 0x37, INSTR_S_00 }, + { "rsch", 0x38, INSTR_S_00 }, + { "stcrw", 0x39, INSTR_S_RD }, + { "stcps", 0x3a, INSTR_S_RD }, + { "rchp", 0x3b, INSTR_S_00 }, + { "schm", 0x3c, INSTR_S_00 }, + { "bakr", 0x40, INSTR_RRE_RR }, + { "cksm", 0x41, INSTR_RRE_RR }, + { "sqdr", 0x44, INSTR_RRE_FF }, + { "sqer", 0x45, INSTR_RRE_FF }, + { "stura", 0x46, INSTR_RRE_RR }, + { "msta", 0x47, INSTR_RRE_R0 }, + { "palb", 0x48, INSTR_RRE_00 }, + { "ereg", 0x49, INSTR_RRE_RR }, + { "esta", 0x4a, INSTR_RRE_RR }, + { "lura", 0x4b, INSTR_RRE_RR }, + { "tar", 0x4c, INSTR_RRE_AR }, + { "cpya", 0x4d, INSTR_RRE_AA }, + { "sar", 0x4e, INSTR_RRE_AR }, + { "ear", 0x4f, INSTR_RRE_RA }, + { "csp", 0x50, INSTR_RRE_RR }, + { "msr", 0x52, INSTR_RRE_RR }, + { "mvpg", 0x54, INSTR_RRE_RR }, + { "mvst", 0x55, INSTR_RRE_RR }, + { "cuse", 0x57, INSTR_RRE_RR }, + { "bsg", 0x58, INSTR_RRE_RR }, + { "bsa", 0x5a, INSTR_RRE_RR }, + { "clst", 0x5d, INSTR_RRE_RR }, + { "srst", 0x5e, INSTR_RRE_RR }, + { "cmpsc", 0x63, INSTR_RRE_RR }, + { "siga", 0x74, INSTR_S_RD }, + { "xsch", 0x76, INSTR_S_00 }, + { "rp", 0x77, INSTR_S_RD }, + { "stcke", 0x78, INSTR_S_RD }, + { "sacf", 0x79, INSTR_S_RD }, + { "stsi", 0x7d, INSTR_S_RD }, + { "srnm", 0x99, INSTR_S_RD }, + { "stfpc", 0x9c, INSTR_S_RD }, + { "lfpc", 0x9d, INSTR_S_RD }, + { "tre", 0xa5, INSTR_RRE_RR }, + { "cuutf", 0xa6, INSTR_RRF_M0RR }, + { "cutfu", 0xa7, INSTR_RRF_M0RR }, + { "stfl", 0xb1, INSTR_S_RD }, + { "trap4", 0xff, INSTR_S_RD }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_b3[] = { +#ifdef CONFIG_64BIT + { "maylr", 0x38, INSTR_RRF_F0FF }, + { "mylr", 0x39, INSTR_RRF_F0FF }, + { "mayr", 0x3a, INSTR_RRF_F0FF }, + { "myr", 0x3b, INSTR_RRF_F0FF }, + { "mayhr", 0x3c, INSTR_RRF_F0FF }, + { "myhr", 0x3d, INSTR_RRF_F0FF }, + { "lpdfr", 0x70, INSTR_RRE_FF }, + { "lndfr", 0x71, INSTR_RRE_FF }, + { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, + { "lcdfr", 0x73, INSTR_RRE_FF }, + { "sfasr", 0x85, INSTR_RRE_R0 }, + { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR }, + { "ldgr", 0xc1, INSTR_RRE_FR }, + { "cegr", 0xc4, INSTR_RRE_FR }, + { "cdgr", 0xc5, INSTR_RRE_FR }, + { "cxgr", 0xc6, INSTR_RRE_FR }, + { "cger", 0xc8, INSTR_RRF_U0RF }, + { "cgdr", 0xc9, INSTR_RRF_U0RF }, + { "cgxr", 0xca, INSTR_RRF_U0RF }, + { "lgdr", 0xcd, INSTR_RRE_RF }, + { "mdtra", 0xd0, INSTR_RRF_FUFF2 }, + { "ddtra", 0xd1, INSTR_RRF_FUFF2 }, + { "adtra", 0xd2, INSTR_RRF_FUFF2 }, + { "sdtra", 0xd3, INSTR_RRF_FUFF2 }, + { "ldetr", 0xd4, INSTR_RRF_0UFF }, + { "ledtr", 0xd5, INSTR_RRF_UUFF }, + { "ltdtr", 0xd6, INSTR_RRE_FF }, + { "fidtr", 0xd7, INSTR_RRF_UUFF }, + { "mxtra", 0xd8, INSTR_RRF_FUFF2 }, + { "dxtra", 0xd9, INSTR_RRF_FUFF2 }, + { "axtra", 0xda, INSTR_RRF_FUFF2 }, + { "sxtra", 0xdb, INSTR_RRF_FUFF2 }, + { "lxdtr", 0xdc, INSTR_RRF_0UFF }, + { "ldxtr", 0xdd, INSTR_RRF_UUFF }, + { "ltxtr", 0xde, INSTR_RRE_FF }, + { "fixtr", 0xdf, INSTR_RRF_UUFF }, + { "kdtr", 0xe0, INSTR_RRE_FF }, + { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF }, + { "cudtr", 0xe2, INSTR_RRE_RF }, + { "csdtr", 0xe3, INSTR_RRE_RF }, + { "cdtr", 0xe4, INSTR_RRE_FF }, + { "eedtr", 0xe5, INSTR_RRE_RF }, + { "esdtr", 0xe7, INSTR_RRE_RF }, + { "kxtr", 0xe8, INSTR_RRE_FF }, + { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR }, + { "cuxtr", 0xea, INSTR_RRE_RF }, + { "csxtr", 0xeb, INSTR_RRE_RF }, + { "cxtr", 0xec, INSTR_RRE_FF }, + { "eextr", 0xed, INSTR_RRE_RF }, + { "esxtr", 0xef, INSTR_RRE_RF }, + { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR }, + { "cdutr", 0xf2, INSTR_RRE_FR }, + { "cdstr", 0xf3, INSTR_RRE_FR }, + { "cedtr", 0xf4, INSTR_RRE_FF }, + { "qadtr", 0xf5, INSTR_RRF_FUFF }, + { "iedtr", 0xf6, INSTR_RRF_F0FR }, + { "rrdtr", 0xf7, INSTR_RRF_FFRU }, + { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF }, + { "cxutr", 0xfa, INSTR_RRE_FR }, + { "cxstr", 0xfb, INSTR_RRE_FR }, + { "cextr", 0xfc, INSTR_RRE_FF }, + { "qaxtr", 0xfd, INSTR_RRF_FUFF }, + { "iextr", 0xfe, INSTR_RRF_F0FR }, + { "rrxtr", 0xff, INSTR_RRF_FFRU }, +#endif + { "lpebr", 0x00, INSTR_RRE_FF }, + { "lnebr", 0x01, INSTR_RRE_FF }, + { "ltebr", 0x02, INSTR_RRE_FF }, + { "lcebr", 0x03, INSTR_RRE_FF }, + { "ldebr", 0x04, INSTR_RRE_FF }, + { "lxdbr", 0x05, INSTR_RRE_FF }, + { "lxebr", 0x06, INSTR_RRE_FF }, + { "mxdbr", 0x07, INSTR_RRE_FF }, + { "kebr", 0x08, INSTR_RRE_FF }, + { "cebr", 0x09, INSTR_RRE_FF }, + { "aebr", 0x0a, INSTR_RRE_FF }, + { "sebr", 0x0b, INSTR_RRE_FF }, + { "mdebr", 0x0c, INSTR_RRE_FF }, + { "debr", 0x0d, INSTR_RRE_FF }, + { "maebr", 0x0e, INSTR_RRF_F0FF }, + { "msebr", 0x0f, INSTR_RRF_F0FF }, + { "lpdbr", 0x10, INSTR_RRE_FF }, + { "lndbr", 0x11, INSTR_RRE_FF }, + { "ltdbr", 0x12, INSTR_RRE_FF }, + { "lcdbr", 0x13, INSTR_RRE_FF }, + { "sqebr", 0x14, INSTR_RRE_FF }, + { "sqdbr", 0x15, INSTR_RRE_FF }, + { "sqxbr", 0x16, INSTR_RRE_FF }, + { "meebr", 0x17, INSTR_RRE_FF }, + { "kdbr", 0x18, INSTR_RRE_FF }, + { "cdbr", 0x19, INSTR_RRE_FF }, + { "adbr", 0x1a, INSTR_RRE_FF }, + { "sdbr", 0x1b, INSTR_RRE_FF }, + { "mdbr", 0x1c, INSTR_RRE_FF }, + { "ddbr", 0x1d, INSTR_RRE_FF }, + { "madbr", 0x1e, INSTR_RRF_F0FF }, + { "msdbr", 0x1f, INSTR_RRF_F0FF }, + { "lder", 0x24, INSTR_RRE_FF }, + { "lxdr", 0x25, INSTR_RRE_FF }, + { "lxer", 0x26, INSTR_RRE_FF }, + { "maer", 0x2e, INSTR_RRF_F0FF }, + { "mser", 0x2f, INSTR_RRF_F0FF }, + { "sqxr", 0x36, INSTR_RRE_FF }, + { "meer", 0x37, INSTR_RRE_FF }, + { "madr", 0x3e, INSTR_RRF_F0FF }, + { "msdr", 0x3f, INSTR_RRF_F0FF }, + { "lpxbr", 0x40, INSTR_RRE_FF }, + { "lnxbr", 0x41, INSTR_RRE_FF }, + { "ltxbr", 0x42, INSTR_RRE_FF }, + { "lcxbr", 0x43, INSTR_RRE_FF }, + { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF }, + { "kxbr", 0x48, INSTR_RRE_FF }, + { "cxbr", 0x49, INSTR_RRE_FF }, + { "axbr", 0x4a, INSTR_RRE_FF }, + { "sxbr", 0x4b, INSTR_RRE_FF }, + { "mxbr", 0x4c, INSTR_RRE_FF }, + { "dxbr", 0x4d, INSTR_RRE_FF }, + { "tbedr", 0x50, INSTR_RRF_U0FF }, + { "tbdr", 0x51, INSTR_RRF_U0FF }, + { "diebr", 0x53, INSTR_RRF_FUFF }, + { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF }, + { "thder", 0x58, INSTR_RRE_FF }, + { "thdr", 0x59, INSTR_RRE_FF }, + { "didbr", 0x5b, INSTR_RRF_FUFF }, + { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF }, + { "lpxr", 0x60, INSTR_RRE_FF }, + { "lnxr", 0x61, INSTR_RRE_FF }, + { "ltxr", 0x62, INSTR_RRE_FF }, + { "lcxr", 0x63, INSTR_RRE_FF }, + { "lxr", 0x65, INSTR_RRE_FF }, + { "lexr", 0x66, INSTR_RRE_FF }, + { "fixr", 0x67, INSTR_RRE_FF }, + { "cxr", 0x69, INSTR_RRE_FF }, + { "lzer", 0x74, INSTR_RRE_F0 }, + { "lzdr", 0x75, INSTR_RRE_F0 }, + { "lzxr", 0x76, INSTR_RRE_F0 }, + { "fier", 0x77, INSTR_RRE_FF }, + { "fidr", 0x7f, INSTR_RRE_FF }, + { "sfpc", 0x84, INSTR_RRE_RR_OPT }, + { "efpc", 0x8c, INSTR_RRE_RR_OPT }, + { "cefbr", 0x94, INSTR_RRE_RF }, + { "cdfbr", 0x95, INSTR_RRE_RF }, + { "cxfbr", 0x96, INSTR_RRE_RF }, + { "cfebr", 0x98, INSTR_RRF_U0RF }, + { "cfdbr", 0x99, INSTR_RRF_U0RF }, + { "cfxbr", 0x9a, INSTR_RRF_U0RF }, + { "cefr", 0xb4, INSTR_RRE_FR }, + { "cdfr", 0xb5, INSTR_RRE_FR }, + { "cxfr", 0xb6, INSTR_RRE_FR }, + { "cfer", 0xb8, INSTR_RRF_U0RF }, + { "cfdr", 0xb9, INSTR_RRF_U0RF }, + { "cfxr", 0xba, INSTR_RRF_U0RF }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_b9[] = { +#ifdef CONFIG_64BIT + { "lpgr", 0x00, INSTR_RRE_RR }, + { "lngr", 0x01, INSTR_RRE_RR }, + { "ltgr", 0x02, INSTR_RRE_RR }, + { "lcgr", 0x03, INSTR_RRE_RR }, + { "lgr", 0x04, INSTR_RRE_RR }, + { "lurag", 0x05, INSTR_RRE_RR }, + { "lgbr", 0x06, INSTR_RRE_RR }, + { "lghr", 0x07, INSTR_RRE_RR }, + { "agr", 0x08, INSTR_RRE_RR }, + { "sgr", 0x09, INSTR_RRE_RR }, + { "algr", 0x0a, INSTR_RRE_RR }, + { "slgr", 0x0b, INSTR_RRE_RR }, + { "msgr", 0x0c, INSTR_RRE_RR }, + { "dsgr", 0x0d, INSTR_RRE_RR }, + { "eregg", 0x0e, INSTR_RRE_RR }, + { "lrvgr", 0x0f, INSTR_RRE_RR }, + { "lpgfr", 0x10, INSTR_RRE_RR }, + { "lngfr", 0x11, INSTR_RRE_RR }, + { "ltgfr", 0x12, INSTR_RRE_RR }, + { "lcgfr", 0x13, INSTR_RRE_RR }, + { "lgfr", 0x14, INSTR_RRE_RR }, + { "llgfr", 0x16, INSTR_RRE_RR }, + { "llgtr", 0x17, INSTR_RRE_RR }, + { "agfr", 0x18, INSTR_RRE_RR }, + { "sgfr", 0x19, INSTR_RRE_RR }, + { "algfr", 0x1a, INSTR_RRE_RR }, + { "slgfr", 0x1b, INSTR_RRE_RR }, + { "msgfr", 0x1c, INSTR_RRE_RR }, + { "dsgfr", 0x1d, INSTR_RRE_RR }, + { "cgr", 0x20, INSTR_RRE_RR }, + { "clgr", 0x21, INSTR_RRE_RR }, + { "sturg", 0x25, INSTR_RRE_RR }, + { "lbr", 0x26, INSTR_RRE_RR }, + { "lhr", 0x27, INSTR_RRE_RR }, + { "cgfr", 0x30, INSTR_RRE_RR }, + { "clgfr", 0x31, INSTR_RRE_RR }, + { "cfdtr", 0x41, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF }, + { "bctgr", 0x46, INSTR_RRE_RR }, + { "cfxtr", 0x49, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR }, + { "cdftr", 0x51, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR }, + { "cxftr", 0x59, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR }, + { "cgrt", 0x60, INSTR_RRF_U0RR }, + { "clgrt", 0x61, INSTR_RRF_U0RR }, + { "crt", 0x72, INSTR_RRF_U0RR }, + { "clrt", 0x73, INSTR_RRF_U0RR }, + { "ngr", 0x80, INSTR_RRE_RR }, + { "ogr", 0x81, INSTR_RRE_RR }, + { "xgr", 0x82, INSTR_RRE_RR }, + { "flogr", 0x83, INSTR_RRE_RR }, + { "llgcr", 0x84, INSTR_RRE_RR }, + { "llghr", 0x85, INSTR_RRE_RR }, + { "mlgr", 0x86, INSTR_RRE_RR }, + { "dlgr", 0x87, INSTR_RRE_RR }, + { "alcgr", 0x88, INSTR_RRE_RR }, + { "slbgr", 0x89, INSTR_RRE_RR }, + { "cspg", 0x8a, INSTR_RRE_RR }, + { "idte", 0x8e, INSTR_RRF_R0RR }, + { "crdte", 0x8f, INSTR_RRF_RMRR }, + { "llcr", 0x94, INSTR_RRE_RR }, + { "llhr", 0x95, INSTR_RRE_RR }, + { "esea", 0x9d, INSTR_RRE_R0 }, + { "ptf", 0xa2, INSTR_RRE_R0 }, + { "lptea", 0xaa, INSTR_RRF_RURR }, + { "rrbm", 0xae, INSTR_RRE_RR }, + { "pfmf", 0xaf, INSTR_RRE_RR }, + { "cu14", 0xb0, INSTR_RRF_M0RR }, + { "cu24", 0xb1, INSTR_RRF_M0RR }, + { "cu41", 0xb2, INSTR_RRE_RR }, + { "cu42", 0xb3, INSTR_RRE_RR }, + { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "srstu", 0xbe, INSTR_RRE_RR }, + { "trte", 0xbf, INSTR_RRF_M0RR }, + { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, + { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 }, + { "chhr", 0xcd, INSTR_RRE_RR }, + { "clhhr", 0xcf, INSTR_RRE_RR }, + { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR }, + { "pcilg", 0xd2, INSTR_RRE_RR }, + { "rpcit", 0xd3, INSTR_RRE_RR }, + { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, + { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 }, + { "chlr", 0xdd, INSTR_RRE_RR }, + { "clhlr", 0xdf, INSTR_RRE_RR }, + { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, + { "locgr", 0xe2, INSTR_RRF_M0RR }, + { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, + { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, + { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, + { "agrk", 0xe8, INSTR_RRF_R0RR2 }, + { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, + { "algrk", 0xea, INSTR_RRF_R0RR2 }, + { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, + { "locr", 0xf2, INSTR_RRF_M0RR }, + { "nrk", 0xf4, INSTR_RRF_R0RR2 }, + { "ork", 0xf6, INSTR_RRF_R0RR2 }, + { "xrk", 0xf7, INSTR_RRF_R0RR2 }, + { "ark", 0xf8, INSTR_RRF_R0RR2 }, + { "srk", 0xf9, INSTR_RRF_R0RR2 }, + { "alrk", 0xfa, INSTR_RRF_R0RR2 }, + { "slrk", 0xfb, INSTR_RRF_R0RR2 }, +#endif + { "kmac", 0x1e, INSTR_RRE_RR }, + { "lrvr", 0x1f, INSTR_RRE_RR }, + { "km", 0x2e, INSTR_RRE_RR }, + { "kmc", 0x2f, INSTR_RRE_RR }, + { "kimd", 0x3e, INSTR_RRE_RR }, + { "klmd", 0x3f, INSTR_RRE_RR }, + { "epsw", 0x8d, INSTR_RRE_RR }, + { "trtt", 0x90, INSTR_RRF_M0RR }, + { "trto", 0x91, INSTR_RRF_M0RR }, + { "trot", 0x92, INSTR_RRF_M0RR }, + { "troo", 0x93, INSTR_RRF_M0RR }, + { "mlr", 0x96, INSTR_RRE_RR }, + { "dlr", 0x97, INSTR_RRE_RR }, + { "alcr", 0x98, INSTR_RRE_RR }, + { "slbr", 0x99, INSTR_RRE_RR }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c0[] = { +#ifdef CONFIG_64BIT + { "lgfi", 0x01, INSTR_RIL_RI }, + { "xihf", 0x06, INSTR_RIL_RU }, + { "xilf", 0x07, INSTR_RIL_RU }, + { "iihf", 0x08, INSTR_RIL_RU }, + { "iilf", 0x09, INSTR_RIL_RU }, + { "nihf", 0x0a, INSTR_RIL_RU }, + { "nilf", 0x0b, INSTR_RIL_RU }, + { "oihf", 0x0c, INSTR_RIL_RU }, + { "oilf", 0x0d, INSTR_RIL_RU }, + { "llihf", 0x0e, INSTR_RIL_RU }, + { "llilf", 0x0f, INSTR_RIL_RU }, +#endif + { "larl", 0x00, INSTR_RIL_RP }, + { "brcl", 0x04, INSTR_RIL_UP }, + { "brasl", 0x05, INSTR_RIL_RP }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c2[] = { +#ifdef CONFIG_64BIT + { "msgfi", 0x00, INSTR_RIL_RI }, + { "msfi", 0x01, INSTR_RIL_RI }, + { "slgfi", 0x04, INSTR_RIL_RU }, + { "slfi", 0x05, INSTR_RIL_RU }, + { "agfi", 0x08, INSTR_RIL_RI }, + { "afi", 0x09, INSTR_RIL_RI }, + { "algfi", 0x0a, INSTR_RIL_RU }, + { "alfi", 0x0b, INSTR_RIL_RU }, + { "cgfi", 0x0c, INSTR_RIL_RI }, + { "cfi", 0x0d, INSTR_RIL_RI }, + { "clgfi", 0x0e, INSTR_RIL_RU }, + { "clfi", 0x0f, INSTR_RIL_RU }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c4[] = { +#ifdef CONFIG_64BIT + { "llhrl", 0x02, INSTR_RIL_RP }, + { "lghrl", 0x04, INSTR_RIL_RP }, + { "lhrl", 0x05, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, + { "sthrl", 0x07, INSTR_RIL_RP }, + { "lgrl", 0x08, INSTR_RIL_RP }, + { "stgrl", 0x0b, INSTR_RIL_RP }, + { "lgfrl", 0x0c, INSTR_RIL_RP }, + { "lrl", 0x0d, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, + { "strl", 0x0f, INSTR_RIL_RP }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c6[] = { +#ifdef CONFIG_64BIT + { "exrl", 0x00, INSTR_RIL_RP }, + { "pfdrl", 0x02, INSTR_RIL_UP }, + { "cghrl", 0x04, INSTR_RIL_RP }, + { "chrl", 0x05, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, + { "clhrl", 0x07, INSTR_RIL_RP }, + { "cgrl", 0x08, INSTR_RIL_RP }, + { "clgrl", 0x0a, INSTR_RIL_RP }, + { "cgfrl", 0x0c, INSTR_RIL_RP }, + { "crl", 0x0d, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, + { "clrl", 0x0f, INSTR_RIL_RP }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c8[] = { +#ifdef CONFIG_64BIT + { "mvcos", 0x00, INSTR_SSF_RRDRD }, + { "ectg", 0x01, INSTR_SSF_RRDRD }, + { "csst", 0x02, INSTR_SSF_RRDRD }, + { "lpd", 0x04, INSTR_SSF_RRDRD2 }, + { "lpdg", 0x05, INSTR_SSF_RRDRD2 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_cc[] = { +#ifdef CONFIG_64BIT + { "brcth", 0x06, INSTR_RIL_RP }, + { "aih", 0x08, INSTR_RIL_RI }, + { "alsih", 0x0a, INSTR_RIL_RI }, + { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI }, + { "cih", 0x0d, INSTR_RIL_RI }, + { "clih", 0x0f, INSTR_RIL_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_e3[] = { +#ifdef CONFIG_64BIT + { "ltg", 0x02, INSTR_RXY_RRRD }, + { "lrag", 0x03, INSTR_RXY_RRRD }, + { "lg", 0x04, INSTR_RXY_RRRD }, + { "cvby", 0x06, INSTR_RXY_RRRD }, + { "ag", 0x08, INSTR_RXY_RRRD }, + { "sg", 0x09, INSTR_RXY_RRRD }, + { "alg", 0x0a, INSTR_RXY_RRRD }, + { "slg", 0x0b, INSTR_RXY_RRRD }, + { "msg", 0x0c, INSTR_RXY_RRRD }, + { "dsg", 0x0d, INSTR_RXY_RRRD }, + { "cvbg", 0x0e, INSTR_RXY_RRRD }, + { "lrvg", 0x0f, INSTR_RXY_RRRD }, + { "lt", 0x12, INSTR_RXY_RRRD }, + { "lray", 0x13, INSTR_RXY_RRRD }, + { "lgf", 0x14, INSTR_RXY_RRRD }, + { "lgh", 0x15, INSTR_RXY_RRRD }, + { "llgf", 0x16, INSTR_RXY_RRRD }, + { "llgt", 0x17, INSTR_RXY_RRRD }, + { "agf", 0x18, INSTR_RXY_RRRD }, + { "sgf", 0x19, INSTR_RXY_RRRD }, + { "algf", 0x1a, INSTR_RXY_RRRD }, + { "slgf", 0x1b, INSTR_RXY_RRRD }, + { "msgf", 0x1c, INSTR_RXY_RRRD }, + { "dsgf", 0x1d, INSTR_RXY_RRRD }, + { "cg", 0x20, INSTR_RXY_RRRD }, + { "clg", 0x21, INSTR_RXY_RRRD }, + { "stg", 0x24, INSTR_RXY_RRRD }, + { "ntstg", 0x25, INSTR_RXY_RRRD }, + { "cvdy", 0x26, INSTR_RXY_RRRD }, + { "cvdg", 0x2e, INSTR_RXY_RRRD }, + { "strvg", 0x2f, INSTR_RXY_RRRD }, + { "cgf", 0x30, INSTR_RXY_RRRD }, + { "clgf", 0x31, INSTR_RXY_RRRD }, + { "ltgf", 0x32, INSTR_RXY_RRRD }, + { "cgh", 0x34, INSTR_RXY_RRRD }, + { "pfd", 0x36, INSTR_RXY_URRD }, + { "strvh", 0x3f, INSTR_RXY_RRRD }, + { "bctg", 0x46, INSTR_RXY_RRRD }, + { "sty", 0x50, INSTR_RXY_RRRD }, + { "msy", 0x51, INSTR_RXY_RRRD }, + { "ny", 0x54, INSTR_RXY_RRRD }, + { "cly", 0x55, INSTR_RXY_RRRD }, + { "oy", 0x56, INSTR_RXY_RRRD }, + { "xy", 0x57, INSTR_RXY_RRRD }, + { "ly", 0x58, INSTR_RXY_RRRD }, + { "cy", 0x59, INSTR_RXY_RRRD }, + { "ay", 0x5a, INSTR_RXY_RRRD }, + { "sy", 0x5b, INSTR_RXY_RRRD }, + { "mfy", 0x5c, INSTR_RXY_RRRD }, + { "aly", 0x5e, INSTR_RXY_RRRD }, + { "sly", 0x5f, INSTR_RXY_RRRD }, + { "sthy", 0x70, INSTR_RXY_RRRD }, + { "lay", 0x71, INSTR_RXY_RRRD }, + { "stcy", 0x72, INSTR_RXY_RRRD }, + { "icy", 0x73, INSTR_RXY_RRRD }, + { "laey", 0x75, INSTR_RXY_RRRD }, + { "lb", 0x76, INSTR_RXY_RRRD }, + { "lgb", 0x77, INSTR_RXY_RRRD }, + { "lhy", 0x78, INSTR_RXY_RRRD }, + { "chy", 0x79, INSTR_RXY_RRRD }, + { "ahy", 0x7a, INSTR_RXY_RRRD }, + { "shy", 0x7b, INSTR_RXY_RRRD }, + { "mhy", 0x7c, INSTR_RXY_RRRD }, + { "ng", 0x80, INSTR_RXY_RRRD }, + { "og", 0x81, INSTR_RXY_RRRD }, + { "xg", 0x82, INSTR_RXY_RRRD }, + { "lgat", 0x85, INSTR_RXY_RRRD }, + { "mlg", 0x86, INSTR_RXY_RRRD }, + { "dlg", 0x87, INSTR_RXY_RRRD }, + { "alcg", 0x88, INSTR_RXY_RRRD }, + { "slbg", 0x89, INSTR_RXY_RRRD }, + { "stpq", 0x8e, INSTR_RXY_RRRD }, + { "lpq", 0x8f, INSTR_RXY_RRRD }, + { "llgc", 0x90, INSTR_RXY_RRRD }, + { "llgh", 0x91, INSTR_RXY_RRRD }, + { "llc", 0x94, INSTR_RXY_RRRD }, + { "llh", 0x95, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD }, + { "lat", 0x9f, INSTR_RXY_RRRD }, + { "lbh", 0xc0, INSTR_RXY_RRRD }, + { "llch", 0xc2, INSTR_RXY_RRRD }, + { "stch", 0xc3, INSTR_RXY_RRRD }, + { "lhh", 0xc4, INSTR_RXY_RRRD }, + { "llhh", 0xc6, INSTR_RXY_RRRD }, + { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfhat", 0xc8, INSTR_RXY_RRRD }, + { "lfh", 0xca, INSTR_RXY_RRRD }, + { "stfh", 0xcb, INSTR_RXY_RRRD }, + { "chf", 0xcd, INSTR_RXY_RRRD }, + { "clhf", 0xcf, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD }, +#endif + { "lrv", 0x1e, INSTR_RXY_RRRD }, + { "lrvh", 0x1f, INSTR_RXY_RRRD }, + { "strv", 0x3e, INSTR_RXY_RRRD }, + { "ml", 0x96, INSTR_RXY_RRRD }, + { "dl", 0x97, INSTR_RXY_RRRD }, + { "alc", 0x98, INSTR_RXY_RRRD }, + { "slb", 0x99, INSTR_RXY_RRRD }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_e5[] = { +#ifdef CONFIG_64BIT + { "strag", 0x02, INSTR_SSE_RDRD }, + { "mvhhi", 0x44, INSTR_SIL_RDI }, + { "mvghi", 0x48, INSTR_SIL_RDI }, + { "mvhi", 0x4c, INSTR_SIL_RDI }, + { "chhsi", 0x54, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, + { "cghsi", 0x58, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, + { "chsi", 0x5c, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, +#endif + { "lasp", 0x00, INSTR_SSE_RDRD }, + { "tprot", 0x01, INSTR_SSE_RDRD }, + { "mvcsk", 0x0e, INSTR_SSE_RDRD }, + { "mvcdk", 0x0f, INSTR_SSE_RDRD }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_eb[] = { +#ifdef CONFIG_64BIT + { "lmg", 0x04, INSTR_RSY_RRRD }, + { "srag", 0x0a, INSTR_RSY_RRRD }, + { "slag", 0x0b, INSTR_RSY_RRRD }, + { "srlg", 0x0c, INSTR_RSY_RRRD }, + { "sllg", 0x0d, INSTR_RSY_RRRD }, + { "tracg", 0x0f, INSTR_RSY_RRRD }, + { "csy", 0x14, INSTR_RSY_RRRD }, + { "rllg", 0x1c, INSTR_RSY_RRRD }, + { "clmh", 0x20, INSTR_RSY_RURD }, + { "clmy", 0x21, INSTR_RSY_RURD }, + { "clt", 0x23, INSTR_RSY_RURD }, + { "stmg", 0x24, INSTR_RSY_RRRD }, + { "stctg", 0x25, INSTR_RSY_CCRD }, + { "stmh", 0x26, INSTR_RSY_RRRD }, + { "clgt", 0x2b, INSTR_RSY_RURD }, + { "stcmh", 0x2c, INSTR_RSY_RURD }, + { "stcmy", 0x2d, INSTR_RSY_RURD }, + { "lctlg", 0x2f, INSTR_RSY_CCRD }, + { "csg", 0x30, INSTR_RSY_RRRD }, + { "cdsy", 0x31, INSTR_RSY_RRRD }, + { "cdsg", 0x3e, INSTR_RSY_RRRD }, + { "bxhg", 0x44, INSTR_RSY_RRRD }, + { "bxleg", 0x45, INSTR_RSY_RRRD }, + { "ecag", 0x4c, INSTR_RSY_RRRD }, + { "tmy", 0x51, INSTR_SIY_URD }, + { "mviy", 0x52, INSTR_SIY_URD }, + { "niy", 0x54, INSTR_SIY_URD }, + { "cliy", 0x55, INSTR_SIY_URD }, + { "oiy", 0x56, INSTR_SIY_URD }, + { "xiy", 0x57, INSTR_SIY_URD }, + { "asi", 0x6a, INSTR_SIY_IRD }, + { "alsi", 0x6e, INSTR_SIY_IRD }, + { "agsi", 0x7a, INSTR_SIY_IRD }, + { "algsi", 0x7e, INSTR_SIY_IRD }, + { "icmh", 0x80, INSTR_RSY_RURD }, + { "icmy", 0x81, INSTR_RSY_RURD }, + { "clclu", 0x8f, INSTR_RSY_RRRD }, + { "stmy", 0x90, INSTR_RSY_RRRD }, + { "lmh", 0x96, INSTR_RSY_RRRD }, + { "lmy", 0x98, INSTR_RSY_RRRD }, + { "lamy", 0x9a, INSTR_RSY_AARD }, + { "stamy", 0x9b, INSTR_RSY_AARD }, + { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD }, + { "sic", 0xd1, INSTR_RSY_RRRD }, + { "srak", 0xdc, INSTR_RSY_RRRD }, + { "slak", 0xdd, INSTR_RSY_RRRD }, + { "srlk", 0xde, INSTR_RSY_RRRD }, + { "sllk", 0xdf, INSTR_RSY_RRRD }, + { "locg", 0xe2, INSTR_RSY_RDRM }, + { "stocg", 0xe3, INSTR_RSY_RDRM }, + { "lang", 0xe4, INSTR_RSY_RRRD }, + { "laog", 0xe6, INSTR_RSY_RRRD }, + { "laxg", 0xe7, INSTR_RSY_RRRD }, + { "laag", 0xe8, INSTR_RSY_RRRD }, + { "laalg", 0xea, INSTR_RSY_RRRD }, + { "loc", 0xf2, INSTR_RSY_RDRM }, + { "stoc", 0xf3, INSTR_RSY_RDRM }, + { "lan", 0xf4, INSTR_RSY_RRRD }, + { "lao", 0xf6, INSTR_RSY_RRRD }, + { "lax", 0xf7, INSTR_RSY_RRRD }, + { "laa", 0xf8, INSTR_RSY_RRRD }, + { "laal", 0xfa, INSTR_RSY_RRRD }, + { "lric", 0x60, INSTR_RSY_RDRM }, + { "stric", 0x61, INSTR_RSY_RDRM }, + { "mric", 0x62, INSTR_RSY_RDRM }, +#endif + { "rll", 0x1d, INSTR_RSY_RRRD }, + { "mvclu", 0x8e, INSTR_RSY_RRRD }, + { "tp", 0xc0, INSTR_RSL_R0RD }, + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_ec[] = { +#ifdef CONFIG_64BIT + { "brxhg", 0x44, INSTR_RIE_RRP }, + { "brxlg", 0x45, INSTR_RIE_RRP }, + { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { "rnsbg", 0x54, INSTR_RIE_RRUUU }, + { "risbg", 0x55, INSTR_RIE_RRUUU }, + { "rosbg", 0x56, INSTR_RIE_RRUUU }, + { "rxsbg", 0x57, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "cgrj", 0x64, INSTR_RIE_RRPU }, + { "clgrj", 0x65, INSTR_RIE_RRPU }, + { "cgit", 0x70, INSTR_RIE_R0IU }, + { "clgit", 0x71, INSTR_RIE_R0UU }, + { "cit", 0x72, INSTR_RIE_R0IU }, + { "clfit", 0x73, INSTR_RIE_R0UU }, + { "crj", 0x76, INSTR_RIE_RRPU }, + { "clrj", 0x77, INSTR_RIE_RRPU }, + { "cgij", 0x7c, INSTR_RIE_RUPI }, + { "clgij", 0x7d, INSTR_RIE_RUPU }, + { "cij", 0x7e, INSTR_RIE_RUPI }, + { "clij", 0x7f, INSTR_RIE_RUPU }, + { "ahik", 0xd8, INSTR_RIE_RRI0 }, + { "aghik", 0xd9, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, + { "cgrb", 0xe4, INSTR_RRS_RRRDU }, + { "clgrb", 0xe5, INSTR_RRS_RRRDU }, + { "crb", 0xf6, INSTR_RRS_RRRDU }, + { "clrb", 0xf7, INSTR_RRS_RRRDU }, + { "cgib", 0xfc, INSTR_RIS_RURDI }, + { "clgib", 0xfd, INSTR_RIS_RURDU }, + { "cib", 0xfe, INSTR_RIS_RURDI }, + { "clib", 0xff, INSTR_RIS_RURDU }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_ed[] = { +#ifdef CONFIG_64BIT + { "mayl", 0x38, INSTR_RXF_FRRDF }, + { "myl", 0x39, INSTR_RXF_FRRDF }, + { "may", 0x3a, INSTR_RXF_FRRDF }, + { "my", 0x3b, INSTR_RXF_FRRDF }, + { "mayh", 0x3c, INSTR_RXF_FRRDF }, + { "myh", 0x3d, INSTR_RXF_FRRDF }, + { "sldt", 0x40, INSTR_RXF_FRRDF }, + { "srdt", 0x41, INSTR_RXF_FRRDF }, + { "slxt", 0x48, INSTR_RXF_FRRDF }, + { "srxt", 0x49, INSTR_RXF_FRRDF }, + { "tdcet", 0x50, INSTR_RXE_FRRD }, + { "tdget", 0x51, INSTR_RXE_FRRD }, + { "tdcdt", 0x54, INSTR_RXE_FRRD }, + { "tdgdt", 0x55, INSTR_RXE_FRRD }, + { "tdcxt", 0x58, INSTR_RXE_FRRD }, + { "tdgxt", 0x59, INSTR_RXE_FRRD }, + { "ley", 0x64, INSTR_RXY_FRRD }, + { "ldy", 0x65, INSTR_RXY_FRRD }, + { "stey", 0x66, INSTR_RXY_FRRD }, + { "stdy", 0x67, INSTR_RXY_FRRD }, + { "czdt", 0xa8, INSTR_RSL_LRDFU }, + { "czxt", 0xa9, INSTR_RSL_LRDFU }, + { "cdzt", 0xaa, INSTR_RSL_LRDFU }, + { "cxzt", 0xab, INSTR_RSL_LRDFU }, +#endif + { "ldeb", 0x04, INSTR_RXE_FRRD }, + { "lxdb", 0x05, INSTR_RXE_FRRD }, + { "lxeb", 0x06, INSTR_RXE_FRRD }, + { "mxdb", 0x07, INSTR_RXE_FRRD }, + { "keb", 0x08, INSTR_RXE_FRRD }, + { "ceb", 0x09, INSTR_RXE_FRRD }, + { "aeb", 0x0a, INSTR_RXE_FRRD }, + { "seb", 0x0b, INSTR_RXE_FRRD }, + { "mdeb", 0x0c, INSTR_RXE_FRRD }, + { "deb", 0x0d, INSTR_RXE_FRRD }, + { "maeb", 0x0e, INSTR_RXF_FRRDF }, + { "mseb", 0x0f, INSTR_RXF_FRRDF }, + { "tceb", 0x10, INSTR_RXE_FRRD }, + { "tcdb", 0x11, INSTR_RXE_FRRD }, + { "tcxb", 0x12, INSTR_RXE_FRRD }, + { "sqeb", 0x14, INSTR_RXE_FRRD }, + { "sqdb", 0x15, INSTR_RXE_FRRD }, + { "meeb", 0x17, INSTR_RXE_FRRD }, + { "kdb", 0x18, INSTR_RXE_FRRD }, + { "cdb", 0x19, INSTR_RXE_FRRD }, + { "adb", 0x1a, INSTR_RXE_FRRD }, + { "sdb", 0x1b, INSTR_RXE_FRRD }, + { "mdb", 0x1c, INSTR_RXE_FRRD }, + { "ddb", 0x1d, INSTR_RXE_FRRD }, + { "madb", 0x1e, INSTR_RXF_FRRDF }, + { "msdb", 0x1f, INSTR_RXF_FRRDF }, + { "lde", 0x24, INSTR_RXE_FRRD }, + { "lxd", 0x25, INSTR_RXE_FRRD }, + { "lxe", 0x26, INSTR_RXE_FRRD }, + { "mae", 0x2e, INSTR_RXF_FRRDF }, + { "mse", 0x2f, INSTR_RXF_FRRDF }, + { "sqe", 0x34, INSTR_RXE_FRRD }, + { "sqd", 0x35, INSTR_RXE_FRRD }, + { "mee", 0x37, INSTR_RXE_FRRD }, + { "mad", 0x3e, INSTR_RXF_FRRDF }, + { "msd", 0x3f, INSTR_RXF_FRRDF }, + { "", 0, INSTR_INVALID } +}; + +/* Extracts an operand value from an instruction. */ +static unsigned int extract_operand(unsigned char *code, + const struct s390_operand *operand) +{ + unsigned int val; + int bits; + + /* Extract fragments of the operand byte for byte. */ + code += operand->shift / 8; + bits = (operand->shift & 7) + operand->bits; + val = 0; + do { + val <<= 8; + val |= (unsigned int) *code++; + bits -= 8; + } while (bits > 0); + val >>= -bits; + val &= ((1U << (operand->bits - 1)) << 1) - 1; + + /* Check for special long displacement case. */ + if (operand->bits == 20 && operand->shift == 20) + val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + + /* Sign extend value if the operand is signed or pc relative. */ + if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && + (val & (1U << (operand->bits - 1)))) + val |= (-1U << (operand->bits - 1)) << 1; + + /* Double value if the operand is pc relative. */ + if (operand->flags & OPERAND_PCREL) + val <<= 1; + + /* Length x in an instructions has real length x + 1. */ + if (operand->flags & OPERAND_LENGTH) + val++; + return val; +} + +struct s390_insn *find_insn(unsigned char *code) +{ + unsigned char opfrag = code[1]; + unsigned char opmask; + struct s390_insn *table; + + switch (code[0]) { + case 0x01: + table = opcode_01; + break; + case 0xa5: + table = opcode_a5; + break; + case 0xa7: + table = opcode_a7; + break; + case 0xaa: + table = opcode_aa; + break; + case 0xb2: + table = opcode_b2; + break; + case 0xb3: + table = opcode_b3; + break; + case 0xb9: + table = opcode_b9; + break; + case 0xc0: + table = opcode_c0; + break; + case 0xc2: + table = opcode_c2; + break; + case 0xc4: + table = opcode_c4; + break; + case 0xc6: + table = opcode_c6; + break; + case 0xc8: + table = opcode_c8; + break; + case 0xcc: + table = opcode_cc; + break; + case 0xe3: + table = opcode_e3; + opfrag = code[5]; + break; + case 0xe5: + table = opcode_e5; + break; + case 0xeb: + table = opcode_eb; + opfrag = code[5]; + break; + case 0xec: + table = opcode_ec; + opfrag = code[5]; + break; + case 0xed: + table = opcode_ed; + opfrag = code[5]; + break; + default: + table = opcode; + opfrag = code[0]; + break; + } + while (table->format != INSTR_INVALID) { + opmask = formats[table->format][0]; + if (table->opfrag == (opfrag & opmask)) + return table; + table++; + } + return NULL; +} + +/** + * insn_to_mnemonic - decode an s390 instruction + * @instruction: instruction to decode + * @buf: buffer to fill with mnemonic + * @len: length of buffer + * + * Decode the instruction at @instruction and store the corresponding + * mnemonic into @buf of length @len. + * @buf is left unchanged if the instruction could not be decoded. + * Returns: + * %0 on success, %-ENOENT if the instruction was not found. + */ +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) +{ + struct s390_insn *insn; + + insn = find_insn(instruction); + if (!insn) + return -ENOENT; + if (insn->name[0] == '\0') + snprintf(buf, len, "%s", + long_insn_name[(int) insn->name[1]]); + else + snprintf(buf, len, "%.5s", insn->name); + return 0; +} +EXPORT_SYMBOL_GPL(insn_to_mnemonic); + +static int print_insn(char *buffer, unsigned char *code, unsigned long addr) +{ + struct s390_insn *insn; + const unsigned char *ops; + const struct s390_operand *operand; + unsigned int value; + char separator; + char *ptr; + int i; + + ptr = buffer; + insn = find_insn(code); + if (insn) { + if (insn->name[0] == '\0') + ptr += sprintf(ptr, "%s\t", + long_insn_name[(int) insn->name[1]]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); + /* Extract the operands. */ + separator = 0; + for (ops = formats[insn->format] + 1, i = 0; + *ops != 0 && i < 6; ops++, i++) { + operand = operands + *ops; + value = extract_operand(code, operand); + if ((operand->flags & OPERAND_INDEX) && value == 0) + continue; + if ((operand->flags & OPERAND_BASE) && + value == 0 && separator == '(') { + separator = ','; + continue; + } + if (separator) + ptr += sprintf(ptr, "%c", separator); + if (operand->flags & OPERAND_GPR) + ptr += sprintf(ptr, "%%r%i", value); + else if (operand->flags & OPERAND_FPR) + ptr += sprintf(ptr, "%%f%i", value); + else if (operand->flags & OPERAND_AR) + ptr += sprintf(ptr, "%%a%i", value); + else if (operand->flags & OPERAND_CR) + ptr += sprintf(ptr, "%%c%i", value); + else if (operand->flags & OPERAND_PCREL) + ptr += sprintf(ptr, "%lx", (signed int) value + + addr); + else if (operand->flags & OPERAND_SIGNED) + ptr += sprintf(ptr, "%i", value); + else + ptr += sprintf(ptr, "%u", value); + if (operand->flags & OPERAND_DISP) + separator = '('; + else if (operand->flags & OPERAND_BASE) { + ptr += sprintf(ptr, ")"); + separator = ','; + } else + separator = ','; + } + } else + ptr += sprintf(ptr, "unknown"); + return (int) (ptr - buffer); +} + +void show_code(struct pt_regs *regs) +{ + char *mode = user_mode(regs) ? "User" : "Krnl"; + unsigned char code[64]; + char buffer[64], *ptr; + mm_segment_t old_fs; + unsigned long addr; + int start, end, opsize, hops, i; + + /* Get a snapshot of the 64 bytes surrounding the fault address. */ + old_fs = get_fs(); + set_fs(user_mode(regs) ? USER_DS : KERNEL_DS); + for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { + addr = regs->psw.addr - 34 + start; + if (__copy_from_user(code + start - 2, + (char __user *) addr, 2)) + break; + } + for (end = 32; end < 64; end += 2) { + addr = regs->psw.addr + end - 32; + if (__copy_from_user(code + end, + (char __user *) addr, 2)) + break; + } + set_fs(old_fs); + /* Code snapshot useable ? */ + if ((regs->psw.addr & 1) || start >= end) { + printk("%s Code: Bad PSW.\n", mode); + return; + } + /* Find a starting point for the disassembly. */ + while (start < 32) { + for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) { + if (!find_insn(code + start + i)) + break; + i += insn_length(code[start + i]); + } + if (start + i == 32) + /* Looks good, sequence ends at PSW. */ + break; + start += 2; + } + /* Decode the instructions. */ + ptr = buffer; + ptr += sprintf(ptr, "%s Code:", mode); + hops = 0; + while (start < end && hops < 8) { + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; + addr = regs->psw.addr + start - 32; + ptr += sprintf(ptr, ONELONG, addr); + if (start + opsize >= end) + break; + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[start + i]); + *ptr++ = '\t'; + if (i < 6) + *ptr++ = '\t'; + ptr += print_insn(ptr, code + start, addr); + start += opsize; + printk(buffer); + ptr = buffer; + ptr += sprintf(ptr, "\n "); + hops++; + } + printk("\n"); +} + +void print_fn_code(unsigned char *code, unsigned long len) +{ + char buffer[64], *ptr; + int opsize, i; + + while (len) { + ptr = buffer; + opsize = insn_length(*code); + if (opsize > len) + break; + ptr += sprintf(ptr, "%p: ", code); + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[i]); + *ptr++ = '\t'; + if (i < 4) + *ptr++ = '\t'; + ptr += print_insn(ptr, code, (unsigned long) code); + *ptr++ = '\n'; + *ptr++ = 0; + printk(buffer); + code += opsize; + len -= opsize; + } +} diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c new file mode 100644 index 00000000000..acb412442e5 --- /dev/null +++ b/arch/s390/kernel/dumpstack.c @@ -0,0 +1,217 @@ +/* + * Stack dumping functions + * + * Copyright IBM Corp. 1999, 2013 + */ + +#include <linux/kallsyms.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/utsname.h> +#include <linux/export.h> +#include <linux/kdebug.h> +#include <linux/ptrace.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/debug.h> +#include <asm/dis.h> +#include <asm/ipl.h> + +#ifndef CONFIG_64BIT +#define LONG "%08lx " +#define FOURLONG "%08lx %08lx %08lx %08lx\n" +static int kstack_depth_to_print = 12; +#else /* CONFIG_64BIT */ +#define LONG "%016lx " +#define FOURLONG "%016lx %016lx %016lx %016lx\n" +static int kstack_depth_to_print = 20; +#endif /* CONFIG_64BIT */ + +/* + * For show_trace we have tree different stack to consider: + * - the panic stack which is used if the kernel stack has overflown + * - the asynchronous interrupt stack (cpu related) + * - the synchronous kernel stack (process related) + * The stack trace can start at any of the three stack and can potentially + * touch all of them. The order is: panic stack, async stack, sync stack. + */ +static unsigned long +__show_trace(unsigned long sp, unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long addr; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk("([<%016lx>] %pSR)\n", addr, (void *)addr); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + addr = regs->psw.addr & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + low = sp; + sp = regs->gprs[15]; + } +} + +static void show_trace(struct task_struct *task, unsigned long *stack) +{ + const unsigned long frame_size = + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + register unsigned long __r15 asm ("15"); + unsigned long sp; + + sp = (unsigned long) stack; + if (!sp) + sp = task ? task->thread.ksp : __r15; + printk("Call Trace:\n"); +#ifdef CONFIG_CHECK_STACK + sp = __show_trace(sp, + S390_lowcore.panic_stack + frame_size - 4096, + S390_lowcore.panic_stack + frame_size); +#endif + sp = __show_trace(sp, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); + if (task) + __show_trace(sp, (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + __show_trace(sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + if (!task) + task = current; + debug_show_held_locks(task); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + register unsigned long *__r15 asm ("15"); + unsigned long *stack; + int i; + + if (!sp) + stack = task ? (unsigned long *) task->thread.ksp : __r15; + else + stack = sp; + + for (i = 0; i < kstack_depth_to_print; i++) { + if (((addr_t) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? "" : "\n"); + printk(LONG, *stack++); + } + printk("\n"); + show_trace(task, sp); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); +#endif +} + +static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +{ + return (regs->psw.mask & bits) / ((~bits + 1) & bits); +} + +void show_registers(struct pt_regs *regs) +{ + char *mode; + + mode = user_mode(regs) ? "User" : "Krnl"; + printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + if (!user_mode(regs)) + printk(" (%pSR)", (void *)regs->psw.addr); + printk("\n"); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), + mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), + mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), + mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), + mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), + mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); +#ifdef CONFIG_64BIT + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); +#endif + printk("\n%s GPRS: " FOURLONG, mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" " FOURLONG, + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" " FOURLONG, + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" " FOURLONG, + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!user_mode(regs)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + do_exit(SIGSEGV); +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c new file mode 100644 index 00000000000..0dff972a169 --- /dev/null +++ b/arch/s390/kernel/early.c @@ -0,0 +1,497 @@ +/* + * Copyright IBM Corp. 2007, 2009 + * Author(s): Hongjie Yang <hongjie@us.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/compiler.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/ftrace.h> +#include <linux/lockdep.h> +#include <linux/module.h> +#include <linux/pfn.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <asm/ebcdic.h> +#include <asm/ipl.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> +#include <asm/sclp.h> +#include <asm/facility.h> +#include "entry.h" + +/* + * Create a Kernel NSS if the SAVESYS= parameter is defined + */ +#define DEFSYS_CMD_SIZE 128 +#define SAVESYS_CMD_SIZE 32 + +char kernel_nss_name[NSS_NAME_SIZE + 1]; + +static void __init setup_boot_command_line(void); + +/* + * Get the TOD clock running. + */ +static void __init reset_tod_clock(void) +{ + u64 time; + + if (store_tod_clock(&time) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) + disabled_wait(0); + + sched_clock_base_cc = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = sched_clock_base_cc; +} + +#ifdef CONFIG_SHARED_KERNEL +int __init savesys_ipl_nss(char *cmd, const int cmdlen); + +asm( + " .section .init.text,\"ax\",@progbits\n" + " .align 4\n" + " .type savesys_ipl_nss, @function\n" + "savesys_ipl_nss:\n" +#ifdef CONFIG_64BIT + " stmg 6,15,48(15)\n" + " lgr 14,3\n" + " sam31\n" + " diag 2,14,0x8\n" + " sam64\n" + " lgr 2,14\n" + " lmg 6,15,48(15)\n" +#else + " stm 6,15,24(15)\n" + " lr 14,3\n" + " diag 2,14,0x8\n" + " lr 2,14\n" + " lm 6,15,24(15)\n" +#endif + " br 14\n" + " .size savesys_ipl_nss, .-savesys_ipl_nss\n" + " .previous\n"); + +static __initdata char upper_command_line[COMMAND_LINE_SIZE]; + +static noinline __init void create_kernel_nss(void) +{ + unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; +#ifdef CONFIG_BLK_DEV_INITRD + unsigned int sinitrd_pfn, einitrd_pfn; +#endif + int response; + int hlen; + size_t len; + char *savesys_ptr; + char defsys_cmd[DEFSYS_CMD_SIZE]; + char savesys_cmd[SAVESYS_CMD_SIZE]; + + /* Do nothing if we are not running under VM */ + if (!MACHINE_IS_VM) + return; + + /* Convert COMMAND_LINE to upper case */ + for (i = 0; i < strlen(boot_command_line); i++) + upper_command_line[i] = toupper(boot_command_line[i]); + + savesys_ptr = strstr(upper_command_line, "SAVESYS="); + + if (!savesys_ptr) + return; + + savesys_ptr += 8; /* Point to the beginning of the NSS name */ + for (i = 0; i < NSS_NAME_SIZE; i++) { + if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0') + break; + kernel_nss_name[i] = savesys_ptr[i]; + } + + stext_pfn = PFN_DOWN(__pa(&_stext)); + eshared_pfn = PFN_DOWN(__pa(&_eshared)); + end_pfn = PFN_UP(__pa(&_end)); + min_size = end_pfn << 2; + + hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, + "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", + kernel_nss_name, stext_pfn - 1, stext_pfn, + eshared_pfn - 1, eshared_pfn, end_pfn); + +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE) { + sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); + einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); + min_size = einitrd_pfn << 2; + hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); + } +#endif + + snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); + defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; + snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", + kernel_nss_name, kernel_nss_name); + savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; + + __cpcmd(defsys_cmd, NULL, 0, &response); + + if (response != 0) { + pr_err("Defining the Linux kernel NSS failed with rc=%d\n", + response); + kernel_nss_name[0] = '\0'; + return; + } + + len = strlen(savesys_cmd); + ASCEBC(savesys_cmd, len); + response = savesys_ipl_nss(savesys_cmd, len); + + /* On success: response is equal to the command size, + * max SAVESYS_CMD_SIZE + * On error: response contains the numeric portion of cp error message. + * for SAVESYS it will be >= 263 + * for missing privilege class, it will be 1 + */ + if (response > SAVESYS_CMD_SIZE || response == 1) { + pr_err("Saving the Linux kernel NSS failed with rc=%d\n", + response); + kernel_nss_name[0] = '\0'; + return; + } + + /* re-initialize cputime accounting. */ + sched_clock_base_cc = get_tod_clock(); + S390_lowcore.last_update_clock = sched_clock_base_cc; + S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; + S390_lowcore.user_timer = 0; + S390_lowcore.system_timer = 0; + asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); + + /* re-setup boot command line with new ipl vm parms */ + ipl_update_parameters(); + setup_boot_command_line(); + + ipl_flags = IPL_NSS_VALID; +} + +#else /* CONFIG_SHARED_KERNEL */ + +static inline void create_kernel_nss(void) { } + +#endif /* CONFIG_SHARED_KERNEL */ + +/* + * Clear bss memory + */ +static noinline __init void clear_bss_section(void) +{ + memset(__bss_start, 0, __bss_stop - __bss_start); +} + +/* + * Initialize storage key for kernel pages + */ +static noinline __init void init_kernel_storage_key(void) +{ +#if PAGE_DEFAULT_KEY + unsigned long end_pfn, init_pfn; + + end_pfn = PFN_UP(__pa(&_end)); + + for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); +#endif +} + +static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static noinline __init void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + + /* Running under KVM? If not we assume z/VM */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; + else + S390_lowcore.machine_flags |= MACHINE_FLAG_VM; +} + +static __init void setup_topology(void) +{ +#ifdef CONFIG_64BIT + int max_mnest; + + if (!test_facility(11)) + return; + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) + break; + } + topology_max_mnest = max_mnest; +#endif +} + +static void early_pgm_check_handler(void) +{ + const struct exception_table_entry *fixup; + unsigned long cr0, cr0_new; + unsigned long addr; + + addr = S390_lowcore.program_old_psw.addr; + fixup = search_exception_tables(addr & PSW_ADDR_INSN); + if (!fixup) + disabled_wait(0); + /* Disable low address protection before storing into lowcore. */ + __ctl_store(cr0, 0, 0); + cr0_new = cr0 & ~(1UL << 28); + __ctl_load(cr0_new, 0, 0); + S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; + __ctl_load(cr0, 0, 0); +} + +static noinline __init void setup_lowcore_early(void) +{ + psw_t psw; + + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; + S390_lowcore.external_new_psw = psw; + psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + S390_lowcore.program_new_psw = psw; + s390_base_pgm_handler_fn = early_pgm_check_handler; +} + +static noinline __init void setup_facility_list(void) +{ + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); +} + +static __init void detect_mvpg(void) +{ +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " la 0,0\n" + " mvpg %2,%2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; +#endif +} + +static __init void detect_ieee(void) +{ +#ifndef CONFIG_64BIT + int rc, tmp; + + asm volatile( + " efpc %1,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; +#endif +} + +static __init void detect_csp(void) +{ +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " la 0,0\n" + " la 1,0\n" + " la 2,4\n" + " csp 0,2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; +#endif +} + +static __init void detect_diag9c(void) +{ + unsigned int cpu_address; + int rc; + + cpu_address = stap(); + asm volatile( + " diag %2,0,0x9c\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; +} + +static __init void detect_diag44(void) +{ +#ifdef CONFIG_64BIT + int rc; + + asm volatile( + " diag 0,0,0x44\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; +#endif +} + +static __init void detect_machine_facilities(void) +{ +#ifdef CONFIG_64BIT + if (test_facility(8)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; + __ctl_set_bit(0, 23); + } + if (test_facility(78)) + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; + if (test_facility(3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; + if (test_facility(40)) + S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; + if (test_facility(50) && test_facility(73)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + if (test_facility(66)) + S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; + if (test_facility(51)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; +#endif +} + +static __init void rescue_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); + /* + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. + */ + if (!INITRD_START || !INITRD_SIZE) + return; + if (INITRD_START >= min_initrd_addr) + return; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; +#endif +} + +/* Set up boot command line */ +static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) +{ + char *parm, *delim; + size_t rc, len; + + len = strlen(boot_command_line); + + delim = boot_command_line + len; /* '\0' character position */ + parm = boot_command_line + len + 1; /* append right after '\0' */ + + rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); + if (rc) { + if (*parm == '=') + memmove(boot_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ + } +} + +static inline int has_ebcdic_char(const char *str) +{ + int i; + + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} + +static void __init setup_boot_command_line(void) +{ + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); + /* copy arch command line */ + strlcpy(boot_command_line, strstrip(COMMAND_LINE), + ARCH_COMMAND_LINE_SIZE); + + /* append IPL PARM data to the boot command line */ + if (MACHINE_IS_VM) + append_to_cmdline(append_ipl_vmparm); + + append_to_cmdline(append_ipl_scpdata); +} + +/* + * Save ipl parameters, clear bss memory, initialize storage keys + * and create a kernel NSS at startup if the SAVESYS= parm is defined + */ +void __init startup_init(void) +{ + reset_tod_clock(); + ipl_save_parameters(); + rescue_initrd(); + clear_bss_section(); + init_kernel_storage_key(); + lockdep_init(); + lockdep_off(); + setup_lowcore_early(); + setup_facility_list(); + detect_machine_type(); + ipl_update_parameters(); + setup_boot_command_line(); + create_kernel_nss(); + detect_mvpg(); + detect_ieee(); + detect_csp(); + detect_diag9c(); + detect_diag44(); + detect_machine_facilities(); + setup_topology(); + sclp_early_detect(); +#ifdef CONFIG_DYNAMIC_FTRACE + S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; +#endif + lockdep_on(); +} diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c index bb0f973137f..b971c6be629 100644 --- a/arch/s390/kernel/ebcdic.c +++ b/arch/s390/kernel/ebcdic.c @@ -1,16 +1,16 @@ /* - * arch/s390/kernel/ebcdic.c * ECBDIC -> ASCII, ASCII -> ECBDIC, * upper to lower case (EBCDIC) conversion tables. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Martin Peschke <peschke@fh-brandenburg.de> */ #include <linux/module.h> #include <asm/types.h> +#include <asm/ebcdic.h> /* * ASCII (IBM PC 437) -> EBCDIC 037 diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 27b07730b7b..70203265196 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1,153 +1,144 @@ /* - * arch/s390/kernel/entry.S * S390 low-level entry points. * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Hartmut Penner (hp@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Hartmut Penner (hp@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sys.h> +#include <linux/init.h> #include <linux/linkage.h> -#include <linux/config.h> +#include <asm/processor.h> #include <asm/cache.h> -#include <asm/lowcore.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/page.h> - -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 12 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 20 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 28 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 36 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 44 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE - -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING | \ - _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) +#include <asm/sigp.h> +#include <asm/irq.h> + +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 4 +__PT_R2 = __PT_GPRS + 8 +__PT_R3 = __PT_GPRS + 12 +__PT_R4 = __PT_GPRS + 16 +__PT_R5 = __PT_GPRS + 20 +__PT_R6 = __PT_GPRS + 24 +__PT_R7 = __PT_GPRS + 28 +__PT_R8 = __PT_GPRS + 32 +__PT_R9 = __PT_GPRS + 36 +__PT_R10 = __PT_GPRS + 40 +__PT_R11 = __PT_GPRS + 44 +__PT_R12 = __PT_GPRS + 48 +__PT_R13 = __PT_GPRS + 524 +__PT_R14 = __PT_GPRS + 56 +__PT_R15 = __PT_GPRS + 60 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -#define BASED(name) name-system_call(%r13) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ +#define BASED(name) name-system_call(%r13) - .macro STORE_TIMER lc_offset -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - stpt \lc_offset + .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + l %r1,BASED(.Lhardirqs_on) + basr %r14,%r1 # call trace_hardirqs_on_caller #endif .endm -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lm %r10,%r11,\lc_from - sl %r10,\lc_to - sl %r11,\lc_to+4 - bc 3,BASED(0f) - sl %r10,BASED(.Lc_1) -0: al %r10,\lc_sum - al %r11,\lc_sum+4 - bc 12,BASED(1f) - al %r10,BASED(.Lc_1) -1: stm %r10,%r11,\lc_sum - .endm + .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + l %r1,BASED(.Lhardirqs_off) + basr %r14,%r1 # call trace_hardirqs_off_caller #endif + .endm - .macro SAVE_ALL_BASE savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 + .macro LOCKDEP_SYS_EXIT +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 + l %r1,BASED(.Llockdep_sys_exit) + basr %r14,%r1 # call lockdep_sys_exit +#endif .endm - .macro SAVE_ALL psworg,savearea,sync - la %r12,\psworg - .if \sync - tm \psworg+1,0x01 # test problem state bit - bz BASED(2f) # skip stack setup save - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - .else - tm \psworg+1,0x01 # test problem state bit - bnz BASED(1f) # from user -> load async stack - clc \psworg+4(4),BASED(.Lcritical_end) - bhe BASED(0f) - clc \psworg+4(4),BASED(.Lcritical_start) - bl BASED(0f) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 - tm 1(%r12),0x01 # retest problem state after cleanup - bnz BASED(1f) -0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ? - slr %r14,%r15 - sra %r14,STACK_SHIFT - be BASED(2f) -1: l %r15,__LC_ASYNC_STACK - .endif + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - b BASED(3f) -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bz BASED(stack_overflow) -3: + tml %r15,\stacksize - CONFIG_STACK_GUARD + la %r14,\savearea + jz stack_overflow #endif -2: .endm - .macro CREATE_STACK_FRAME psworg,savearea - s %r15,BASED(.Lc_spsize) # make room for registers & psw - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - la %r12,\psworg - st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,12,__LC_SVC_ILC - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_ILC(%r15) - mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack - la %r12,0 - st %r12,__SF_BACKCHAIN(%r15) # clear back chain + .macro SWITCH_ASYNC savearea,stack,shift + tmh %r8,0x0001 # interrupting from user ? + jnz 1f + lr %r14,%r9 + sl %r14,BASED(.Lcritical_start) + cl %r14,BASED(.Lcritical_length) + jhe 0f + la %r11,\savearea # inside critical section, do cleanup + bras %r14,cleanup_critical + tmh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: l %r14,\stack # are we already on the target stack? + slr %r14,%r15 + sra %r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: l %r15,\stack # load target stack +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(8),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - STORE_TIMER __LC_EXIT_TIMER - lpsw \psworg # back to caller + .macro ADD64 high,low,timer + al \high,\timer + al \low,4+\timer + brc 12,.+8 + ahi \high,1 .endm + .macro SUB64 high,low,timer + sl \high,\timer + sl \low,4+\timer + brc 3,.+8 + ahi \high,-1 + .endm + + .macro UPDATE_VTIME high,low,enter_timer + lm \high,\low,__LC_EXIT_TIMER + SUB64 \high,\low,\enter_timer + ADD64 \high,\low,__LC_USER_TIMER + stm \high,\low,__LC_USER_TIMER + lm \high,\low,__LC_LAST_UPDATE_TIMER + SUB64 \high,\low,__LC_EXIT_TIMER + ADD64 \high,\low,__LC_SYSTEM_TIMER + stm \high,\low,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer + .endm + + .macro REENABLE_IRQS + st %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW + .endm + + .section .kprobes.text, "ax" + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -155,34 +146,20 @@ STACK_SIZE = 1 << STACK_SHIFT * Returns: * gpr2 = prev */ - .globl __switch_to -__switch_to: - basr %r1,0 -__switch_to_base: - tm __THREAD_per(%r3),0xe8 # new process is using per ? - bz __switch_to_noper-__switch_to_base(%r1) # if not we're fine - stctl %c9,%c11,__SF_EMPTY(%r15) # We are using per stuff - clc __THREAD_per(12,%r3),__SF_EMPTY(%r15) - be __switch_to_noper-__switch_to_base(%r1) # we got away w/o bashing TLB's - lctl %c9,%c11,__THREAD_per(%r3) # Nope we didn't -__switch_to_noper: +ENTRY(__switch_to) + stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + st %r15,__THREAD_ksp(%r2) # store kernel stack of prev l %r4,__THREAD_info(%r2) # get thread_info of prev - tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - bz __switch_to_no_mcck-__switch_to_base(%r1) - ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - l %r4,__THREAD_info(%r3) # get thread_info of next - oi __TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next -__switch_to_no_mcck: - stm %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task - st %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp - l %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp - lm %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task - st %r3,__LC_CURRENT # __LC_CURRENT = current task struct - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - l %r3,__THREAD_info(%r3) # load thread_info from task struct - st %r3,__LC_THREAD_INFO - ahi %r3,STACK_SIZE - st %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack + l %r5,__THREAD_info(%r3) # get thread_info of next + lr %r15,%r5 + ahi %r15,STACK_INIT # end of kernel stack of next + st %r3,__LC_CURRENT # store task struct of next + st %r5,__LC_THREAD_INFO # store thread info of next + st %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + l %r15,__THREAD_ksp(%r3) # load kernel stack of next + lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -191,487 +168,473 @@ __critical_start: * are executed with interrupts enabled. */ - .globl system_call -system_call: - STORE_TIMER __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_BASE __LC_SAVE_AREA - SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - lh %r7,0x8a # get svc number from lowcore -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +ENTRY(system_call) + stpt __LC_SYNC_ENTER_TIMER +sysc_stm: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lhi %r14,_PIF_SYSCALL +sysc_per: + l %r15,__LC_KERNEL_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(sysc_do_svc) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + st %r14,__PT_FLAGS(%r11) sysc_do_svc: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - sla %r7,2 # *4 and test for svc 0 - bnz BASED(sysc_nr_ok) # svc number > 0 + l %r10,__TI_sysc_table(%r12) # 31 bit system call table + lh %r8,__PT_INT_CODE+2(%r11) + sla %r8,2 # shift and test for svc0 + jnz sysc_nr_ok # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) - bnl BASED(sysc_nr_ok) - lr %r7,%r1 # copy svc number to %r7 - sla %r7,2 # *4 + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + lr %r8,%r1 + sla %r8,2 sysc_nr_ok: - mvc SP_ARGS(4,%r15),SP_R7(%r15) -sysc_do_restart: - tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - l %r8,sys_call_table-system_call(%r7,%r13) # get system call addr. - bnz BASED(sysc_tracesys) - basr %r14,%r8 # call sys_xxxx - st %r2,SP_R2(%r15) # store return value (change R2 on stack) - # ATTENTION: check sys_execve_glue before - # changing anything here !! + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + st %r2,__PT_ORIG_GPR2(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r9,0(%r8,%r10) # get system call addr. + tm __TI_flags+3(%r12),_TIF_TRACE + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + st %r2,__PT_R2(%r11) # store return value sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_leave) - tm __TI_flags+3(%r9),_TIF_WORK_SVC - bnz BASED(sysc_work) # there is work to do (signals etc.) -sysc_leave: - RESTORE_ALL __LC_RETURN_PSW,1 + LOCKDEP_SYS_EXIT +sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __PT_FLAGS+3(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+3(%r12),_TIF_WORK + jnz sysc_work # check for thread work + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz sysc_work +sysc_restore: + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW +sysc_done: # -# recheck if there is more work to do -# -sysc_work_loop: - tm __TI_flags+3(%r9),_TIF_WORK_SVC - bz BASED(sysc_leave) # there is no work to do -# # One of the work bits is on. Find out which one. # sysc_work: - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING - bo BASED(sysc_mcck_pending) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bo BASED(sysc_reschedule) - tm __TI_flags+3(%r9),_TIF_SIGPENDING - bo BASED(sysc_sigpending) - tm __TI_flags+3(%r9),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r9),_TIF_SINGLE_STEP - bo BASED(sysc_singlestep) - b BASED(sysc_leave) + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING + jo sysc_mcck_pending + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jo sysc_reschedule + tm __PT_FLAGS+3(%r11),_PIF_PER_TRAP + jo sysc_singlestep + tm __TI_flags+3(%r12),_TIF_SIGPENDING + jo sysc_sigpending + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME + jo sysc_notify_resume + tm __LC_CPU_FLAGS+3,_CIF_ASCE + jo sysc_uaccess + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule -# -sysc_reschedule: - l %r1,BASED(.Lschedule) - la %r14,BASED(sysc_work_loop) - br %r1 # call scheduler +# +sysc_reschedule: + l %r1,BASED(.Lschedule) + la %r14,BASED(sysc_return) + br %r1 # call schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) - la %r14,BASED(sysc_work_loop) + l %r1,BASED(.Lhandle_mcck) + la %r14,BASED(sysc_return) br %r1 # TIF bit will be cleared by handler # -# _TIF_SIGPENDING is set, call do_signal +# _CIF_ASCE is set, load user space asce # -sysc_sigpending: - ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - la %r2,SP_PTREGS(%r15) # load pt_regs - sr %r3,%r3 # clear *oldset - l %r1,BASED(.Ldo_signal) - basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r9),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r9),_TIF_SINGLE_STEP - bo BASED(sysc_singlestep) - b BASED(sysc_work_loop) +sysc_uaccess: + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE + lctl %c1,%c1,__LC_USER_ASCE # load primary asce + j sysc_return # -# _TIF_RESTART_SVC is set, set up registers and restart svc +# _TIF_SIGPENDING is set, call do_signal # -sysc_restart: - ni __TI_flags+3(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - sla %r7,2 - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument - lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_restart) # restart svc +sysc_sigpending: + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_signal) + basr %r14,%r1 # call do_signal + tm __PT_FLAGS+3(%r11),_PIF_SYSCALL + jno sysc_return + lm %r2,%r7,__PT_R2(%r11) # load svc arguments + l %r10,__TI_sysc_table(%r12) # 31 bit system call table + xr %r8,%r8 # svc 0 returns -ENOSYS + clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) + jnl sysc_nr_ok # invalid svc number -> do svc 0 + lh %r8,__PT_INT_CODE+2(%r11) # load new svc number + sla %r8,2 + j sysc_nr_ok # restart svc + +# +# _TIF_NOTIFY_RESUME is set, call do_notify_resume +# +sysc_notify_resume: + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_notify_resume) + la %r14,BASED(sysc_return) + br %r1 # call do_notify_resume # -# _TIF_SINGLE_STEP is set, call do_single_step +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_return) # load adr. of system return - br %r1 # branch to do_single_step + ni __PT_FLAGS+3(%r11),255-_PIF_PER_TRAP + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_per_trap) + la %r14,BASED(sysc_return) + br %r1 # call do_per_trap # -# call trace before and after sys_call +# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before +# and after the system call # sysc_tracesys: - l %r1,BASED(.Ltrace) - la %r2,SP_PTREGS(%r15) # load pt_regs + l %r1,BASED(.Ltrace_enter) + lr %r2,%r11 # pass pointer to pt_regs la %r3,0 - srl %r7,2 - st %r7,SP_R2(%r15) - basr %r14,%r1 - clc SP_R2(4,%r15),BASED(.Lnr_syscalls) - bnl BASED(sysc_tracenogo) - l %r7,SP_R2(%r15) # strace might have changed the - sll %r7,2 # system call - l %r8,sys_call_table-system_call(%r7,%r13) + xr %r0,%r0 + icm %r0,3,__PT_INT_CODE+2(%r11) + st %r0,__PT_R2(%r11) + basr %r14,%r1 # call do_syscall_trace_enter + cl %r2,BASED(.Lnr_syscalls) + jnl sysc_tracenogo + lr %r8,%r2 + sll %r8,2 + l %r9,0(%r8,%r10) sysc_tracego: - lm %r3,%r6,SP_R3(%r15) - l %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - st %r2,SP_R2(%r15) # store return value + lm %r3,%r7,__PT_R3(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + st %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - bz BASED(sysc_return) - l %r1,BASED(.Ltrace) - la %r2,SP_PTREGS(%r15) # load pt_regs - la %r3,1 + tm __TI_flags+3(%r12),_TIF_TRACE + jz sysc_return + l %r1,BASED(.Ltrace_exit) + lr %r2,%r11 # pass pointer to pt_regs la %r14,BASED(sysc_return) - br %r1 + br %r1 # call do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # - .globl ret_from_fork -ret_from_fork: +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + l %r12,__LC_THREAD_INFO l %r13,__LC_SVC_NEW_PSW+4 - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - bo BASED(0f) - st %r15,SP_R15(%r15) # store stack pointer for new kthread -0: l %r1,BASED(.Lschedtail) - basr %r14,%r1 - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_return) - -# -# clone, fork, vfork, exec and sigreturn need glue, -# because they all expect pt_regs as parameter, -# but are called with different parameter. -# return-address is set up above -# -sys_clone_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Lclone) - br %r1 # branch to sys_clone - -sys_fork_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Lfork) - br %r1 # branch to sys_fork - -sys_vfork_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Lvfork) - br %r1 # branch to sys_vfork - -sys_execve_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Lexecve) - lr %r12,%r14 # save return address - basr %r14,%r1 # call sys_execve - ltr %r2,%r2 # check if execve failed - bnz 0(%r12) # it did fail -> store result in gpr2 - b 4(%r12) # SKIP ST 2,SP_R2(15) after BASR 14,8 - # in system_call/sysc_tracesys - -sys_sigreturn_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs as parameter - l %r1,BASED(.Lsigreturn) - br %r1 # branch to sys_sigreturn - -sys_rt_sigreturn_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs as parameter - l %r1,BASED(.Lrt_sigreturn) - br %r1 # branch to sys_sigreturn - -# -# sigsuspend and rt_sigsuspend need pt_regs as an additional -# parameter and they have to skip the store of %r2 into the -# user register %r2 because the return value was set in -# sigsuspend and rt_sigsuspend already and must not be overwritten! -# - -sys_sigsuspend_glue: - lr %r5,%r4 # move mask back - lr %r4,%r3 # move history1 parameter - lr %r3,%r2 # move history0 parameter - la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter - l %r1,BASED(.Lsigsuspend) - la %r14,4(%r14) # skip store of return value - br %r1 # branch to sys_sigsuspend - -sys_rt_sigsuspend_glue: - lr %r4,%r3 # move sigsetsize parameter - lr %r3,%r2 # move unewset parameter - la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter - l %r1,BASED(.Lrt_sigsuspend) - la %r14,4(%r14) # skip store of return value - br %r1 # branch to sys_rt_sigsuspend - -sys_sigaltstack_glue: - la %r4,SP_PTREGS(%r15) # load pt_regs as parameter - l %r1,BASED(.Lsigaltstack) - br %r1 # branch to sys_sigreturn - + l %r1,BASED(.Lschedule_tail) + basr %r14,%r1 # call schedule_tail + TRACE_IRQS_ON + ssm __LC_SVC_NEW_PSW # reenable interrupts + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jne sysc_tracenogo + # it's a kernel thread + lm %r9,%r10,__PT_R9(%r11) # load gprs +ENTRY(kernel_thread_starter) + la %r2,0(%r10) + basr %r14,%r9 + j sysc_tracenogo /* * Program check handler routine */ - .globl pgm_check_handler -pgm_check_handler: -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ - STORE_TIMER __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - bnz BASED(pgm_per) # got per exception -> special case - SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - l %r3,__LC_PGM_ILC # load program interruption code - la %r8,0x7f - nr %r8,%r3 -pgm_do_call: - l %r7,BASED(.Ljump_table) - sll %r8,2 - l %r7,0(%r8,%r7) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - la %r14,BASED(sysc_return) - br %r7 # branch to interrupt-handler - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - bnz BASED(pgm_per_std) # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(8),__LC_SVC_NEW_PSW - be BASED(pgm_svcper) -# no interesting special case, ignore PER event - lm %r12,%r15,__LC_SAVE_AREA - lpsw 0x28 - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime2) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - l %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - l %r3,__LC_PGM_ILC # load program interruption code - la %r8,0x7f - nr %r8,%r3 # clear per-event-bit and ilc - be BASED(sysc_return) # only per or per+check ? - b BASED(pgm_do_call) - -# -# it was a single stepped SVC that is causing all the trouble +ENTRY(pgm_check_handler) + stpt __LC_SYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_PGM_OLD_PSW + tmh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + l %r15,__LC_KERNEL_STACK +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + l %r1,__TI_task(%r12) + tmh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP + mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE + mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ljump_table) + la %r10,0x7f + n %r10,__PT_INT_CODE(%r11) + je sysc_return + sll %r10,2 + l %r1,0(%r10,%r1) # load address of handler routine + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # branch to interrupt-handler + j sysc_return + +# +# PER event in supervisor state, must be kprobes +# +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_per_trap) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_per_trap + j sysc_return + +# +# single stepped system call # pgm_svcper: - SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime3) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime3: -#endif - lh %r7,0x8a # get svc number from lowcore - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - l %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_do_svc) + mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lhi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine */ - .globl io_int_handler -io_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER +ENTRY(io_int_handler) stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+16 - SAVE_ALL __LC_IO_OLD_PSW,__LC_SAVE_AREA+16,0 - CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(io_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to standard irq handler - + stpt __LC_ASYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_IO_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz io_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +io_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + TRACE_IRQS_OFF + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: + basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - bno BASED(io_preempt) # no -> check for preemptive scheduling -#else - bno BASED(io_leave) # no-> skip resched & signal -#endif - tm __TI_flags+3(%r9),_TIF_WORK_INT - bnz BASED(io_work) # there is work to do (signals etc.) -io_leave: - RESTORE_ALL __LC_RETURN_PSW,0 + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: + tm __TI_flags+3(%r12),_TIF_WORK + jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz io_work +io_restore: + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW io_done: +# +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK work +# 2) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. +# +io_work: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT -io_preempt: - icm %r0,15,__TI_precount(%r9) - bnz BASED(io_leave) - l %r1,SP_R15(%r15) - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + # check for preemptive scheduling + icm %r0,15,__TI_precount(%r12) + jnz io_restore # preemption disabled + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jno io_restore + # switch to kernel stack + l %r1,__PT_R15(%r11) + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 -io_resume_loop: - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bno BASED(io_leave) - mvc __TI_precount(4,%r9),BASED(.Lc_pactive) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - l %r1,BASED(.Lschedule) - basr %r14,%r1 # call schedule - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - xc __TI_precount(4,%r9),__TI_precount(%r9) - b BASED(io_resume_loop) + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + l %r1,BASED(.Lpreempt_irq) + basr %r14,%r1 # call preempt_schedule_irq + j io_return +#else + j io_restore #endif # -# switch to kernel stack, then check the TIF bits +# Need to do work before returning to userspace, switch to kernel stack # -io_work: +io_work_user: l %r1,__LC_KERNEL_STACK - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 + # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED and _TIF_MCCK_PENDING # -io_work_loop: - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING - bo BASED(io_mcck_pending) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bo BASED(io_reschedule) - tm __TI_flags+3(%r9),_TIF_SIGPENDING - bo BASED(io_sigpending) - b BASED(io_leave) +io_work_tif: + tm __LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING + jo io_mcck_pending + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jo io_reschedule + tm __TI_flags+3(%r12),_TIF_SIGPENDING + jo io_sigpending + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME + jo io_notify_resume + tm __LC_CPU_FLAGS+3,_CIF_ASCE + jo io_uaccess + j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) - la %r14,BASED(io_work_loop) - br %r1 # TIF bit will be cleared by handler + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # TIF bit will be cleared by handler + TRACE_IRQS_OFF + j io_return + +# +# _CIF_ASCE is set, load user space asce +# +io_uaccess: + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE + lctl %c1,%c1,__LC_USER_ASCE # load primary asce + j io_return # # _TIF_NEED_RESCHED is set, call schedule -# -io_reschedule: - l %r1,BASED(.Lschedule) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - basr %r14,%r1 # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - tm __TI_flags+3(%r9),_TIF_WORK_INT - bz BASED(io_leave) # there is no work to do - b BASED(io_work_loop) +# +io_reschedule: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Lschedule) + ssm __LC_SVC_NEW_PSW # reenable interrupts + basr %r14,%r1 # call scheduler + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_SIGPENDING is set, call do_signal +# +io_sigpending: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Ldo_signal) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return # # _TIF_SIGPENDING is set, call do_signal # -io_sigpending: - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - sr %r3,%r3 # clear *oldset - l %r1,BASED(.Ldo_signal) - basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - b BASED(io_work_loop) +io_notify_resume: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Ldo_notify_resume) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return /* * External interrupt handler routine */ - .globl ext_int_handler -ext_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER +ENTRY(ext_int_handler) stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+16 - SAVE_ALL __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16,0 - CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(ext_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # address of register-save area - lh %r3,__LC_EXT_INT_CODE # get interruption code - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 - b BASED(io_return) + stpt __LC_ASYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_EXT_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +ext_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + TRACE_IRQS_OFF + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ + j io_return + +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + st %r3,__SF_EMPTY(%r15) + basr %r1,0 + la %r1,psw_idle_lpsw+4-.(%r1) + st %r1,__SF_EMPTY+4(%r15) + oi __SF_EMPTY+4(%r15),0x80 + stck __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) +psw_idle_lpsw: + lpsw __SF_EMPTY(%r15) + br %r14 +psw_idle_end: __critical_end: @@ -679,99 +642,108 @@ __critical_end: * Machine check handler routines */ - .globl mcck_int_handler -mcck_int_handler: +ENTRY(mcck_int_handler) + stck __LC_MCCK_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+32 - la %r12,__LC_MCK_OLD_PSW - tm __LC_MCCK_CODE,0x80 # system damage? - bo BASED(mcck_int_main) # yes -> rest of mcck code invalid - tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - bo BASED(0f) - spt __LC_LAST_UPDATE_TIMER # revalidate cpu timer -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_LAST_UPDATE_TIMER - mvc __LC_SYNC_ENTER_TIMER(8),__LC_LAST_UPDATE_TIMER - mvc __LC_EXIT_TIMER(8),__LC_LAST_UPDATE_TIMER -#endif -0: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - bno BASED(mcck_int_main) # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - bnz BASED(mcck_int_main) # from user -> load async stack - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) - bhe BASED(mcck_int_main) - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) - bl BASED(mcck_int_main) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 -mcck_int_main: - l %r14,__LC_PANIC_STACK # are we already on the panic stack? - slr %r14,%r15 - sra %r14,PAGE_SHIFT - be BASED(0f) - l %r15,__LC_PANIC_STACK # load panic stack -0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - bno BASED(mcck_no_vtime) # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # interrupting from user ? - bz BASED(mcck_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -mcck_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(mcck_return) - l %r1,__LC_KERNEL_STACK # switch to kernel stack - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_MCK_OLD_PSW + tm __LC_MCCK_CODE,0x80 # system damage? + jo mcck_panic # yes -> rest of mcck code invalid + la %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + jo 3f + la %r14,__LC_SYNC_ENTER_TIMER + clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER + jl 0f + la %r14,__LC_ASYNC_ENTER_TIMER +0: clc 0(8,%r14),__LC_EXIT_TIMER + jl 1f + la %r14,__LC_EXIT_TIMER +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f + la %r14,__LC_LAST_UPDATE_TIMER +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER +mcck_skip: + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 + stm %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_machine_check) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno mcck_return + l %r1,__LC_KERNEL_STACK # switch to kernel stack + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r15) lr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING - bno BASED(mcck_return) - l %r1,BASED(.Ls390_handle_mcck) - basr %r14,%r1 # call machine check handler + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING + jno mcck_return + TRACE_IRQS_OFF + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # call s390_handle_mcck + TRACE_IRQS_ON mcck_return: - RESTORE_ALL __LC_RETURN_MCCK_PSW,0 - -#ifdef CONFIG_SMP -/* - * Restart interruption handler, kick starter for additional CPUs - */ - .globl restart_int_handler -restart_int_handler: - l %r15,__LC_SAVE_AREA+60 # load ksp - lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs - lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - basr %r14,0 - l %r14,restart_addr-.(%r14) - br %r14 # branch to start_secondary -restart_addr: - .long start_secondary -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - lpsw restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000 -restart_go: -#endif + mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + jno 0f + lm %r0,%r15,__PT_R0(%r11) + stpt __LC_EXIT_TIMER + lpsw __LC_RETURN_MCCK_PSW +0: lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_MCCK_PSW + +mcck_panic: + l %r14,__LC_PANIC_STACK + slr %r14,%r15 + sra %r14,PAGE_SHIFT + jz 0f + l %r15,__LC_PANIC_STACK + j mcck_skip +0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + st %r15,__LC_SAVE_AREA_RESTART + l %r15,__LC_RESTART_STACK + ahi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stm %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw + ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + l %r1,__LC_RESTART_FN # load fn, parm & source cpu + l %r2,__LC_RESTART_DATA + l %r3,__LC_RESTART_SOURCE + ltr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + lh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b + + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK /* @@ -781,232 +753,214 @@ restart_go: */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - sl %r15,BASED(.Lc_spsize) - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - ch %r12,BASED(.L0x020) # old psw addr == __LC_SVC_OLD_PSW ? - be BASED(0f) - ch %r12,BASED(.L0x028) # old psw addr == __LC_PGM_OLD_PSW ? - be BASED(0f) - la %r1,__LC_SAVE_AREA+16 -0: mvc SP_R12(16,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear back chain - l %r1,BASED(1f) # branch to kernel_stack_overflow - la %r2,SP_PTREGS(%r15) # load pt_regs - br %r1 -1: .long kernel_stack_overflow + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(32,%r11),0(%r14) + l %r1,BASED(1f) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + lr %r2,%r11 # pass pointer to pt_regs + br %r1 # branch to kernel_stack_overflow +1: .long kernel_stack_overflow #endif -cleanup_table_system_call: - .long system_call + 0x80000000, sysc_do_svc + 0x80000000 -cleanup_table_sysc_return: - .long sysc_return + 0x80000000, sysc_leave + 0x80000000 -cleanup_table_sysc_leave: - .long sysc_leave + 0x80000000, sysc_work_loop + 0x80000000 -cleanup_table_sysc_work_loop: - .long sysc_work_loop + 0x80000000, sysc_reschedule + 0x80000000 -cleanup_table_io_leave: - .long io_leave + 0x80000000, io_done + 0x80000000 -cleanup_table_io_work_loop: - .long io_work_loop + 0x80000000, io_mcck_pending + 0x80000000 +cleanup_table: + .long system_call + 0x80000000 + .long sysc_do_svc + 0x80000000 + .long sysc_tif + 0x80000000 + .long sysc_restore + 0x80000000 + .long sysc_done + 0x80000000 + .long io_tif + 0x80000000 + .long io_restore + 0x80000000 + .long io_done + 0x80000000 + .long psw_idle + 0x80000000 + .long psw_idle_end + 0x80000000 cleanup_critical: - clc 4(4,%r12),BASED(cleanup_table_system_call) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_system_call+4) - bl BASED(cleanup_system_call) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_return) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_return+4) - bl BASED(cleanup_sysc_return) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_leave) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_leave+4) - bl BASED(cleanup_sysc_leave) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop+4) - bl BASED(cleanup_sysc_return) -0: - clc 4(4,%r12),BASED(cleanup_table_io_leave) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_leave+4) - bl BASED(cleanup_io_leave) -0: - clc 4(4,%r12),BASED(cleanup_table_io_work_loop) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_work_loop+4) - bl BASED(cleanup_io_return) -0: - br %r14 + cl %r9,BASED(cleanup_table) # system_call + jl 0f + cl %r9,BASED(cleanup_table+4) # sysc_do_svc + jl cleanup_system_call + cl %r9,BASED(cleanup_table+8) # sysc_tif + jl 0f + cl %r9,BASED(cleanup_table+12) # sysc_restore + jl cleanup_sysc_tif + cl %r9,BASED(cleanup_table+16) # sysc_done + jl cleanup_sysc_restore + cl %r9,BASED(cleanup_table+20) # io_tif + jl 0f + cl %r9,BASED(cleanup_table+24) # io_restore + jl cleanup_io_tif + cl %r9,BASED(cleanup_table+28) # io_done + jl cleanup_io_restore + cl %r9,BASED(cleanup_table+32) # psw_idle + jl 0f + cl %r9,BASED(cleanup_table+36) # psw_idle_end + jl cleanup_idle +0: br %r14 cleanup_system_call: - mvc __LC_RETURN_PSW(8),0(%r12) - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) - la %r12,__LC_SAVE_AREA+16 - b BASED(1f) -0: la %r12,__LC_SAVE_AREA+32 -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) - bh BASED(0f) + # check if stpt has been executed + cl %r9,BASED(cleanup_system_call_insn) + jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) - bhe BASED(cleanup_vtime) -#endif - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) - bh BASED(0f) - mvc __LC_SAVE_AREA(16),0(%r12) -0: st %r13,4(%r12) - st %r12,__LC_SAVE_AREA+48 # argh - SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - l %r12,__LC_SAVE_AREA+48 # argh - st %r15,12(%r12) - lh %r7,0x8a -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -cleanup_vtime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) - bhe BASED(cleanup_stime) - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(cleanup_novtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16) - bh BASED(cleanup_update) - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stm has been executed + cl %r9,BASED(cleanup_system_call_insn+4) + jh 0f + mvc __LC_SAVE_AREA_SYNC(32),0(%r11) +0: # set up saved registers r12, and r13 + st %r12,16(%r11) # r12 thread-info pointer + st %r13,20(%r11) # r13 literal-pool pointer + # check if the user time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+8) + jh 0f + l %r10,__LC_EXIT_TIMER + l %r15,__LC_EXIT_TIMER+4 + SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER + ADD64 %r10,%r15,__LC_USER_TIMER + st %r10,__LC_USER_TIMER + st %r15,__LC_USER_TIMER+4 +0: # check if the system time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+12) + jh 0f + l %r10,__LC_LAST_UPDATE_TIMER + l %r15,__LC_LAST_UPDATE_TIMER+4 + SUB64 %r10,%r15,__LC_EXIT_TIMER + ADD64 %r10,%r15,__LC_SYSTEM_TIMER + st %r10,__LC_SYSTEM_TIMER + st %r15,__LC_SYSTEM_TIMER+4 +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -cleanup_novtime: -#endif - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) - la %r12,__LC_RETURN_PSW + # set up saved register 11 + l %r15,__LC_KERNEL_STACK + la %r9,STACK_FRAME_OVERHEAD(%r15) + st %r9,12(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(4,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+3(%r9),_PIF_SYSCALL + # setup saved register 15 + st %r15,28(%r11) # r15 stack pointer + # set new psw address and exit + l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 br %r14 cleanup_system_call_insn: - .long sysc_saveall + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long system_call + 0x80000000 - .long sysc_vtime + 0x80000000 - .long sysc_stime + 0x80000000 - .long sysc_update + 0x80000000 -#endif + .long system_call + 0x80000000 + .long sysc_stm + 0x80000000 + .long sysc_vtime + 0x80000000 + 36 + .long sysc_vtime + 0x80000000 + 76 -cleanup_sysc_return: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_return) - la %r12,__LC_RETURN_PSW +cleanup_sysc_tif: + l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 br %r14 -cleanup_sysc_leave: - clc 4(4,%r12),BASED(cleanup_sysc_leave_insn) - be BASED(2f) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_sysc_leave_insn+4) - be BASED(2f) -#endif - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - bne BASED(0f) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - b BASED(1f) -0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15) -1: lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_sysc_restore: + cl %r9,BASED(cleanup_sysc_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave_insn: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long sysc_leave + 14 + 0x80000000 -#endif - .long sysc_leave + 10 + 0x80000000 +cleanup_sysc_restore_insn: + .long sysc_done - 4 + 0x80000000 -cleanup_io_return: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_work_loop) - la %r12,__LC_RETURN_PSW +cleanup_io_tif: + l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 br %r14 -cleanup_io_leave: - clc 4(4,%r12),BASED(cleanup_io_leave_insn) - be BASED(2f) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_io_leave_insn+4) - be BASED(2f) -#endif - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - bne BASED(0f) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - b BASED(1f) -0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15) -1: lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_io_restore: + cl %r9,BASED(cleanup_io_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_io_leave_insn: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long io_leave + 18 + 0x80000000 -#endif - .long io_leave + 14 + 0x80000000 +cleanup_io_restore_insn: + .long io_done - 4 + 0x80000000 + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER +0: # check if stck has been executed + cl %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) + mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3) +1: # account system time going idle + lm %r9,%r10,__LC_STEAL_TIMER + ADD64 %r9,%r10,__CLOCK_IDLE_ENTER(%r2) + SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK + stm %r9,%r10,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + lm %r9,%r10,__LC_SYSTEM_TIMER + ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER + SUB64 %r9,%r10,__TIMER_IDLE_ENTER(%r2) + stm %r9,%r10,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + # prepare return psw + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits + l %r9,24(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .long psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: + .long 0xfcfdffff /* * Integer constants */ - .align 4 -.Lc_spsize: .long SP_SIZE -.Lc_overhead: .long STACK_FRAME_OVERHEAD -.Lc_pactive: .long PREEMPT_ACTIVE -.Lnr_syscalls: .long NR_syscalls -.L0x018: .short 0x018 -.L0x020: .short 0x020 -.L0x028: .short 0x028 -.L0x030: .short 0x030 -.L0x038: .short 0x038 -.Lc_1: .long 1 + .align 4 +.Lnr_syscalls: + .long NR_syscalls +.Lvtimer_max: + .quad 0x7fffffffffffffff /* * Symbol constants */ -.Ls390_mcck: .long s390_do_machine_check -.Ls390_handle_mcck: - .long s390_handle_mcck -.Lmck_old_psw: .long __LC_MCK_OLD_PSW -.Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint -.Ldo_signal: .long do_signal -.Lhandle_per: .long do_single_step -.Ljump_table: .long pgm_check_table -.Lschedule: .long schedule -.Lclone: .long sys_clone -.Lexecve: .long sys_execve -.Lfork: .long sys_fork -.Lrt_sigreturn:.long sys_rt_sigreturn -.Lrt_sigsuspend: - .long sys_rt_sigsuspend -.Lsigreturn: .long sys_sigreturn -.Lsigsuspend: .long sys_sigsuspend -.Lsigaltstack: .long sys_sigaltstack -.Ltrace: .long syscall_trace -.Lvfork: .long sys_vfork -.Lschedtail: .long schedule_tail - -.Lcritical_start: - .long __critical_start + 0x80000000 -.Lcritical_end: - .long __critical_end + 0x80000000 -.Lcleanup_critical: - .long cleanup_critical +.Ldo_machine_check: .long s390_do_machine_check +.Lhandle_mcck: .long s390_handle_mcck +.Ldo_IRQ: .long do_IRQ +.Ldo_signal: .long do_signal +.Ldo_notify_resume: .long do_notify_resume +.Ldo_per_trap: .long do_per_trap +.Ljump_table: .long pgm_check_table +.Lschedule: .long schedule +#ifdef CONFIG_PREEMPT +.Lpreempt_irq: .long preempt_schedule_irq +#endif +.Ltrace_enter: .long do_syscall_trace_enter +.Ltrace_exit: .long do_syscall_trace_exit +.Lschedule_tail: .long schedule_tail +.Lsysc_per: .long sysc_per + 0x80000000 +#ifdef CONFIG_TRACE_IRQFLAGS +.Lhardirqs_on: .long trace_hardirqs_on_caller +.Lhardirqs_off: .long trace_hardirqs_off_caller +#endif +#ifdef CONFIG_LOCKDEP +.Llockdep_sys_exit: .long lockdep_sys_exit +#endif +.Lcritical_start: .long __critical_start + 0x80000000 +.Lcritical_length: .long __critical_end - __critical_start + .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esa - .globl sys_call_table + .globl sys_call_table sys_call_table: #include "syscalls.S" #undef SYSCALL - diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h new file mode 100644 index 00000000000..6ac78192455 --- /dev/null +++ b/arch/s390/kernel/entry.h @@ -0,0 +1,73 @@ +#ifndef _ENTRY_H +#define _ENTRY_H + +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/ptrace.h> +#include <asm/cputime.h> + +extern void *restart_stack; +extern unsigned long suspend_zero_pages; + +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, unsigned long); + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); + +void addressing_exception(struct pt_regs *regs); +void data_exception(struct pt_regs *regs); +void default_trap_handler(struct pt_regs *regs); +void divide_exception(struct pt_regs *regs); +void execute_exception(struct pt_regs *regs); +void hfp_divide_exception(struct pt_regs *regs); +void hfp_overflow_exception(struct pt_regs *regs); +void hfp_significance_exception(struct pt_regs *regs); +void hfp_sqrt_exception(struct pt_regs *regs); +void hfp_underflow_exception(struct pt_regs *regs); +void illegal_op(struct pt_regs *regs); +void operand_exception(struct pt_regs *regs); +void overflow_exception(struct pt_regs *regs); +void privileged_op(struct pt_regs *regs); +void space_switch_exception(struct pt_regs *regs); +void special_op_exception(struct pt_regs *regs); +void specification_exception(struct pt_regs *regs); +void transaction_exception(struct pt_regs *regs); +void translation_exception(struct pt_regs *regs); + +void do_per_trap(struct pt_regs *regs); +void syscall_trace(struct pt_regs *regs, int entryexit); +void kernel_stack_overflow(struct pt_regs * regs); +void do_signal(struct pt_regs *regs); +void handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); + +void __init init_IRQ(void); +void do_IRQ(struct pt_regs *regs, int irq); +void do_restart(void); +void __init startup_init(void); +void die(struct pt_regs *regs, const char *str); +int setup_profiling_timer(unsigned int multiplier); +void __init time_init(void); +int pfn_is_nosave(unsigned long); +void s390_early_resume(void); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); + +struct s390_mmap_arg_struct; +struct fadvise64_64_args; +struct old_sigaction; + +long sys_s390_personality(unsigned int personality); +long sys_s390_runtime_instr(int command, int signum); + +#endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 369ab4413ec..f2e674c702e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1,146 +1,173 @@ /* - * arch/s390/kernel/entry.S * S390 low-level entry points. * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Hartmut Penner (hp@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Hartmut Penner (hp@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sys.h> +#include <linux/init.h> #include <linux/linkage.h> -#include <linux/config.h> +#include <asm/processor.h> #include <asm/cache.h> -#include <asm/lowcore.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/page.h> - -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 64 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 72 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 80 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 88 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 96 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +#include <asm/sigp.h> +#include <asm/irq.h> + +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 8 +__PT_R2 = __PT_GPRS + 16 +__PT_R3 = __PT_GPRS + 24 +__PT_R4 = __PT_GPRS + 32 +__PT_R5 = __PT_GPRS + 40 +__PT_R6 = __PT_GPRS + 48 +__PT_R7 = __PT_GPRS + 56 +__PT_R8 = __PT_GPRS + 64 +__PT_R9 = __PT_GPRS + 72 +__PT_R10 = __PT_GPRS + 80 +__PT_R11 = __PT_GPRS + 88 +__PT_R12 = __PT_GPRS + 96 +__PT_R13 = __PT_GPRS + 104 +__PT_R14 = __PT_GPRS + 112 +__PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING | \ - _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) #define BASED(name) name-system_call(%r13) - .macro STORE_TIMER lc_offset -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - stpt \lc_offset + .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + brasl %r14,trace_hardirqs_on_caller #endif .endm -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lg %r10,\lc_from - slg %r10,\lc_to - alg %r10,\lc_sum - stg %r10,\lc_sum - .endm + .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + brasl %r14,trace_hardirqs_off_caller #endif + .endm -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ + .macro LOCKDEP_SYS_EXIT +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 + brasl %r14,lockdep_sys_exit +#endif + .endm - .macro SAVE_ALL_BASE savearea - stmg %r12,%r15,\savearea - larl %r13,system_call + .macro LPP newpp +#if IS_ENABLED(CONFIG_KVM) + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP + jz .+8 + .insn s,0xb2800000,\newpp +#endif .endm - .macro SAVE_ALL psworg,savearea,sync - la %r12,\psworg - .if \sync - tm \psworg+1,0x01 # test problem state bit - jz 2f # skip stack setup save - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp + .macro HANDLE_SIE_INTERCEPT scratch,reason +#if IS_ENABLED(CONFIG_KVM) + tmhh %r8,0x0001 # interrupting from user ? + jnz .+62 + lgr \scratch,%r9 + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 + # Some program interrupts are suppressing (e.g. protection). + # We must also check the instruction after SIE in that case. + # do_protection_exception will rewind to rewind_pad + jh .+42 .else - tm \psworg+1,0x01 # test problem state bit - jnz 1f # from user -> load kernel stack - clc \psworg+8(8),BASED(.Lcritical_end) - jhe 0f - clc \psworg+8(8),BASED(.Lcritical_start) - jl 0f - brasl %r14,cleanup_critical - tm 1(%r12),0x01 # retest problem state after cleanup - jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ? - slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT - jz 2f -1: lg %r15,__LC_ASYNC_STACK # load async stack + jhe .+42 .endif + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason +#endif + .endm + + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - j 3f -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + tml %r15,\stacksize - CONFIG_STACK_GUARD + lghi %r14,\savearea jz stack_overflow -3: #endif -2: .endm - .macro CREATE_STACK_FRAME psworg,savearea - aghi %r15,-SP_SIZE # make room for registers & psw - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - la %r12,\psworg - stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,12,__LC_SVC_ILC - stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_ILC(%r15) - mvc SP_R12(32,%r15),\savearea # move %r12-%r15 to stack - la %r12,0 - stg %r12,__SF_BACKCHAIN(%r15) - .endm - - .macro RESTORE_ALL psworg,sync - mvc \psworg(16),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - STORE_TIMER __LC_EXIT_TIMER - lpswe \psworg # back to caller + .macro SWITCH_ASYNC savearea,stack,shift + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f + lgr %r14,%r9 + slg %r14,BASED(.Lcritical_start) + clg %r14,BASED(.Lcritical_length) + jhe 0f + lghi %r11,\savearea # inside critical section, do cleanup + brasl %r14,cleanup_critical + tmhh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: lg %r14,\stack # are we already on the target stack? + slgr %r14,%r15 + srag %r14,%r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: lg %r15,\stack # load target stack +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + .endm + + .macro UPDATE_VTIME scratch,enter_timer + lg \scratch,__LC_EXIT_TIMER + slg \scratch,\enter_timer + alg \scratch,__LC_USER_TIMER + stg \scratch,__LC_USER_TIMER + lg \scratch,__LC_LAST_UPDATE_TIMER + slg \scratch,__LC_EXIT_TIMER + alg \scratch,__LC_SYSTEM_TIMER + stg \scratch,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer + .endm + + .macro LAST_BREAK scratch + srag \scratch,%r10,23 + jz .+10 + stg %r10,__TI_last_break(%r12) + .endm + + .macro REENABLE_IRQS + stg %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW + .endm + + .macro STCK savearea +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + .insn s,0xb27c0000,\savearea # store clock fast +#else + .insn s,0xb2050000,\savearea # store clock +#endif .endm + .section .kprobes.text, "ax" + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -148,32 +175,20 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) * Returns: * gpr2 = prev */ - .globl __switch_to -__switch_to: - tm __THREAD_per+4(%r3),0xe8 # is the new process using per ? - jz __switch_to_noper # if not we're fine - stctg %c9,%c11,__SF_EMPTY(%r15)# We are using per stuff - clc __THREAD_per(24,%r3),__SF_EMPTY(%r15) - je __switch_to_noper # we got away without bashing TLB's - lctlg %c9,%c11,__THREAD_per(%r3) # Nope we didn't -__switch_to_noper: - lg %r4,__THREAD_info(%r2) # get thread_info of prev - tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? - jz __switch_to_no_mcck - ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - lg %r4,__THREAD_info(%r3) # get thread_info of next - oi __TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next -__switch_to_no_mcck: - stmg %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp - lg %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp - lmg %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task - stg %r3,__LC_CURRENT # __LC_CURRENT = current task struct - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - lg %r3,__THREAD_info(%r3) # load thread_info from task struct - stg %r3,__LC_THREAD_INFO - aghi %r3,STACK_SIZE - stg %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack +ENTRY(__switch_to) + stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev + lg %r4,__THREAD_info(%r2) # get thread_info of prev + lg %r5,__THREAD_info(%r3) # get thread_info of next + lgr %r15,%r5 + aghi %r15,STACK_INIT # end of kernel stack of next + stg %r3,__LC_CURRENT # store task struct of next + stg %r5,__LC_THREAD_INFO # store thread info of next + stg %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r3) # load kernel stack of next + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -182,628 +197,589 @@ __critical_start: * are executed with interrupts enabled. */ - .globl system_call -system_call: - STORE_TIMER __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_BASE __LC_SAVE_AREA - SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +ENTRY(system_call) + stpt __LC_SYNC_ENTER_TIMER +sysc_stmg: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + lghi %r14,_PIF_SYSCALL +sysc_per: + lg %r15,__LC_KERNEL_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz sysc_do_svc - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif + UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r13 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) sysc_do_svc: - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - slag %r7,%r7,2 # *4 and test for svc 0 + lg %r10,__TI_sysc_table(%r12) # address of system call table + llgh %r8,__PT_INT_CODE+2(%r11) + slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 - cl %r1,BASED(.Lnr_syscalls) + llgfr %r1,%r1 # clear high word in r1 + cghi %r1,NR_syscalls jnl sysc_nr_ok - lgfr %r7,%r1 # clear high word in r1 - slag %r7,%r7,2 # svc 0: system call number in %r1 + sth %r1,__PT_INT_CODE+2(%r11) + slag %r8,%r1,2 sysc_nr_ok: - mvc SP_ARGS(8,%r15),SP_R7(%r15) -sysc_do_restart: - larl %r10,sys_call_table -#ifdef CONFIG_COMPAT - tm __TI_flags+5(%r9),(_TIF_31BIT>>16) # running in 31 bit mode ? - jno sysc_noemu - larl %r10,sys_call_table_emu # use 31 bit emulation system calls -sysc_noemu: -#endif - tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - lgf %r8,0(%r7,%r10) # load address of system call routine - jnz sysc_tracesys - basr %r14,%r8 # call sys_xxxx - stg %r2,SP_R2(%r15) # store return value (change R2 on stack) - # ATTENTION: check sys_execve_glue before - # changing anything here !! + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stg %r2,__PT_ORIG_GPR2(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lgf %r9,0(%r8,%r10) # get system call add. + tm __TI_flags+7(%r12),_TIF_TRACE + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + stg %r2,__PT_R2(%r11) # store return value sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_leave - tm __TI_flags+7(%r9),_TIF_WORK_SVC - jnz sysc_work # there is work to do (signals etc.) -sysc_leave: - RESTORE_ALL __LC_RETURN_PSW,1 + LOCKDEP_SYS_EXIT +sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __PT_FLAGS+7(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+7(%r12),_TIF_WORK + jnz sysc_work # check for work + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz sysc_work +sysc_restore: + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW +sysc_done: # -# recheck if there is more work to do -# -sysc_work_loop: - tm __TI_flags+7(%r9),_TIF_WORK_SVC - jz sysc_leave # there is no work to do -# # One of the work bits is on. Find out which one. # sysc_work: - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo sysc_mcck_pending - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule - tm __TI_flags+7(%r9),_TIF_SIGPENDING - jo sysc_sigpending - tm __TI_flags+7(%r9),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r9),_TIF_SINGLE_STEP + tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP jo sysc_singlestep - j sysc_leave + tm __TI_flags+7(%r12),_TIF_SIGPENDING + jo sysc_sigpending + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + jo sysc_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_ASCE + jo sysc_uaccess + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule -# -sysc_reschedule: - larl %r14,sysc_work_loop - jg schedule # return point is sysc_return +# +sysc_reschedule: + larl %r14,sysc_return + jg schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - larl %r14,sysc_work_loop - jg s390_handle_mcck # TIF bit will be cleared by handler + larl %r14,sysc_return + jg s390_handle_mcck # TIF bit will be cleared by handler + +# +# _CIF_ASCE is set, load user space asce +# +sysc_uaccess: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + j sysc_return # # _TIF_SIGPENDING is set, call do_signal # -sysc_sigpending: - ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - la %r2,SP_PTREGS(%r15) # load pt_regs - sgr %r3,%r3 # clear *oldset - brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r9),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r9),_TIF_SINGLE_STEP - jo sysc_singlestep - j sysc_work_loop +sysc_sigpending: + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + tm __PT_FLAGS+7(%r11),_PIF_SYSCALL + jno sysc_return + lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lg %r10,__TI_sysc_table(%r12) # address of system call table + lghi %r8,0 # svc 0 returns -ENOSYS + llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r8,%r1,2 + j sysc_nr_ok # restart svc # -# _TIF_RESTART_SVC is set, set up registers and restart svc +# _TIF_NOTIFY_RESUME is set, call do_notify_resume # -sysc_restart: - ni __TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - slag %r7,%r7,2 # *4 - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - j sysc_do_restart # restart svc +sysc_notify_resume: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_notify_resume # -# _TIF_SINGLE_STEP is set, call do_single_step +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - lhi %r0,__LC_PGM_OLD_PSW - sth %r0,SP_TRAP(%r15) # set trap indication to pgm check - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return # load adr. of system return - jg do_single_step # branch to do_sigtrap - + ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_per_trap # -# call syscall_trace before and after system call -# special linkage: %r12 contains the return address for trace_svc +# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before +# and after the system call # sysc_tracesys: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs la %r3,0 - srl %r7,2 - stg %r7,SP_R2(%r15) - brasl %r14,syscall_trace + llgh %r0,__PT_INT_CODE+2(%r11) + stg %r0,__PT_R2(%r11) + brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls - clg %r0,SP_R2(%r15) + clgr %r0,%r2 jnh sysc_tracenogo - lg %r7,SP_R2(%r15) # strace might have changed the - sll %r7,2 # system call - lgf %r8,0(%r7,%r10) + sllg %r8,%r2,2 + lgf %r9,0(%r8,%r10) sysc_tracego: - lmg %r3,%r6,SP_R3(%r15) - lg %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - stg %r2,SP_R2(%r15) # store return value + lmg %r3,%r7,__PT_R3(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lg %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + stg %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - jz sysc_return - la %r2,SP_PTREGS(%r15) # load pt_regs - la %r3,1 - larl %r14,sysc_return # return point is sysc_return - jg syscall_trace + tm __TI_flags+7(%r12),_TIF_TRACE + jz sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # - .globl ret_from_fork -ret_from_fork: - lg %r13,__LC_SVC_NEW_PSW+8 - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - jo 0f - stg %r15,SP_R15(%r15) # store stack pointer for new kthread -0: brasl %r14,schedule_tail - stosm 24(%r15),0x03 # reenable interrupts - j sysc_return - -# -# clone, fork, vfork, exec and sigreturn need glue, -# because they all expect pt_regs as parameter, -# but are called with different parameter. -# return-address is set up above -# -sys_clone_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - jg sys_clone # branch to sys_clone - -#ifdef CONFIG_COMPAT -sys32_clone_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - jg sys32_clone # branch to sys32_clone -#endif - -sys_fork_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - jg sys_fork # branch to sys_fork - -sys_vfork_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - jg sys_vfork # branch to sys_vfork - -sys_execve_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - lgr %r12,%r14 # save return address - brasl %r14,sys_execve # call sys_execve - ltgr %r2,%r2 # check if execve failed - bnz 0(%r12) # it did fail -> store result in gpr2 - b 6(%r12) # SKIP STG 2,SP_R2(15) in - # system_call/sysc_tracesys -#ifdef CONFIG_COMPAT -sys32_execve_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs - lgr %r12,%r14 # save return address - brasl %r14,sys32_execve # call sys32_execve - ltgr %r2,%r2 # check if execve failed - bnz 0(%r12) # it did fail -> store result in gpr2 - b 6(%r12) # SKIP STG 2,SP_R2(15) in - # system_call/sysc_tracesys -#endif - -sys_sigreturn_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs as parameter - jg sys_sigreturn # branch to sys_sigreturn - -#ifdef CONFIG_COMPAT -sys32_sigreturn_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs as parameter - jg sys32_sigreturn # branch to sys32_sigreturn -#endif - -sys_rt_sigreturn_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs as parameter - jg sys_rt_sigreturn # branch to sys_sigreturn - -#ifdef CONFIG_COMPAT -sys32_rt_sigreturn_glue: - la %r2,SP_PTREGS(%r15) # load pt_regs as parameter - jg sys32_rt_sigreturn # branch to sys32_sigreturn -#endif - -# -# sigsuspend and rt_sigsuspend need pt_regs as an additional -# parameter and they have to skip the store of %r2 into the -# user register %r2 because the return value was set in -# sigsuspend and rt_sigsuspend already and must not be overwritten! -# - -sys_sigsuspend_glue: - lgr %r5,%r4 # move mask back - lgr %r4,%r3 # move history1 parameter - lgr %r3,%r2 # move history0 parameter - la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter - la %r14,6(%r14) # skip store of return value - jg sys_sigsuspend # branch to sys_sigsuspend - -#ifdef CONFIG_COMPAT -sys32_sigsuspend_glue: - llgfr %r4,%r4 # unsigned long - lgr %r5,%r4 # move mask back - lgfr %r3,%r3 # int - lgr %r4,%r3 # move history1 parameter - lgfr %r2,%r2 # int - lgr %r3,%r2 # move history0 parameter - la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter - la %r14,6(%r14) # skip store of return value - jg sys32_sigsuspend # branch to sys32_sigsuspend -#endif - -sys_rt_sigsuspend_glue: - lgr %r4,%r3 # move sigsetsize parameter - lgr %r3,%r2 # move unewset parameter - la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter - la %r14,6(%r14) # skip store of return value - jg sys_rt_sigsuspend # branch to sys_rt_sigsuspend - -#ifdef CONFIG_COMPAT -sys32_rt_sigsuspend_glue: - llgfr %r3,%r3 # size_t - lgr %r4,%r3 # move sigsetsize parameter - llgtr %r2,%r2 # sigset_emu31_t * - lgr %r3,%r2 # move unewset parameter - la %r2,SP_PTREGS(%r15) # load pt_regs as first parameter - la %r14,6(%r14) # skip store of return value - jg sys32_rt_sigsuspend # branch to sys32_rt_sigsuspend -#endif - -sys_sigaltstack_glue: - la %r4,SP_PTREGS(%r15) # load pt_regs as parameter - jg sys_sigaltstack # branch to sys_sigreturn - -#ifdef CONFIG_COMPAT -sys32_sigaltstack_glue: - la %r4,SP_PTREGS(%r15) # load pt_regs as parameter - jg sys32_sigaltstack_wrapper # branch to sys_sigreturn -#endif +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + lg %r12,__LC_THREAD_INFO + brasl %r14,schedule_tail + TRACE_IRQS_ON + ssm __LC_SVC_NEW_PSW # reenable interrupts + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jne sysc_tracenogo + # it's a kernel thread + lmg %r9,%r10,__PT_R9(%r11) # load gprs +ENTRY(kernel_thread_starter) + la %r2,0(%r10) + basr %r14,%r9 + j sysc_tracenogo /* * Program check handler routine */ - .globl pgm_check_handler -pgm_check_handler: -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ - STORE_TIMER __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - jnz pgm_per # got per exception -> special case - SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - lgf %r3,__LC_PGM_ILC # load program interruption code - lghi %r8,0x7f - ngr %r8,%r3 -pgm_do_call: - sll %r8,3 - larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return - br %r1 # branch to interrupt-handler - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - jnz pgm_per_std # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW - je pgm_svcper -# no interesting special case, ignore PER event - lmg %r12,%r15,__LC_SAVE_AREA - lpswe __LC_PGM_OLD_PSW +ENTRY(pgm_check_handler) + stpt __LC_SYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_PGM_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,1 + tmhh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r14 + lg %r15,__LC_KERNEL_STACK + lg %r14,__TI_task(%r12) + lghi %r13,__LC_PGM_TDB + tm __LC_PGM_ILC+2,0x02 # check for transaction abort + jz 2f + mvc __THREAD_trap_tdb(256,%r14),0(%r13) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + stg %r10,__PT_ARGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + tmhh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP + mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE + mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + larl %r1,pgm_check_table + llgh %r10,__PT_INT_CODE+2(%r11) + nill %r10,0x007f + sll %r10,2 + je sysc_return + lgf %r1,0(%r10,%r1) # load address of handler routine + lgr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # branch to interrupt-handler + j sysc_return # -# Normal per exception +# PER event in supervisor state, must be kprobes # -pgm_per_std: - SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime2 - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - lg %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - lgf %r3,__LC_PGM_ILC # load program interruption code - lghi %r8,0x7f - ngr %r8,%r3 # clear per-event-bit and ilc - je sysc_return - j pgm_do_call +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_per_trap + j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# single stepped system call # pgm_svcper: - SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime3 - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime3: -#endif - llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - lg %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - j sysc_do_svc + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,sysc_per + stg %r14,__LC_RETURN_PSW+8 + lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine */ - .globl io_int_handler -io_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER - stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL __LC_IO_OLD_PSW,__LC_SAVE_AREA+32,0 - CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz io_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_IRQ # call standard irq handler - +ENTRY(io_int_handler) + STCK __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_IO_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,2 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user? + jz io_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +io_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + TRACE_IRQS_OFF + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: + lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lghi %r3,THIN_INTERRUPT +io_call: + brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - jno io_preempt # no -> check for preemptive scheduling -#else - jno io_leave # no-> skip resched & signal -#endif - tm __TI_flags+7(%r9),_TIF_WORK_INT - jnz io_work # there is work to do (signals etc.) -io_leave: - RESTORE_ALL __LC_RETURN_PSW,0 + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: + tm __TI_flags+7(%r12),_TIF_WORK + jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz io_work +io_restore: + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW io_done: +# +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK work +# 2) if we return to kernel code and kvm is enabled check if we need to +# modify the psw to leave SIE +# 3) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. +# +io_work: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT -io_preempt: - icm %r0,15,__TI_precount(%r9) - jnz io_leave + # check for preemptive scheduling + icm %r0,15,__TI_precount(%r12) + jnz io_restore # preemption is disabled + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jno io_restore # switch to kernel stack - lg %r1,SP_R15(%r15) - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lg %r1,__PT_R15(%r11) + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 -io_resume_loop: - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED - jno io_leave - larl %r1,.Lc_pactive - mvc __TI_precount(4,%r9),0(%r1) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - brasl %r14,schedule # call schedule - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - xc __TI_precount(4,%r9),__TI_precount(%r9) - j io_resume_loop + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + brasl %r14,preempt_schedule_irq + j io_return +#else + j io_restore #endif # -# switch to kernel stack, then check TIF bits +# Need to do work before returning to userspace, switch to kernel stack # -io_work: +io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 + # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED and _TIF_MCCK_PENDING # -io_work_loop: - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING +io_work_tif: + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo io_mcck_pending - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo io_reschedule - tm __TI_flags+7(%r9),_TIF_SIGPENDING + tm __TI_flags+7(%r12),_TIF_SIGPENDING jo io_sigpending - j io_leave + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + jo io_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_ASCE + jo io_uaccess + j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: - larl %r14,io_work_loop - jg s390_handle_mcck # TIF bit will be cleared by handler + # TRACE_IRQS_ON already done at io_return + brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler + TRACE_IRQS_OFF + j io_return + +# +# _CIF_ASCE is set, load user space asce +# +io_uaccess: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + j io_return # # _TIF_NEED_RESCHED is set, call schedule -# -io_reschedule: - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - brasl %r14,schedule # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - tm __TI_flags+7(%r9),_TIF_WORK_INT - jz io_leave # there is no work to do - j io_work_loop +# +io_reschedule: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + brasl %r14,schedule # call scheduler + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return # -# _TIF_SIGPENDING is set, call do_signal +# _TIF_SIGPENDING or is set, call do_signal +# +io_sigpending: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_NOTIFY_RESUME or is set, call do_notify_resume # -io_sigpending: - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - slgr %r3,%r3 # clear *oldset - brasl %r14,do_signal # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts - j io_work_loop +io_notify_resume: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return /* * External interrupt handler routine */ - .globl ext_int_handler -ext_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER - stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32,0 - CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz ext_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # address of register-save area - llgh %r3,__LC_EXT_INT_CODE # get interruption code - brasl %r14,do_extint +ENTRY(ext_int_handler) + STCK __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_EXT_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,3 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +ext_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + TRACE_IRQS_OFF + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + stg %r3,__SF_EMPTY(%r15) + larl %r1,psw_idle_lpsw+4 + stg %r1,__SF_EMPTY+8(%r15) + STCK __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) +psw_idle_lpsw: + lpswe __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* * Machine check handler routines */ - .globl mcck_int_handler -mcck_int_handler: +ENTRY(mcck_int_handler) + STCK __LC_MCCK_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r1) - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+64 - la %r12,__LC_MCK_OLD_PSW - tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_int_main # yes -> rest of mcck code invalid - tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - jo 0f - spt __LC_LAST_UPDATE_TIMER -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_LAST_UPDATE_TIMER - mvc __LC_SYNC_ENTER_TIMER(8),__LC_LAST_UPDATE_TIMER - mvc __LC_EXIT_TIMER(8),__LC_LAST_UPDATE_TIMER -#endif -0: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_int_main # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - jnz mcck_int_main # from user -> load kernel stack - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) - jhe mcck_int_main - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) - jl mcck_int_main - brasl %r14,cleanup_critical -mcck_int_main: - lg %r14,__LC_PANIC_STACK # are we already on the panic stack? - slgr %r14,%r15 - srag %r14,%r14,PAGE_SHIFT - jz 0f - lg %r15,__LC_PANIC_STACK # load panic stack -0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - jno mcck_no_vtime # no -> no timer update - tm __LC_MCK_OLD_PSW+1,0x01 # interrupting from user ? - jz mcck_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -mcck_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # load pt_regs + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_MCK_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,4 + tm __LC_MCCK_CODE,0x80 # system damage? + jo mcck_panic # yes -> rest of mcck code invalid + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + jo 3f + la %r14,__LC_SYNC_ENTER_TIMER + clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER + jl 0f + la %r14,__LC_ASYNC_ENTER_TIMER +0: clc 0(8,%r14),__LC_EXIT_TIMER + jl 1f + la %r14,__LC_EXIT_TIMER +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f + la %r14,__LC_LAST_UPDATE_TIMER +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER + LAST_BREAK %r14 +mcck_skip: + lghi %r14,__LC_GPREGS_SAVE_AREA+64 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jno mcck_return + TRACE_IRQS_OFF brasl %r14,s390_handle_mcck + TRACE_IRQS_ON mcck_return: - RESTORE_ALL __LC_RETURN_MCCK_PSW,0 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + jno 0f + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +0: lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_MCCK_PSW + +mcck_panic: + lg %r14,__LC_PANIC_STACK + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip -#ifdef CONFIG_SMP -/* - * Restart interruption handler, kick starter for additional CPUs - */ - .globl restart_int_handler -restart_int_handler: - lg %r15,__LC_SAVE_AREA+120 # load ksp - lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs - lghi %r10,__LC_AREGS_SAVE_AREA - lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - jg start_secondary -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - lpswe restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000,0x00000000,0x00000000 -restart_go: -#endif +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + stg %r15,__LC_SAVE_AREA_RESTART + lg %r15,__LC_RESTART_STACK + aghi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stmg %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw + aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lg %r1,__LC_RESTART_FN # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA + lg %r3,__LC_RESTART_SOURCE + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b + + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK /* @@ -813,198 +789,247 @@ restart_go: */ stack_overflow: lg %r15,__LC_PANIC_STACK # change to panic stack - aghi %r1,-SP_SIZE - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - chi %r12,__LC_SVC_OLD_PSW - je 0f - chi %r12,__LC_PGM_OLD_PSW - je 0f - la %r1,__LC_SAVE_AREA+16 -0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain - la %r2,SP_PTREGS(%r15) # load pt_regs + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow #endif -cleanup_table_system_call: - .quad system_call, sysc_do_svc -cleanup_table_sysc_return: - .quad sysc_return, sysc_leave -cleanup_table_sysc_leave: - .quad sysc_leave, sysc_work_loop -cleanup_table_sysc_work_loop: - .quad sysc_work_loop, sysc_reschedule -cleanup_table_io_leave: - .quad io_leave, io_done -cleanup_table_io_work_loop: - .quad io_work_loop, io_mcck_pending + .align 8 +cleanup_table: + .quad system_call + .quad sysc_do_svc + .quad sysc_tif + .quad sysc_restore + .quad sysc_done + .quad io_tif + .quad io_restore + .quad io_done + .quad psw_idle + .quad psw_idle_end cleanup_critical: - clc 8(8,%r12),BASED(cleanup_table_system_call) + clg %r9,BASED(cleanup_table) # system_call jl 0f - clc 8(8,%r12),BASED(cleanup_table_system_call+8) + clg %r9,BASED(cleanup_table+8) # sysc_do_svc jl cleanup_system_call -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_return) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_return+8) - jl cleanup_sysc_return -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_leave) + clg %r9,BASED(cleanup_table+16) # sysc_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_leave+8) - jl cleanup_sysc_leave -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop) + clg %r9,BASED(cleanup_table+24) # sysc_restore + jl cleanup_sysc_tif + clg %r9,BASED(cleanup_table+32) # sysc_done + jl cleanup_sysc_restore + clg %r9,BASED(cleanup_table+40) # io_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop+8) - jl cleanup_sysc_return -0: - clc 8(8,%r12),BASED(cleanup_table_io_leave) + clg %r9,BASED(cleanup_table+48) # io_restore + jl cleanup_io_tif + clg %r9,BASED(cleanup_table+56) # io_done + jl cleanup_io_restore + clg %r9,BASED(cleanup_table+64) # psw_idle jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_leave+8) - jl cleanup_io_leave -0: - clc 8(8,%r12),BASED(cleanup_table_io_work_loop) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_work_loop+8) - jl cleanup_io_return -0: - br %r14 + clg %r9,BASED(cleanup_table+72) # psw_idle_end + jl cleanup_idle +0: br %r14 + cleanup_system_call: - mvc __LC_RETURN_PSW(16),0(%r12) - cghi %r12,__LC_MCK_OLD_PSW - je 0f - la %r12,__LC_SAVE_AREA+32 - j 1f -0: la %r12,__LC_SAVE_AREA+64 -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) + # check if stpt has been executed + clg %r9,BASED(cleanup_system_call_insn) jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) - jhe cleanup_vtime -#endif - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stmg has been executed + clg %r9,BASED(cleanup_system_call_insn+8) + jh 0f + mvc __LC_SAVE_AREA_SYNC(64),0(%r11) +0: # check if base register setup + TIF bit load has been done + clg %r9,BASED(cleanup_system_call_insn+16) + jhe 0f + # set up saved registers r10 and r12 + stg %r10,16(%r11) # r10 last break + stg %r12,32(%r11) # r12 thread-info pointer +0: # check if the user time update has been done + clg %r9,BASED(cleanup_system_call_insn+24) + jh 0f + lg %r15,__LC_EXIT_TIMER + slg %r15,__LC_SYNC_ENTER_TIMER + alg %r15,__LC_USER_TIMER + stg %r15,__LC_USER_TIMER +0: # check if the system time update has been done + clg %r9,BASED(cleanup_system_call_insn+32) jh 0f - mvc __LC_SAVE_AREA(32),0(%r12) -0: stg %r13,8(%r12) - stg %r12,__LC_SAVE_AREA+96 # argh - SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - lg %r12,__LC_SAVE_AREA+96 # argh - stg %r15,24(%r12) - llgh %r7,__LC_SVC_INT_CODE -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -cleanup_vtime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) - jhe cleanup_stime - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz cleanup_novtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32) - jh cleanup_update - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + lg %r15,__LC_LAST_UPDATE_TIMER + slg %r15,__LC_EXIT_TIMER + alg %r15,__LC_SYSTEM_TIMER + stg %r15,__LC_SYSTEM_TIMER +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -cleanup_novtime: -#endif - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) - la %r12,__LC_RETURN_PSW + # do LAST_BREAK + lg %r9,16(%r11) + srag %r9,%r9,23 + jz 0f + mvc __TI_last_break(8,%r12),16(%r11) +0: # set up saved register r11 + lg %r15,__LC_KERNEL_STACK + la %r9,STACK_FRAME_OVERHEAD(%r15) + stg %r9,24(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL + # setup saved register r15 + stg %r15,56(%r11) # r15 stack pointer + # set new psw address and exit + larl %r9,sysc_do_svc br %r14 cleanup_system_call_insn: - .quad sysc_saveall -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad system_call - .quad sysc_vtime - .quad sysc_stime - .quad sysc_update -#endif - -cleanup_sysc_return: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_return) - la %r12,__LC_RETURN_PSW + .quad system_call + .quad sysc_stmg + .quad sysc_per + .quad sysc_vtime+18 + .quad sysc_vtime+42 + +cleanup_sysc_tif: + larl %r9,sysc_tif br %r14 -cleanup_sysc_leave: - clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f -#endif - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - jne 0f - mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_sysc_restore: + clg %r9,BASED(cleanup_sysc_restore_insn) + je 0f + lg %r9,24(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave_insn: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad sysc_leave + 16 -#endif - .quad sysc_leave + 12 +cleanup_sysc_restore_insn: + .quad sysc_done - 4 -cleanup_io_return: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_work_loop) - la %r12,__LC_RETURN_PSW +cleanup_io_tif: + larl %r9,io_tif br %r14 -cleanup_io_leave: - clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f -#endif - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - jne 0f - mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_io_restore: + clg %r9,BASED(cleanup_io_restore_insn) + je 0f + lg %r9,24(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_io_leave_insn: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad io_leave + 20 -#endif - .quad io_leave + 16 +cleanup_io_restore_insn: + .quad io_done - 4 + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER +0: # check if stck & stpt have been executed + clg %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) + mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) +1: # account system time going idle + lg %r9,__LC_STEAL_TIMER + alg %r9,__CLOCK_IDLE_ENTER(%r2) + slg %r9,__LC_LAST_UPDATE_CLOCK + stg %r9,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + lg %r9,__LC_SYSTEM_TIMER + alg %r9,__LC_LAST_UPDATE_TIMER + slg %r9,__TIMER_IDLE_ENTER(%r2) + stg %r9,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + # prepare return psw + nihh %r8,0xfcfd # clear irq & wait state bits + lg %r9,48(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .quad psw_idle_lpsw /* * Integer constants */ - .align 4 -.Lconst: -.Lc_pactive: .long PREEMPT_ACTIVE -.Lnr_syscalls: .long NR_syscalls -.L0x0130: .short 0x130 -.L0x0140: .short 0x140 -.L0x0150: .short 0x150 -.L0x0160: .short 0x160 -.L0x0170: .short 0x170 + .align 8 .Lcritical_start: - .quad __critical_start -.Lcritical_end: - .quad __critical_end + .quad __critical_start +.Lcritical_length: + .quad __critical_end - __critical_start + + +#if IS_ENABLED(CONFIG_KVM) +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ +ENTRY(sie64a) + stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + stg %r2,__SF_EMPTY(%r15) # save control block pointer + stg %r3,__SF_EMPTY+8(%r15) # save guest register save area + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: + lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done + LPP __SF_EMPTY(%r15) # set guest id + sie 0(%r14) +sie_done: + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions between sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: + lg %r14,__SF_EMPTY+8(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code + br %r14 +sie_fault: + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit + + .align 8 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap + + EX_TABLE(rewind_pad,sie_fault) + EX_TABLE(sie_exit,sie_fault) +#endif + .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esame - .globl sys_call_table + .globl sys_call_table sys_call_table: #include "syscalls.S" #undef SYSCALL @@ -1012,7 +1037,7 @@ sys_call_table: #ifdef CONFIG_COMPAT #define SYSCALL(esa,esame,emu) .long emu - .globl sys_call_table_emu + .globl sys_call_table_emu sys_call_table_emu: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c new file mode 100644 index 00000000000..54d6493c4a5 --- /dev/null +++ b/arch/s390/kernel/ftrace.c @@ -0,0 +1,192 @@ +/* + * Dynamic function tracer architecture backend. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/kprobes.h> +#include <trace/syscall.h> +#include <asm/asm-offsets.h> +#include "entry.h" + +#ifdef CONFIG_DYNAMIC_FTRACE + +void ftrace_disable_code(void); +void ftrace_enable_insn(void); + +#ifdef CONFIG_64BIT +/* + * The 64-bit mcount code looks like this: + * stg %r14,8(%r15) # offset 0 + * > larl %r1,<&counter> # offset 6 + * > brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 + * Total length is 24 bytes. The middle two instructions of the mcount + * block get overwritten by ftrace_make_nop / ftrace_make_call. + * The 64-bit enabled ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > lg %r1,__LC_FTRACE_FUNC # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The return points of the mcount/ftrace function have the same offset 18. + * The 64-bit disable ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > jg .+18 # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The jg instruction branches to offset 24 to skip as many instructions + * as possible. + */ +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " jg 0f\n" + " lgr %r0,%r0\n" + " basr %r14,%r1\n" + "0:\n" + " .align 4\n" + "ftrace_enable_insn:\n" + " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); + +#define FTRACE_INSN_SIZE 6 + +#else /* CONFIG_64BIT */ +/* + * The 31-bit mcount code looks like this: + * st %r14,4(%r15) # offset 0 + * > bras %r1,0f # offset 4 + * > .long _mcount # offset 8 + * > .long <&counter> # offset 12 + * > 0: l %r14,0(%r1) # offset 16 + * > l %r1,4(%r1) # offset 20 + * basr %r14,%r14 # offset 24 + * l %r14,4(%r15) # offset 26 + * Total length is 30 bytes. The twenty bytes starting from offset 4 + * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. + * The 31-bit enabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > l %r14,__LC_FTRACE_FUNC # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The return points of the mcount/ftrace function have the same offset 26. + * The 31-bit disabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > j .+26 # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The j instruction branches to offset 30 to skip as many instructions + * as possible. + */ +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " j 1f\n" + " j 0f\n" + " .fill 12,1,0x07\n" + "0: basr %r14,%r14\n" + "1:\n" + " .align 4\n" + "ftrace_enable_insn:\n" + " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); + +#define FTRACE_INSN_SIZE 4 + +#endif /* CONFIG_64BIT */ + + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, + MCOUNT_INSN_SIZE)) + return -EPERM; + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, + FTRACE_INSN_SIZE)) + return -EPERM; + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + return 0; +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Hook the return address and push it in the stack of return addresses + * in current thread info. + */ +unsigned long __kprobes prepare_ftrace_return(unsigned long parent, + unsigned long ip) +{ + struct ftrace_graph_ent trace; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + goto out; + ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE; + trace.func = ip; + trace.depth = current->curr_ret_stack + 1; + /* Only trace if the calling function expects to. */ + if (!ftrace_graph_entry(&trace)) + goto out; + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + goto out; + parent = (unsigned long) return_to_handler; +out: + return parent; +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Patch the kernel code at ftrace_graph_caller location. The instruction + * there is branch relative and save to prepare_ftrace_return. To disable + * the call to prepare_ftrace_return we patch the bras offset to point + * directly after the instructions. To enable the call we calculate + * the original offset to prepare_ftrace_return and put it back. + */ +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned short offset; + + offset = ((void *) prepare_ftrace_return - + (void *) ftrace_graph_caller) / 2; + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + static unsigned short offset = 0x0002; + + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index ea88d066bf0..e88d35d7495 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head.S - * - * (C) Copyright IBM Corp. 1999, 2005 + * Copyright IBM Corp. 1999, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -23,9 +21,8 @@ * */ -#include <linux/config.h> -#include <asm/setup.h> -#include <asm/lowcore.h> +#include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> @@ -36,511 +33,469 @@ #define ARCH_OFFSET 0 #endif -#ifndef CONFIG_IPL - .org 0 - .long 0x00080000,0x80000000+startup # Just a restart PSW -#else -#ifdef CONFIG_IPL_TAPE -#define IPL_BS 1024 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x27000000,0x60000001 # by ipl to addresses 0-23. - .long 0x02000000,0x20000000+IPL_BS # (a PSW and two CCWs). - .long 0x00000000,0x00000000 # external old psw - .long 0x00000000,0x00000000 # svc old psw - .long 0x00000000,0x00000000 # program check old psw - .long 0x00000000,0x00000000 # machine check old psw - .long 0x00000000,0x00000000 # io old psw - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x000a0000,0x00000058 # external new psw - .long 0x000a0000,0x00000060 # svc new psw - .long 0x000a0000,0x00000068 # program check new psw - .long 0x000a0000,0x00000070 # machine check new psw - .long 0x00080000,0x80000000+.Lioint # io new psw - - .org 0x100 -# -# subroutine for loading from tape -# Paramters: -# R1 = device number -# R2 = load address -.Lloader: - st %r14,.Lldret - la %r3,.Lorbread # r3 = address of orb - la %r5,.Lirb # r5 = address of irb - st %r2,.Lccwread+4 # initialize CCW data addresses - lctl %c6,%c6,.Lcr6 - slr %r2,%r2 -.Lldlp: - la %r6,3 # 3 retries -.Lssch: - ssch 0(%r3) # load chunk of IPL_BS bytes - bnz .Llderr -.Lw4end: - bas %r14,.Lwait4io - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lrecov - slr %r7,%r7 - icm %r7,3,10(%r5) # get residual count - lcr %r7,%r7 - la %r7,IPL_BS(%r7) # IPL_BS-residual=#bytes read - ar %r2,%r7 # add to total size - tm 8(%r5),0x01 # found a tape mark ? - bnz .Ldone - l %r0,.Lccwread+4 # update CCW data addresses - ar %r0,%r7 - st %r0,.Lccwread+4 - b .Lldlp -.Ldone: - l %r14,.Lldret - br %r14 # r2 contains the total size -.Lrecov: - bas %r14,.Lsense # do the sensing - bct %r6,.Lssch # dec. retry count & branch - b .Llderr +__HEAD + +#define IPL_BS 0x730 + .org 0 + .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded + .long 0x02000018,0x60000050 # by ipl to addresses 0-23. + .long 0x02000068,0x60000050 # (a PSW and two CCWs). + .fill 80-24,1,0x40 # bytes 24-79 are discarded !! + .long 0x020000f0,0x60000050 # The next 160 byte are loaded + .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 + .long 0x02000190,0x60000050 # They form the continuation + .long 0x020001e0,0x60000050 # of the CCW program started + .long 0x02000230,0x60000050 # by ipl and load the range + .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image + .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 + .long 0x02000320,0x60000050 # in memory. At the end of + .long 0x02000370,0x60000050 # the channel program the PSW + .long 0x020003c0,0x60000050 # at location 0 is loaded. + .long 0x02000410,0x60000050 # Initial processing starts + .long 0x02000460,0x60000050 # at 0x200 = iplstart. + .long 0x020004b0,0x60000050 + .long 0x02000500,0x60000050 + .long 0x02000550,0x60000050 + .long 0x020005a0,0x60000050 + .long 0x020005f0,0x60000050 + .long 0x02000640,0x60000050 + .long 0x02000690,0x60000050 + .long 0x020006e0,0x20000050 + + .org 0x200 # -# Sense subroutine +# subroutine to set architecture mode # -.Lsense: - st %r14,.Lsnsret - la %r7,.Lorbsense - ssch 0(%r7) # start sense command - bnz .Llderr - bas %r14,.Lwait4io - l %r14,.Lsnsret - tm 8(%r5),0x82 # do we have a problem ? - bnz .Llderr - br %r14 +.Lsetmode: +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +#endif + br %r14 + # -# Wait for interrupt subroutine +# subroutine to wait for end I/O # -.Lwait4io: - lpsw .Lwaitpsw +.Lirqwait: +#ifdef CONFIG_64BIT + mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw .Lioint: - c %r1,0xb8 # compare subchannel number - bne .Lwait4io - tsch 0(%r5) - slr %r0,%r0 - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lwtexit - tm 8(%r5),0x04 # got device end ? - bz .Lwait4io -.Lwtexit: - br %r14 -.Llderr: - lpsw .Lcrash - - .align 8 -.Lorbread: - .long 0x00000000,0x0080ff00,.Lccwread - .align 8 -.Lorbsense: - .long 0x00000000,0x0080ff00,.Lccwsense - .align 8 -.Lccwread: - .long 0x02200000+IPL_BS,0x00000000 -.Lccwsense: - .long 0x04200001,0x00000000 + br %r14 + .align 8 +.Lnewpsw: + .quad 0x0000000080000000,.Lioint +#else + mvc 0x78(8),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + br %r14 + .align 8 +.Lnewpsw: + .long 0x00080000,0x80000000+.Lioint +#endif .Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 -.Lldret:.long 0 -.Lsnsret: .long 0 -#endif /* CONFIG_IPL_TAPE */ - -#ifdef CONFIG_IPL_VM -#define IPL_BS 0x730 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x02000018,0x60000050 # by ipl to addresses 0-23. - .long 0x02000068,0x60000050 # (a PSW and two CCWs). - .fill 80-24,1,0x40 # bytes 24-79 are discarded !! - .long 0x020000f0,0x60000050 # The next 160 byte are loaded - .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 - .long 0x02000190,0x60000050 # They form the continuation - .long 0x020001e0,0x60000050 # of the CCW program started - .long 0x02000230,0x60000050 # by ipl and load the range - .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image - .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 - .long 0x02000320,0x60000050 # in memory. At the end of - .long 0x02000370,0x60000050 # the channel program the PSW - .long 0x020003c0,0x60000050 # at location 0 is loaded. - .long 0x02000410,0x60000050 # Initial processing starts - .long 0x02000460,0x60000050 # at 0xf0 = iplstart. - .long 0x020004b0,0x60000050 - .long 0x02000500,0x60000050 - .long 0x02000550,0x60000050 - .long 0x020005a0,0x60000050 - .long 0x020005f0,0x60000050 - .long 0x02000640,0x60000050 - .long 0x02000690,0x60000050 - .long 0x020006e0,0x20000050 - - .org 0xf0 + .long 0x020a0000,0x80000000+.Lioint + # # subroutine for loading cards from the reader # -.Lloader: - la %r3,.Lorb # r2 = address of orb into r2 - la %r5,.Lirb # r4 = address of irb - la %r6,.Lccws - la %r7,20 +.Lloader: + la %r4,0(%r14) + la %r3,.Lorb # r2 = address of orb into r2 + la %r5,.Lirb # r4 = address of irb + la %r6,.Lccws + la %r7,20 .Linit: - st %r2,4(%r6) # initialize CCW data addresses - la %r2,0x50(%r2) - la %r6,8(%r6) - bct 7,.Linit + st %r2,4(%r6) # initialize CCW data addresses + la %r2,0x50(%r2) + la %r6,8(%r6) + bct 7,.Linit - lctl %c6,%c6,.Lcr6 # set IO subclass mask - slr %r2,%r2 + lctl %c6,%c6,.Lcr6 # set IO subclass mask + slr %r2,%r2 .Lldlp: - ssch 0(%r3) # load chunk of 1600 bytes - bnz .Llderr + ssch 0(%r3) # load chunk of 1600 bytes + bnz .Llderr .Lwait4irq: - mvc 0x78(8),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw -.Lioint: - c %r1,0xb8 # compare subchannel number - bne .Lwait4irq - tsch 0(%r5) - - slr %r0,%r0 - ic %r0,8(%r5) # get device status - chi %r0,8 # channel end ? - be .Lcont - chi %r0,12 # channel end + device end ? - be .Lcont - - l %r0,4(%r5) - s %r0,8(%r3) # r0/8 = number of ccws executed - mhi %r0,10 # *10 = number of bytes in ccws - lh %r3,10(%r5) # get residual count - sr %r0,%r3 # #ccws*80-residual=#bytes read - ar %r2,%r0 - - br %r14 # r2 contains the total size + bas %r14,.Lirqwait + c %r1,0xb8 # compare subchannel number + bne .Lwait4irq + tsch 0(%r5) + + slr %r0,%r0 + ic %r0,8(%r5) # get device status + chi %r0,8 # channel end ? + be .Lcont + chi %r0,12 # channel end + device end ? + be .Lcont + + l %r0,4(%r5) + s %r0,8(%r3) # r0/8 = number of ccws executed + mhi %r0,10 # *10 = number of bytes in ccws + lh %r3,10(%r5) # get residual count + sr %r0,%r3 # #ccws*80-residual=#bytes read + ar %r2,%r0 + + br %r4 # r2 contains the total size .Lcont: - ahi %r2,0x640 # add 0x640 to total size - la %r6,.Lccws - la %r7,20 + ahi %r2,0x640 # add 0x640 to total size + la %r6,.Lccws + la %r7,20 .Lincr: - l %r0,4(%r6) # update CCW data addresses - ahi %r0,0x640 - st %r0,4(%r6) - ahi %r6,8 - bct 7,.Lincr + l %r0,4(%r6) # update CCW data addresses + ahi %r0,0x640 + st %r0,4(%r6) + ahi %r6,8 + bct 7,.Lincr - b .Lldlp + b .Lldlp .Llderr: - lpsw .Lcrash - - .align 8 -.Lorb: .long 0x00000000,0x0080ff00,.Lccws -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 -.Lloadp:.long 0,0 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 -.Lnewpsw: - .long 0x00080000,0x80000000+.Lioint -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - - .align 8 -.Lccws: .rept 19 - .long 0x02600050,0x00000000 - .endr - .long 0x02200050,0x00000000 -#endif /* CONFIG_IPL_VM */ + lpsw .Lcrash + + .align 8 +.Lorb: .long 0x00000000,0x0080ff00,.Lccws +.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lcr6: .long 0xff000000 +.Lloadp:.long 0,0 + .align 8 +.Lcrash:.long 0x000a0000,0x00000000 + + .align 8 +.Lccws: .rept 19 + .long 0x02600050,0x00000000 + .endr + .long 0x02200050,0x00000000 iplstart: - lh %r1,0xb8 # test if subchannel number - bct %r1,.Lnoload # is valid - l %r1,0xb8 # load ipl subchannel number - la %r2,IPL_BS # load start address - bas %r14,.Lloader # load rest of ipl image - l %r12,.Lparm # pointer to parameter area - st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number + bas %r14,.Lsetmode # Immediately switch to 64 bit mode + lh %r1,0xb8 # test if subchannel number + bct %r1,.Lnoload # is valid + l %r1,0xb8 # load ipl subchannel number + la %r2,IPL_BS # load start address + bas %r14,.Lloader # load rest of ipl image + l %r12,.Lparm # pointer to parameter area + st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number # # load parameter file from ipl device # .Lagain1: - l %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # ramdisk loc. is temp - bas %r14,.Lloader # load parameter file - ltr %r2,%r2 # got anything ? - bz .Lnopf - chi %r2,895 - bnh .Lnotrunc - la %r2,895 + l %r2,.Linitrd # ramdisk loc. is temp + bas %r14,.Lloader # load parameter file + ltr %r2,%r2 # got anything ? + bz .Lnopf + chi %r2,895 + bnh .Lnotrunc + la %r2,895 .Lnotrunc: - l %r4,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) - clc 0(3,%r4),.L_hdr # if it is HDRx - bz .Lagain1 # skip dataset header - clc 0(3,%r4),.L_eof # if it is EOFx - bz .Lagain1 # skip dateset trailer - la %r5,0(%r4,%r2) - lr %r3,%r2 -.Lidebc: - tm 0(%r5),0x80 # high order bit set ? - bo .Ldocv # yes -> convert from EBCDIC - ahi %r5,-1 - bct %r3,.Lidebc - b .Lnocv -.Ldocv: - l %r3,.Lcvtab - tr 0(256,%r4),0(%r3) # convert parameters to ascii - tr 256(256,%r4),0(%r3) - tr 512(256,%r4),0(%r3) - tr 768(122,%r4),0(%r3) -.Lnocv: la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line - mvc 0(256,%r3),0(%r4) - mvc 256(256,%r3),256(%r4) - mvc 512(256,%r3),512(%r4) - mvc 768(122,%r3),768(%r4) - slr %r0,%r0 - b .Lcntlp + l %r4,.Linitrd + clc 0(3,%r4),.L_hdr # if it is HDRx + bz .Lagain1 # skip dataset header + clc 0(3,%r4),.L_eof # if it is EOFx + bz .Lagain1 # skip dateset trailer + la %r5,0(%r4,%r2) + lr %r3,%r2 + la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line + mvc 0(256,%r3),0(%r4) + mvc 256(256,%r3),256(%r4) + mvc 512(256,%r3),512(%r4) + mvc 768(122,%r3),768(%r4) + slr %r0,%r0 + b .Lcntlp .Ldelspc: - ic %r0,0(%r2,%r3) - chi %r0,0x20 # is it a space ? - be .Lcntlp - ahi %r2,1 - b .Leolp + ic %r0,0(%r2,%r3) + chi %r0,0x20 # is it a space ? + be .Lcntlp + ahi %r2,1 + b .Leolp .Lcntlp: - brct %r2,.Ldelspc + brct %r2,.Ldelspc .Leolp: - slr %r0,%r0 - stc %r0,0(%r2,%r3) # terminate buffer + slr %r0,%r0 + stc %r0,0(%r2,%r3) # terminate buffer .Lnopf: # # load ramdisk from ipl device -# +# .Lagain2: - l %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # addr of ramdisk - bas %r14,.Lloader # load ramdisk - st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of ramdisk - ltr %r2,%r2 - bnz .Lrdcont - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found + l %r2,.Linitrd # addr of ramdisk + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) + bas %r14,.Lloader # load ramdisk + st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd + ltr %r2,%r2 + bnz .Lrdcont + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found .Lrdcont: - l %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) + l %r2,.Linitrd - clc 0(3,%r2),.L_hdr # skip HDRx and EOFx - bz .Lagain2 - clc 0(3,%r2),.L_eof - bz .Lagain2 + clc 0(3,%r2),.L_hdr # skip HDRx and EOFx + bz .Lagain2 + clc 0(3,%r2),.L_eof + bz .Lagain2 -#ifdef CONFIG_IPL_VM # # reset files in VM reader # - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running VM ? - bno .Lnoreset - la %r2,.Lreset - lhi %r3,26 - diag %r2,%r3,8 - la %r5,.Lirb - stsch 0(%r5) # check if irq is pending - tm 30(%r5),0x0f # by verifying if any of the - bnz .Lwaitforirq # activity or status control - tm 31(%r5),0xff # bits is set in the schib - bz .Lnoreset + stidp .Lcpuid # store cpuid + tm .Lcpuid,0xff # running VM ? + bno .Lnoreset + la %r2,.Lreset + lhi %r3,26 + diag %r2,%r3,8 + la %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + bnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + bz .Lnoreset .Lwaitforirq: - mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw -.Lwaitrdrirq: - lpsw .Lrdrwaitpsw -.Lrdrint: - c %r1,0xb8 # compare subchannel number - bne .Lwaitrdrirq - la %r5,.Lirb - tsch 0(%r5) + bas %r14,.Lirqwait # wait for IO interrupt + c %r1,0xb8 # compare subchannel number + bne .Lwaitforirq + la %r5,.Lirb + tsch 0(%r5) .Lnoreset: - b .Lnoload - - .align 8 -.Lrdrnewpsw: - .long 0x00080000,0x80000000+.Lrdrint -.Lrdrwaitpsw: - .long 0x020a0000,0x80000000+.Lrdrint -#endif + b .Lnoload # # everything loaded, go for it # .Lnoload: - l %r1,.Lstartup - br %r1 + l %r1,.Lstartup + br %r1 +.Linitrd:.long _end # default address of initrd .Lparm: .long PARMAREA .Lstartup: .long startup -.Lcvtab:.long _ebcasc # ebcdic to ascii table -.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 - .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 - .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" -.L_eof: .long 0xc5d6c600 /* C'EOF' */ -.L_hdr: .long 0xc8c4d900 /* C'HDR' */ - -#endif /* CONFIG_IPL */ +.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 + .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 + .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" +.L_eof: .long 0xc5d6c600 /* C'EOF' */ +.L_hdr: .long 0xc8c4d900 /* C'HDR' */ + .align 8 +.Lcpuid:.fill 8,1,0 # # SALIPL loader support. Based on a patch by Rob van der Heij. # This entry point is called directly from the SALIPL loader and # doesn't need a builtin ipl record. # - .org 0x800 - .globl start -start: - stm %r0,%r15,0x07b0 # store registers - basr %r12,%r0 + .org 0x800 +ENTRY(start) + stm %r0,%r15,0x07b0 # store registers + bas %r14,.Lsetmode # Immediately switch to 64 bit mode + basr %r12,%r0 .base: - l %r11,.parm - l %r8,.cmd # pointer to command buffer + l %r11,.parm + l %r8,.cmd # pointer to command buffer - ltr %r9,%r9 # do we have SALIPL parameters? - bp .sk8x8 + ltr %r9,%r9 # do we have SALIPL parameters? + bp .sk8x8 - mvc 0(64,%r8),0x00b0 # copy saved registers - xc 64(240-64,%r8),0(%r8) # remainder of buffer - tr 0(64,%r8),.lowcase - b .gotr + mvc 0(64,%r8),0x00b0 # copy saved registers + xc 64(240-64,%r8),0(%r8) # remainder of buffer + tr 0(64,%r8),.lowcase + b .gotr .sk8x8: - mvc 0(240,%r8),0(%r9) # copy iplparms into buffer + mvc 0(240,%r8),0(%r9) # copy iplparms into buffer .gotr: - l %r10,.tbl # EBCDIC to ASCII table - tr 0(240,%r8),0(%r10) - stidp __LC_CPUID # Are we running on VM maybe - cli __LC_CPUID,0xff - bnz .test - .long 0x83300060 # diag 3,0,x'0060' - storage size - b .done -.test: - mvc 0x68(8),.pgmnw # set up pgm check handler - l %r2,.fourmeg - lr %r3,%r2 - bctr %r3,%r0 # 4M-1 -.loop: iske %r0,%r3 - ar %r3,%r2 -.pgmx: - sr %r3,%r2 - la %r3,1(%r3) -.done: - l %r1,.memsize - st %r3,ARCH_OFFSET(%r1) - slr %r0,%r0 - st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) - st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) - j startup # continue with startup -.tbl: .long _ebcasc # translate table -.cmd: .long COMMAND_LINE # address of command line buffer -.parm: .long PARMAREA -.memsize: .long memory_size -.fourmeg: .long 0x00400000 # 4M -.pgmnw: .long 0x00080000,.pgmx + slr %r0,%r0 + st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) + st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) + j startup # continue with startup +.cmd: .long COMMAND_LINE # address of command line buffer +.parm: .long PARMAREA .lowcase: - .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 + .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f - .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17 + .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17 .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f - .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27 + .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27 .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f - .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37 + .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37 .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f - .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47 + .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47 .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f - .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57 + .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57 .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f - .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67 + .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67 .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f - .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77 + .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77 .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f - .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87 + .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87 .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f - .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97 + .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97 .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f - .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 + .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf - .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7 + .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7 .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf - .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg + .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi - .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop + .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz - .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 + .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff -.macro GET_IPL_DEVICE -.Lget_ipl_device: - basr %r12,0 -.LGID: l %r1,0xb8 # get sid - sll %r1,15 # test if subchannel is enabled - srl %r1,31 - ltr %r1,%r1 - bz 0(%r14) # subchannel disabled - l %r1,0xb8 - la %r5,.Lipl_schib-.LGID(%r12) - stsch 0(%r5) # get schib of subchannel - bnz 0(%r14) # schib not available - tm 5(%r5),0x01 # devno valid? - bno 0(%r14) - la %r6,ipl_parameter_flags-.LGID(%r12) - oi 3(%r6),0x01 # set flag - la %r2,ipl_devno-.LGID(%r12) - mvc 0(2,%r2),6(%r5) # store devno - tm 4(%r5),0x80 # qdio capable device? - bno 0(%r14) - oi 3(%r6),0x02 # set flag - - # copy ipl parameters - - lhi %r0,4096 - l %r2,20(%r0) # get address of parameter list - lhi %r3,IPL_PARMBLOCK_ORIGIN - st %r3,20(%r0) - lhi %r4,1 - cr %r2,%r3 # start parameters < destination ? - jl 0f - lhi %r1,1 # copy direction is upwards - j 1f -0: lhi %r1,-1 # copy direction is downwards - ar %r2,%r0 - ar %r3,%r0 - ar %r2,%r1 - ar %r3,%r1 -1: mvc 0(1,%r3),0(%r2) # finally copy ipl parameters - ar %r3,%r1 - ar %r2,%r1 - sr %r0,%r4 - jne 1b - b 0(%r14) - - .align 4 -.Lipl_schib: - .rept 13 - .long 0 - .endr - - .globl ipl_parameter_flags -ipl_parameter_flags: - .long 0 - .globl ipl_devno -ipl_devno: - .word 0 -.endm +# +# startup-code at 0x10000, running in absolute addressing mode +# this is called either by the ipl loader or directly by PSW restart +# or linload or SALIPL +# + .org 0x10000 +ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +#endif + basr %r13,0 # get base +.LPG0: + xc 0x200(256),0x200 # partially clear lowcore + xc 0x300(256),0x300 + xc 0xe00(256),0xe00 + stck __LC_LAST_UPDATE_CLOCK + spt 6f-.LPG0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) + xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST +#ifndef CONFIG_MARCH_G5 + # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} + .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list + tm __LC_STFL_FAC_LIST,0x01 # stfle available ? + jz 0f + la %r0,1 + .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended + # verify if all required facilities are supported by the machine +0: la %r1,__LC_STFL_FAC_LIST + la %r2,3f+8-.LPG0(%r13) + l %r3,0(%r2) +1: l %r0,0(%r1) + n %r0,4(%r2) + cl %r0,4(%r2) + jne 2f + la %r1,4(%r1) + la %r2,4(%r2) + ahi %r3,-1 + jnz 1b + j 4f +2: l %r15,.Lstack-.LPG0(%r13) + ahi %r15,-96 + la %r2,.Lals_string-.LPG0(%r13) + l %r3,.Lsclp_print-.LPG0(%r13) + basr %r14,%r3 + lpsw 3f-.LPG0(%r13) # machine type not good enough, crash +.Lals_string: + .asciz "The Linux kernel requires more recent processor hardware" +.Lsclp_print: + .long _sclp_print_early +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) + .align 16 +3: .long 0x000a0000,0x8badcccc + +# List of facilities that are required. If not all facilities are present +# the kernel will crash. Format is number of facility words with bits set, +# followed by the facility words. + +#if defined(CONFIG_64BIT) +#if defined(CONFIG_MARCH_ZEC12) + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 +#elif defined(CONFIG_MARCH_Z196) + .long 2, 0xc100eff2, 0xf46c0000 +#elif defined(CONFIG_MARCH_Z10) + .long 2, 0xc100eff2, 0xf0680000 +#elif defined(CONFIG_MARCH_Z9_109) + .long 1, 0xc100efc2 +#elif defined(CONFIG_MARCH_Z990) + .long 1, 0xc0002000 +#elif defined(CONFIG_MARCH_Z900) + .long 1, 0xc0000000 +#endif +#else +#if defined(CONFIG_MARCH_ZEC12) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z196) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z10) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z9_109) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z990) + .long 1, 0x80002000 +#elif defined(CONFIG_MARCH_Z900) + .long 1, 0x80000000 +#endif +#endif +4: +#endif #ifdef CONFIG_64BIT -#include "head64.S" + /* Continue with 64bit startup code in head64.S */ + sam64 # switch to 64 bit mode + jg startup_continue #else -#include "head31.S" + /* Continue with 31bit startup code in head31.S */ + l %r13,5f-.LPG0(%r13) + b 0(%r13) + .align 8 +5: .long startup_continue #endif + + .align 8 +6: .long 0x7fffffff,0xffffffff + +#include "head_kdump.S" + +# +# params at 10400 (setup.h) +# + .org PARMAREA + .long 0,0 # IPL_DEVICE + .long 0,0 # INITRD_START + .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE + + .org COMMAND_LINE + .byte "root=/dev/ram0 ro" + .byte 0 + + .org 0x11000 diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 2d3b089bfb8..6dbe80983a2 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head31.S - * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -10,228 +8,47 @@ * */ -# -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -startup:basr %r13,0 # get base -.LPG1: l %r1, .Lget_ipl_device_addr-.LPG1(%r13) - basr %r14, %r1 - lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers - la %r12,_pstart-.LPG1(%r13) # pointer to parameter area - # move IPL device to lowcore - mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12) - -# -# clear bss memory -# - l %r2,.Lbss_bgn-.LPG1(%r13) # start of bss - l %r3,.Lbss_end-.LPG1(%r13) # end of bss - sr %r3,%r2 # length of bss - sr %r4,%r4 - sr %r5,%r5 # set src,length and pad to zero - sr %r0,%r0 - mvcle %r2,%r4,0 # clear mem - jo .-4 # branch back, if not finish - - l %r2,.Lrcp-.LPG1(%r13) # Read SCP forced command word -.Lservicecall: - stosm .Lpmask-.LPG1(%r13),0x01 # authorize ext interrupts - - stctl %r0, %r0,.Lcr-.LPG1(%r13) # get cr0 - la %r1,0x200 # set bit 22 - o %r1,.Lcr-.LPG1(%r13) # or old cr0 with r1 - st %r1,.Lcr-.LPG1(%r13) - lctl %r0, %r0,.Lcr-.LPG1(%r13) # load modified cr0 - - mvc __LC_EXT_NEW_PSW(8),.Lpcext-.LPG1(%r13) # set postcall psw - la %r1, .Lsclph-.LPG1(%r13) - a %r1,__LC_EXT_NEW_PSW+4 # set handler - st %r1,__LC_EXT_NEW_PSW+4 - - la %r4,_pstart-.LPG1(%r13) # %r4 is our index for sccb stuff - la %r1, .Lsccb-PARMAREA(%r4) # our sccb - .insn rre,0xb2200000,%r2,%r1 # service call - ipm %r1 - srl %r1,28 # get cc code - xr %r3, %r3 - chi %r1,3 - be .Lfchunk-.LPG1(%r13) # leave - chi %r1,2 - be .Lservicecall-.LPG1(%r13) - lpsw .Lwaitsclp-.LPG1(%r13) -.Lsclph: - lh %r1,.Lsccbr-PARMAREA(%r4) - chi %r1,0x10 # 0x0010 is the sucess code - je .Lprocsccb # let's process the sccb - chi %r1,0x1f0 - bne .Lfchunk-.LPG1(%r13) # unhandled error code - c %r2, .Lrcp-.LPG1(%r13) # Did we try Read SCP forced - bne .Lfchunk-.LPG1(%r13) # if no, give up - l %r2, .Lrcp2-.LPG1(%r13) # try with Read SCP - b .Lservicecall-.LPG1(%r13) -.Lprocsccb: - lhi %r1,0 - icm %r1,3,.Lscpincr1-PARMAREA(%r4) # use this one if != 0 - jnz .Lscnd - lhi %r1,0x800 # otherwise report 2GB -.Lscnd: - lhi %r3,0x800 # limit reported memory size to 2GB - cr %r1,%r3 - jl .Lno2gb - lr %r1,%r3 -.Lno2gb: - xr %r3,%r3 # same logic - ic %r3,.Lscpa1-PARMAREA(%r4) - chi %r3,0x00 - jne .Lcompmem - l %r3,.Lscpa2-PARMAREA(%r13) -.Lcompmem: - mr %r2,%r1 # mem in MB on 128-bit - l %r1,.Lonemb-.LPG1(%r13) - mr %r2,%r1 # mem size in bytes in %r3 - b .Lfchunk-.LPG1(%r13) +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> - .align 4 -.Lget_ipl_device_addr: - .long .Lget_ipl_device -.Lpmask: - .byte 0 -.align 8 -.Lpcext:.long 0x00080000,0x80000000 -.Lcr: - .long 0x00 # place holder for cr0 -.Lwaitsclp: - .long 0x010a0000,0x80000000 + .Lsclph -.Lrcp: - .int 0x00120001 # Read SCP forced code -.Lrcp2: - .int 0x00020001 # Read SCP code -.Lonemb: - .int 0x100000 -.Lfchunk: - -# -# find memory chunks. -# - lr %r9,%r3 # end of mem - mvc __LC_PGM_NEW_PSW(8),.Lpcmem-.LPG1(%r13) - la %r1,1 # test in increments of 128KB - sll %r1,17 - l %r3,.Lmchunk-.LPG1(%r13) # get pointer to memory_chunk array - slr %r4,%r4 # set start of chunk to zero - slr %r5,%r5 # set end of chunk to zero - slr %r6,%r6 # set access code to zero - la %r10, MEMORY_CHUNKS # number of chunks -.Lloop: - tprot 0(%r5),0 # test protection of first byte - ipm %r7 - srl %r7,28 - clr %r6,%r7 # compare cc with last access code - be .Lsame-.LPG1(%r13) - b .Lchkmem-.LPG1(%r13) -.Lsame: - ar %r5,%r1 # add 128KB to end of chunk - bno .Lloop-.LPG1(%r13) # r1 < 0x80000000 -> loop -.Lchkmem: # > 2GB or tprot got a program check - clr %r4,%r5 # chunk size > 0? - be .Lchkloop-.LPG1(%r13) - st %r4,0(%r3) # store start address of chunk - lr %r0,%r5 - slr %r0,%r4 - st %r0,4(%r3) # store size of chunk - st %r6,8(%r3) # store type of chunk - la %r3,12(%r3) - l %r4,.Lmemsize-.LPG1(%r13) # address of variable memory_size - st %r5,0(%r4) # store last end to memory size - ahi %r10,-1 # update chunk number -.Lchkloop: - lr %r6,%r7 # set access code to last cc - # we got an exception or we're starting a new - # chunk , we must check if we should - # still try to find valid memory (if we detected - # the amount of available storage), and if we - # have chunks left - xr %r0,%r0 - clr %r0,%r9 # did we detect memory? - je .Ldonemem # if not, leave - chi %r10,0 # do we have chunks left? - je .Ldonemem - alr %r5,%r1 # add 128KB to end of chunk - lr %r4,%r5 # potential new chunk - clr %r5,%r9 # should we go on? - jl .Lloop -.Ldonemem: - l %r12,.Lmflags-.LPG1(%r13) # get address of machine_flags -# -# find out if we are running under VM -# - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running under VM ? - bno .Lnovm-.LPG1(%r13) - oi 3(%r12),1 # set VM flag -.Lnovm: - lh %r0,__LC_CPUID+4 # get cpu version - chi %r0,0x7490 # running on a P/390 ? - bne .Lnop390-.LPG1(%r13) - oi 3(%r12),4 # set P/390 flag -.Lnop390: - -# -# find out if we have an IEEE fpu -# - mvc __LC_PGM_NEW_PSW(8),.Lpcfpu-.LPG1(%r13) - efpc %r0,0 # test IEEE extract fpc instruction - oi 3(%r12),2 # set IEEE fpu flag -.Lchkfpu: - -# -# find out if we have the CSP instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpccsp-.LPG1(%r13) - la %r0,0 - lr %r1,%r0 - la %r2,4 - csp %r0,%r2 # Test CSP instruction - oi 3(%r12),8 # set CSP flag -.Lchkcsp: +__HEAD +ENTRY(startup_continue) + basr %r13,0 # get base +.LPG1: + l %r1,.Lbase_cc-.LPG1(%r13) + mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK + lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers + l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area + # move IPL device to lowcore # -# find out if we have the MVPG instruction +# Setup stack # - mvc __LC_PGM_NEW_PSW(8),.Lpcmvpg-.LPG1(%r13) - sr %r0,%r0 - la %r1,0 - la %r2,0 - mvpg %r1,%r2 # Test CSP instruction - oi 3(%r12),16 # set MVPG flag -.Lchkmvpg: - + l %r15,.Linittu-.LPG1(%r13) + st %r15,__LC_THREAD_INFO # cache thread info in lowcore + mvc __LC_CURRENT(4),__TI_task(%r15) + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE + st %r15,__LC_KERNEL_STACK # set end of kernel stack + ahi %r15,-96 # -# find out if we have the IDTE instruction +# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, +# and create a kernel NSS if the SAVESYS= parm is defined # - mvc __LC_PGM_NEW_PSW(8),.Lpcidte-.LPG1(%r13) - .long 0xb2b10000 # store facility list - tm 0xc8,0x08 # check bit for clearing-by-ASCE - bno .Lchkidte-.LPG1(%r13) - lhi %r1,2094 - lhi %r2,0 - .long 0xb98e2001 - oi 3(%r12),0x80 # set IDTE flag -.Lchkidte: - + l %r14,.Lstartup_init-.LPG1(%r13) + basr %r14,%r14 lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space, # virtual and never return ... .align 8 .Lentry:.long 0x00080000,0x80000000 + _stext -.Lctl: .long 0x04b50002 # cr0: various things +.Lctl: .long 0x04b50000 # cr0: various things .long 0 # cr1: primary space segment table .long .Lduct # cr2: dispatchable unit control table .long 0 # cr3: instruction authorization .long 0 # cr4: instruction authorization - .long 0xffffffff # cr5: primary-aste origin + .long .Lduct # cr5: primary-aste origin .long 0 # cr6: I/O interrupts .long 0 # cr7: secondary space segment table .long 0 # cr8: access registers translation @@ -242,80 +59,34 @@ startup:basr %r13,0 # get base .long 0 # cr13: home space segment table .long 0xc0000000 # cr14: machine check handling off .long 0 # cr15: linkage stack operations -.Lpcmem:.long 0x00080000,0x80000000 + .Lchkmem -.Lpcfpu:.long 0x00080000,0x80000000 + .Lchkfpu -.Lpccsp:.long 0x00080000,0x80000000 + .Lchkcsp -.Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg -.Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte -.Lmemsize:.long memory_size -.Lmchunk:.long memory_chunk -.Lmflags:.long machine_flags .Lbss_bgn: .long __bss_start .Lbss_end: .long _end - - .org PARMAREA-64 -.Lduct: .long 0,0,0,0,0,0,0,0 +.Lparmaddr: .long PARMAREA +.Linittu: .long init_thread_union +.Lstartup_init: + .long startup_init + .align 64 +.Lduct: .long 0,0,0,0,.Lduald,0,0,0 .long 0,0,0,0,0,0,0,0 + .align 128 +.Lduald:.rept 8 + .long 0x80000000,0,0,0 # invalid access-list entries + .endr +.Lbase_cc: + .long sched_clock_base_cc -# -# params at 10400 (setup.h) -# - .org PARMAREA - .global _pstart -_pstart: - .long 0,0 # IPL_DEVICE - .long 0,RAMDISK_ORIGIN # INITRD_START - .long 0,RAMDISK_SIZE # INITRD_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - .org 0x11000 -.Lsccb: - .hword 0x1000 # length, one page - .byte 0x00,0x00,0x00 - .byte 0x80 # variable response bit set -.Lsccbr: - .hword 0x00 # response code -.Lscpincr1: - .hword 0x00 -.Lscpa1: - .byte 0x00 - .fill 89,1,0 -.Lscpa2: - .int 0x00 -.Lscpincr2: - .quad 0x00 - .fill 3984,1,0 - .org 0x12000 - .global _pend -_pend: - - GET_IPL_DEVICE - -#ifdef CONFIG_SHARED_KERNEL - .org 0x100000 -#endif +ENTRY(_ehead) + .org 0x100000 - 0x11000 # head.o ends at 0x11000 # -# startup-code, running in virtual mode +# startup-code, running in absolute addressing mode # - .globl _stext -_stext: basr %r13,0 # get base +ENTRY(_stext) + basr %r13,0 # get base .LPG3: -# -# Setup stack -# - l %r15,.Linittu-.LPG3(%r13) - mvc __LC_CURRENT(4),__TI_task(%r15) - ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE - st %r15,__LC_KERNEL_STACK # set end of kernel stack - ahi %r15,-96 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain - # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) @@ -331,6 +102,5 @@ _stext: basr %r13,0 # get base # .align 8 .Ldw: .long 0x000a0000,0x00000000 -.Linittu:.long init_thread_union .Lstart:.long start_kernel .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index f08c06f45d5..d7c00507568 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head64.S - * - * (C) Copyright IBM Corp. 1999,2005 + * Copyright IBM Corp. 1999, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -10,320 +8,98 @@ * */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> + +__HEAD +ENTRY(startup_continue) + larl %r1,sched_clock_base_cc + mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK + larl %r13,.LPG1 # get base + lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers + lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area + # move IPL device to lowcore + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -startup:basr %r13,0 # get base -.LPG1: sll %r13,1 # remove high order bit - srl %r13,1 - l %r1,.Lget_ipl_device_addr-.LPG1(%r13) - basr %r14,%r1 - lhi %r1,1 # mode 1 = esame - slr %r0,%r0 # set cpuid to zero - sigp %r1,%r0,0x12 # switch to esame mode - sam64 # switch to 64 bit mode - lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers - larl %r12,_pstart # pointer to parameter area - # move IPL device to lowcore - mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) - -# -# clear bss memory -# - larl %r2,__bss_start # start of bss segment - larl %r3,_end # end of bss segment - sgr %r3,%r2 # length of bss - sgr %r4,%r4 # - sgr %r5,%r5 # set src,length and pad to zero - mvcle %r2,%r4,0 # clear mem - jo .-4 # branch back, if not finish - - l %r2,.Lrcp-.LPG1(%r13) # Read SCP forced command word -.Lservicecall: - stosm .Lpmask-.LPG1(%r13),0x01 # authorize ext interrupts - - stctg %r0,%r0,.Lcr-.LPG1(%r13) # get cr0 - la %r1,0x200 # set bit 22 - og %r1,.Lcr-.LPG1(%r13) # or old cr0 with r1 - stg %r1,.Lcr-.LPG1(%r13) - lctlg %r0,%r0,.Lcr-.LPG1(%r13) # load modified cr0 - - mvc __LC_EXT_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) # set postcall psw - larl %r1,.Lsclph - stg %r1,__LC_EXT_NEW_PSW+8 # set handler - - larl %r4,_pstart # %r4 is our index for sccb stuff - la %r1,.Lsccb-PARMAREA(%r4) # our sccb - .insn rre,0xb2200000,%r2,%r1 # service call - ipm %r1 - srl %r1,28 # get cc code - xr %r3,%r3 - chi %r1,3 - be .Lfchunk-.LPG1(%r13) # leave - chi %r1,2 - be .Lservicecall-.LPG1(%r13) - lpswe .Lwaitsclp-.LPG1(%r13) -.Lsclph: - lh %r1,.Lsccbr-PARMAREA(%r4) - chi %r1,0x10 # 0x0010 is the sucess code - je .Lprocsccb # let's process the sccb - chi %r1,0x1f0 - bne .Lfchunk-.LPG1(%r13) # unhandled error code - c %r2,.Lrcp-.LPG1(%r13) # Did we try Read SCP forced - bne .Lfchunk-.LPG1(%r13) # if no, give up - l %r2,.Lrcp2-.LPG1(%r13) # try with Read SCP - b .Lservicecall-.LPG1(%r13) -.Lprocsccb: - lghi %r1,0 - icm %r1,3,.Lscpincr1-PARMAREA(%r4) # use this one if != 0 - jnz .Lscnd - lg %r1,.Lscpincr2-PARMAREA(%r4) # otherwise use this one -.Lscnd: - xr %r3,%r3 # same logic - ic %r3,.Lscpa1-PARMAREA(%r4) - chi %r3,0x00 - jne .Lcompmem - l %r3,.Lscpa2-PARMAREA(%r13) -.Lcompmem: - mlgr %r2,%r1 # mem in MB on 128-bit - l %r1,.Lonemb-.LPG1(%r13) - mlgr %r2,%r1 # mem size in bytes in %r3 - b .Lfchunk-.LPG1(%r13) - - .align 4 -.Lget_ipl_device_addr: - .long .Lget_ipl_device -.Lpmask: - .byte 0 - .align 8 -.Lcr: - .quad 0x00 # place holder for cr0 -.Lwaitsclp: - .quad 0x0102000180000000,.Lsclph -.Lrcp: - .int 0x00120001 # Read SCP forced code -.Lrcp2: - .int 0x00020001 # Read SCP code -.Lonemb: - .int 0x100000 - -.Lfchunk: - # set program check new psw mask - mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) - -# -# find memory chunks. -# - lgr %r9,%r3 # end of mem - larl %r1,.Lchkmem # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - la %r1,1 # test in increments of 128KB - sllg %r1,%r1,17 - larl %r3,memory_chunk - slgr %r4,%r4 # set start of chunk to zero - slgr %r5,%r5 # set end of chunk to zero - slr %r6,%r6 # set access code to zero - la %r10,MEMORY_CHUNKS # number of chunks -.Lloop: - tprot 0(%r5),0 # test protection of first byte - ipm %r7 - srl %r7,28 - clr %r6,%r7 # compare cc with last access code - je .Lsame - j .Lchkmem -.Lsame: - algr %r5,%r1 # add 128KB to end of chunk - # no need to check here, - brc 12,.Lloop # this is the same chunk -.Lchkmem: # > 16EB or tprot got a program check - clgr %r4,%r5 # chunk size > 0? - je .Lchkloop - stg %r4,0(%r3) # store start address of chunk - lgr %r0,%r5 - slgr %r0,%r4 - stg %r0,8(%r3) # store size of chunk - st %r6,20(%r3) # store type of chunk - la %r3,24(%r3) - larl %r8,memory_size - stg %r5,0(%r8) # store memory size - ahi %r10,-1 # update chunk number -.Lchkloop: - lr %r6,%r7 # set access code to last cc - # we got an exception or we're starting a new - # chunk , we must check if we should - # still try to find valid memory (if we detected - # the amount of available storage), and if we - # have chunks left - lghi %r4,1 - sllg %r4,%r4,31 - clgr %r5,%r4 - je .Lhsaskip - xr %r0, %r0 - clgr %r0, %r9 # did we detect memory? - je .Ldonemem # if not, leave - chi %r10, 0 # do we have chunks left? - je .Ldonemem -.Lhsaskip: - algr %r5,%r1 # add 128KB to end of chunk - lgr %r4,%r5 # potential new chunk - clgr %r5,%r9 # should we go on? - jl .Lloop -.Ldonemem: - - larl %r12,machine_flags -# -# find out if we are running under VM -# - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running under VM ? - bno 0f-.LPG1(%r13) - oi 7(%r12),1 # set VM flag -0: lh %r0,__LC_CPUID+4 # get cpu version - chi %r0,0x7490 # running on a P/390 ? - bne 1f-.LPG1(%r13) - oi 7(%r12),4 # set P/390 flag -1: - -# -# find out if we have the MVPG instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - sgr %r0,%r0 - lghi %r1,0 - lghi %r2,0 - mvpg %r1,%r2 # test MVPG instruction - oi 7(%r12),16 # set MVPG flag -0: - -# -# find out if the diag 0x44 works in 64 bit mode -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - diag 0,0,0x44 # test diag 0x44 - oi 7(%r12),32 # set diag44 flag -0: - -# -# find out if we have the IDTE instruction +# Setup stack # - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - .long 0xb2b10000 # store facility list - tm 0xc8,0x08 # check bit for clearing-by-ASCE - bno 0f-.LPG1(%r13) - lhi %r1,2094 - lhi %r2,0 - .long 0xb98e2001 - oi 7(%r12),0x80 # set IDTE flag -0: - - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... - .align 16 -.Lentry:.quad 0x0000000180000000,_stext -.Lctl: .quad 0x04b50002 # cr0: various things - .quad 0 # cr1: primary space segment table - .quad .Lduct # cr2: dispatchable unit control table - .quad 0 # cr3: instruction authorization - .quad 0 # cr4: instruction authorization - .quad 0xffffffffffffffff # cr5: primary-aste origin - .quad 0 # cr6: I/O interrupts - .quad 0 # cr7: secondary space segment table - .quad 0 # cr8: access registers translation - .quad 0 # cr9: tracing off - .quad 0 # cr10: tracing off - .quad 0 # cr11: tracing off - .quad 0 # cr12: tracing off - .quad 0 # cr13: home space segment table - .quad 0xc0000000 # cr14: machine check handling off - .quad 0 # cr15: linkage stack operations -.Lpcmsk:.quad 0x0000000180000000 + larl %r15,init_thread_union + stg %r15,__LC_THREAD_INFO # cache thread info in lowcore + lg %r14,__TI_task(%r15) # cache current in lowcore + stg %r14,__LC_CURRENT + aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE + stg %r15,__LC_KERNEL_STACK # set end of kernel stack + aghi %r15,-160 +# +# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, +# and create a kernel NSS if the SAVESYS= parm is defined +# + brasl %r14,startup_init + lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, + # virtual and never return ... + .align 16 +.LPG1: +.Lentry:.quad 0x0000000180000000,_stext +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad .Lduct # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0 # cr4: instruction authorization + .quad .Lduct # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad .Llinkage_stack # cr15: linkage stack operations +.Lpcmsk:.quad 0x0000000180000000 .L4malign:.quad 0xffffffffffc00000 -.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 -.Lnop: .long 0x07000700 - - .org PARMAREA-64 -.Lduct: .long 0,0,0,0,0,0,0,0 - .long 0,0,0,0,0,0,0,0 - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .global _pstart -_pstart: - .quad 0 # IPL_DEVICE - .quad RAMDISK_ORIGIN # INITRD_START - .quad RAMDISK_SIZE # INITRD_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - .org 0x11000 -.Lsccb: - .hword 0x1000 # length, one page - .byte 0x00,0x00,0x00 - .byte 0x80 # variable response bit set -.Lsccbr: - .hword 0x00 # response code -.Lscpincr1: - .hword 0x00 -.Lscpa1: - .byte 0x00 - .fill 89,1,0 -.Lscpa2: - .int 0x00 -.Lscpincr2: - .quad 0x00 - .fill 3984,1,0 - .org 0x12000 - .global _pend -_pend: - - GET_IPL_DEVICE - -#ifdef CONFIG_SHARED_KERNEL - .org 0x100000 -#endif - -# -# startup-code, running in virtual mode -# - .globl _stext -_stext: basr %r13,0 # get base +.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 +.Lnop: .long 0x07000700 +.Lparmaddr: + .quad PARMAREA + .align 64 +.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 + .long 0,0,0,0,0,0,0,0 +.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 + .align 128 +.Lduald:.rept 8 + .long 0x80000000,0,0,0 # invalid access-list entries + .endr +.Llinkage_stack: + .long 0,0,0x89000000,0,0,0,0x8a000000,0 + +ENTRY(_ehead) + + .org 0x100000 - 0x11000 # head.o ends at 0x11000 +# +# startup-code, running in absolute addressing mode +# +ENTRY(_stext) + basr %r13,0 # get base .LPG3: -# -# Setup stack -# - larl %r15,init_thread_union - lg %r14,__TI_task(%r15) # cache current in lowcore - stg %r14,__LC_CURRENT - aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE - stg %r15,__LC_KERNEL_STACK # set end of kernel stack - aghi %r15,-160 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain - # check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) + stctg %c0,%c15,0(%r15) + oi 6(%r15),0x60 # enable sigp emergency & external call + oi 4(%r15),0x10 # switch on low address proctection + lctlg %c0,%c15,0(%r15) -# - lam 0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess - brasl %r14,start_kernel # go to C code + lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess + brasl %r14,start_kernel # go to C code # # We returned from start_kernel ?!? PANIK # - basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw -# - .align 8 -.Ldw: .quad 0x0002000180000000,0x0000000000000000 -.Laregs: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + basr %r13,0 + lpswe .Ldw-.(%r13) # load disabled wait psw + .align 8 +.Ldw: .quad 0x0002000180000000,0x0000000000000000 +.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000000..085a95eb315 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,108 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <asm/sigp.h> + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +.align 8 +.Lrestart_psw: + .quad 0x0000000080000000,0x0000000000000000 + startup +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c deleted file mode 100644 index d73a74013e7..00000000000 --- a/arch/s390/kernel/init_task.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * arch/s390/kernel/init_task.c - * - * S390 version - * - * Derived from "arch/i386/kernel/init_task.c" - */ - -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial thread structure. - * - * We need to make sure that this is 8192-byte aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c new file mode 100644 index 00000000000..633ca750453 --- /dev/null +++ b/arch/s390/kernel/ipl.c @@ -0,0 +1,2069 @@ +/* + * ipl/reipl/dump support for Linux on s390. + * + * Copyright IBM Corp. 2005, 2012 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * Volker Sameske <sameske@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/crash_dump.h> +#include <linux/debug_locks.h> +#include <asm/ipl.h> +#include <asm/smp.h> +#include <asm/setup.h> +#include <asm/cpcmd.h> +#include <asm/cio.h> +#include <asm/ebcdic.h> +#include <asm/reset.h> +#include <asm/sclp.h> +#include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include "entry.h" + +#define IPL_PARM_BLOCK_VERSION 0 + +#define IPL_UNKNOWN_STR "unknown" +#define IPL_CCW_STR "ccw" +#define IPL_FCP_STR "fcp" +#define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NSS_STR "nss" + +#define DUMP_CCW_STR "ccw" +#define DUMP_FCP_STR "fcp" +#define DUMP_NONE_STR "none" + +/* + * Four shutdown trigger types are supported: + * - panic + * - halt + * - power off + * - reipl + * - restart + */ +#define ON_PANIC_STR "on_panic" +#define ON_HALT_STR "on_halt" +#define ON_POFF_STR "on_poff" +#define ON_REIPL_STR "on_reboot" +#define ON_RESTART_STR "on_restart" + +struct shutdown_action; +struct shutdown_trigger { + char *name; + struct shutdown_action *action; +}; + +/* + * The following shutdown action types are supported: + */ +#define SHUTDOWN_ACTION_IPL_STR "ipl" +#define SHUTDOWN_ACTION_REIPL_STR "reipl" +#define SHUTDOWN_ACTION_DUMP_STR "dump" +#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd" +#define SHUTDOWN_ACTION_STOP_STR "stop" +#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl" + +struct shutdown_action { + char *name; + void (*fn) (struct shutdown_trigger *trigger); + int (*init) (void); + int init_rc; +}; + +static char *ipl_type_str(enum ipl_type type) +{ + switch (type) { + case IPL_TYPE_CCW: + return IPL_CCW_STR; + case IPL_TYPE_FCP: + return IPL_FCP_STR; + case IPL_TYPE_FCP_DUMP: + return IPL_FCP_DUMP_STR; + case IPL_TYPE_NSS: + return IPL_NSS_STR; + case IPL_TYPE_UNKNOWN: + default: + return IPL_UNKNOWN_STR; + } +} + +enum dump_type { + DUMP_TYPE_NONE = 1, + DUMP_TYPE_CCW = 2, + DUMP_TYPE_FCP = 4, +}; + +static char *dump_type_str(enum dump_type type) +{ + switch (type) { + case DUMP_TYPE_NONE: + return DUMP_NONE_STR; + case DUMP_TYPE_CCW: + return DUMP_CCW_STR; + case DUMP_TYPE_FCP: + return DUMP_FCP_STR; + default: + return NULL; + } +} + +/* + * Must be in data section since the bss section + * is not cleared when these are accessed. + */ +static u16 ipl_devno __attribute__((__section__(".data"))) = 0; +u32 ipl_flags __attribute__((__section__(".data"))) = 0; + +enum ipl_method { + REIPL_METHOD_CCW_CIO, + REIPL_METHOD_CCW_DIAG, + REIPL_METHOD_CCW_VM, + REIPL_METHOD_FCP_RO_DIAG, + REIPL_METHOD_FCP_RW_DIAG, + REIPL_METHOD_FCP_RO_VM, + REIPL_METHOD_FCP_DUMP, + REIPL_METHOD_NSS, + REIPL_METHOD_NSS_DIAG, + REIPL_METHOD_DEFAULT, +}; + +enum dump_method { + DUMP_METHOD_NONE, + DUMP_METHOD_CCW_CIO, + DUMP_METHOD_CCW_DIAG, + DUMP_METHOD_CCW_VM, + DUMP_METHOD_FCP_DIAG, +}; + +static int diag308_set_works = 0; + +static struct ipl_parameter_block ipl_block; + +static int reipl_capabilities = IPL_TYPE_UNKNOWN; + +static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; +static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT; +static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_ccw; +static struct ipl_parameter_block *reipl_block_nss; +static struct ipl_parameter_block *reipl_block_actual; + +static int dump_capabilities = DUMP_TYPE_NONE; +static enum dump_type dump_type = DUMP_TYPE_NONE; +static enum dump_method dump_method = DUMP_METHOD_NONE; +static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_ccw; + +static struct sclp_ipl_info sclp_ipl_info; + +int diag308(unsigned long subcode, void *addr) +{ + register unsigned long _addr asm("0") = (unsigned long) addr; + register unsigned long _rc asm("1") = 0; + + asm volatile( + " diag %0,%2,0x308\n" + "0:\n" + EX_TABLE(0b,0b) + : "+d" (_addr), "+d" (_rc) + : "d" (subcode) : "cc", "memory"); + return _rc; +} +EXPORT_SYMBOL_GPL(diag308); + +/* SYSFS */ + +#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return sprintf(page, _format, _value); \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL); + +#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return sprintf(page, _fmt_out, \ + (unsigned long long) _value); \ +} \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long value; \ + if (sscanf(buf, _fmt_in, &value) != 1) \ + return -EINVAL; \ + _value = value; \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name,(S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store); + +#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return sprintf(page, _fmt_out, _value); \ +} \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + strncpy(_value, buf, sizeof(_value) - 1); \ + strim(_value); \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name,(S_IRUGO | S_IWUSR), \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store); + +static void make_attrs_ro(struct attribute **attrs) +{ + while (*attrs) { + (*attrs)->mode = S_IRUGO; + attrs++; + } +} + +/* + * ipl section + */ + +static __init enum ipl_type get_ipl_type(void) +{ + struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; + + if (ipl_flags & IPL_NSS_VALID) + return IPL_TYPE_NSS; + if (!(ipl_flags & IPL_DEVNO_VALID)) + return IPL_TYPE_UNKNOWN; + if (!(ipl_flags & IPL_PARMBLOCK_VALID)) + return IPL_TYPE_CCW; + if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION) + return IPL_TYPE_UNKNOWN; + if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP) + return IPL_TYPE_UNKNOWN; + if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) + return IPL_TYPE_FCP_DUMP; + return IPL_TYPE_FCP; +} + +struct ipl_info ipl_info; +EXPORT_SYMBOL_GPL(ipl_info); + +static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(ipl_info.type)); +} + +static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); + +/* VM IPL PARM routines */ +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + int i; + size_t len; + char has_lowercase = 0; + + len = 0; + if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && + (ipb->ipl_info.ccw.vm_parm_len > 0)) { + + len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); + memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); + /* If at least one character is lowercase, we assume mixed + * case; otherwise we convert everything to lowercase. + */ + for (i = 0; i < len; i++) + if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ + (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ + (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ + has_lowercase = 1; + break; + } + if (!has_lowercase) + EBC_TOLOWER(dest, len); + EBCASC(dest, len); + } + dest[len] = 0; + + return len; +} + +size_t append_ipl_vmparm(char *dest, size_t size) +{ + size_t rc; + + rc = 0; + if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) + rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); + else + dest[0] = 0; + return rc; +} + +static ssize_t ipl_vm_parm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char parm[DIAG308_VMPARM_SIZE + 1] = {}; + + append_ipl_vmparm(parm, sizeof(parm)); + return sprintf(page, "%s\n", parm); +} + +static size_t scpdata_length(const char* buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + size_t count; + size_t i; + int has_lowercase; + + count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, + ipb->ipl_info.fcp.scp_data_len)); + if (!count) + goto out; + + has_lowercase = 0; + for (i = 0; i < count; i++) { + if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + count = 0; + goto out; + } + if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) + has_lowercase = 1; + } + + if (has_lowercase) + memcpy(dest, ipb->ipl_info.fcp.scp_data, count); + else + for (i = 0; i < count; i++) + dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); +out: + dest[count] = '\0'; + return count; +} + +size_t append_ipl_scpdata(char *dest, size_t len) +{ + size_t rc; + + rc = 0; + if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) + rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); + else + dest[0] = 0; + return rc; +} + + +static struct kobj_attribute sys_ipl_vm_parm_attr = + __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); + +static ssize_t sys_ipl_device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; + + switch (ipl_info.type) { + case IPL_TYPE_CCW: + return sprintf(page, "0.0.%04x\n", ipl_devno); + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno); + default: + return 0; + } +} + +static struct kobj_attribute sys_ipl_device_attr = + __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); + +static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START, + IPL_PARMBLOCK_SIZE); +} + +static struct bin_attribute ipl_parameter_attr = { + .attr = { + .name = "binary_parameter", + .mode = S_IRUGO, + }, + .size = PAGE_SIZE, + .read = &ipl_parameter_read, +}; + +static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len; + void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static struct bin_attribute ipl_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO, + }, + .size = PAGE_SIZE, + .read = ipl_scp_data_read, +}; + +/* FCP ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long) + IPL_PARMBLOCK_START->ipl_info.fcp.br_lba); + +static struct attribute *ipl_fcp_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_fcp_wwpn_attr.attr, + &sys_ipl_fcp_lun_attr.attr, + &sys_ipl_fcp_bootprog_attr.attr, + &sys_ipl_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group ipl_fcp_attr_group = { + .attrs = ipl_fcp_attrs, +}; + +/* CCW ipl device attributes */ + +static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + + if (!sclp_ipl_info.is_valid) + return sprintf(page, "#unknown#\n"); + memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + strim(loadparm); + return sprintf(page, "%s\n", loadparm); +} + +static struct kobj_attribute sys_ipl_ccw_loadparm_attr = + __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); + +static struct attribute *ipl_ccw_attrs_vm[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_vm_parm_attr.attr, + NULL, +}; + +static struct attribute *ipl_ccw_attrs_lpar[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_ccw_attr_group_vm = { + .attrs = ipl_ccw_attrs_vm, +}; + +static struct attribute_group ipl_ccw_attr_group_lpar = { + .attrs = ipl_ccw_attrs_lpar +}; + +/* NSS ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name); + +static struct attribute *ipl_nss_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_nss_name_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_vm_parm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nss_attr_group = { + .attrs = ipl_nss_attrs, +}; + +/* UNKNOWN ipl device attributes */ + +static struct attribute *ipl_unknown_attrs[] = { + &sys_ipl_type_attr.attr, + NULL, +}; + +static struct attribute_group ipl_unknown_attr_group = { + .attrs = ipl_unknown_attrs, +}; + +static struct kset *ipl_kset; + +static int __init ipl_register_fcp_files(void) +{ + int rc; + + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); + if (rc) + goto out; + rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_parameter_attr); + if (rc) + goto out_ipl_parm; + rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_scp_data_attr); + if (!rc) + goto out; + + sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr); + +out_ipl_parm: + sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group); +out: + return rc; +} + +static void __ipl_run(void *unused) +{ + diag308(DIAG308_IPL, NULL); + if (MACHINE_IS_VM) + __cpcmd("IPL", NULL, 0, NULL); + else if (ipl_info.type == IPL_TYPE_CCW) + reipl_ccw_dev(&ipl_info.data.ccw.dev_id); +} + +static void ipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__ipl_run, NULL); +} + +static int __init ipl_init(void) +{ + int rc; + + ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj); + if (!ipl_kset) { + rc = -ENOMEM; + goto out; + } + switch (ipl_info.type) { + case IPL_TYPE_CCW: + if (MACHINE_IS_VM) + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_vm); + else + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_lpar); + break; + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + rc = ipl_register_fcp_files(); + break; + case IPL_TYPE_NSS: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group); + break; + default: + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_unknown_attr_group); + break; + } +out: + if (rc) + panic("ipl_init failed: rc = %i\n", rc); + + return 0; +} + +static struct shutdown_action __refdata ipl_action = { + .name = SHUTDOWN_ACTION_IPL_STR, + .fn = ipl_run, + .init = ipl_init, +}; + +/* + * reipl shutdown action: Reboot Linux on shutdown. + */ + +/* VM IPL PARM attributes */ +static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + return sprintf(page, "%s\n", vmparm); +} + +static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, + size_t vmparm_max, + const char *buf, size_t len) +{ + int i, ip_len; + + /* ignore trailing newline */ + ip_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + ip_len--; + + if (ip_len > vmparm_max) + return -EINVAL; + + /* parm is used to store kernel options, check for common chars */ + for (i = 0; i < ip_len; i++) + if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))) + return -EINVAL; + + memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); + ipb->ipl_info.ccw.vm_parm_len = ip_len; + if (ip_len > 0) { + ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len); + ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len); + } else { + ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; + } + + return len; +} + +/* NSS wrapper */ +static ssize_t reipl_nss_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len); +} + +static struct kobj_attribute sys_reipl_nss_vmparm_attr = + __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show, + reipl_nss_vmparm_store); +static struct kobj_attribute sys_reipl_ccw_vmparm_attr = + __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show, + reipl_ccw_vmparm_store); + +/* FCP reipl device attributes */ + +static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; + void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t padding; + size_t scpdata_len; + + if (off < 0) + return -EINVAL; + + if (off >= DIAG308_SCPDATA_SIZE) + return -ENOSPC; + + if (count > DIAG308_SCPDATA_SIZE - off) + count = DIAG308_SCPDATA_SIZE - off; + + memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); + scpdata_len = off + count; + + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len; + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; + reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_fcp_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO | S_IWUSR, + }, + .size = PAGE_SIZE, + .read = reipl_fcp_scpdata_read, + .write = reipl_fcp_scpdata_write, +}; + +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", + reipl_block_fcp->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", + reipl_block_fcp->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", + reipl_block_fcp->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", + reipl_block_fcp->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_fcp->ipl_info.fcp.devno); + +static struct attribute *reipl_fcp_attrs[] = { + &sys_reipl_fcp_device_attr.attr, + &sys_reipl_fcp_wwpn_attr.attr, + &sys_reipl_fcp_lun_attr.attr, + &sys_reipl_fcp_bootprog_attr.attr, + &sys_reipl_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group reipl_fcp_attr_group = { + .attrs = reipl_fcp_attrs, +}; + +/* CCW reipl device attributes */ + +DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_ccw->ipl_info.ccw.devno); + +static void reipl_get_ascii_loadparm(char *loadparm, + struct ipl_parameter_block *ibp) +{ + memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + loadparm[LOADPARM_LEN] = 0; + strim(loadparm); +} + +static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char buf[LOADPARM_LEN + 1]; + + reipl_get_ascii_loadparm(buf, ipb); + return sprintf(page, "%s\n", buf); +} + +static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, + const char *buf, size_t len) +{ + int i, lp_len; + + /* ignore trailing newline */ + lp_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + lp_len--; + /* loadparm can have max 8 characters and must not start with a blank */ + if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' '))) + return -EINVAL; + /* loadparm can only contain "a-z,A-Z,0-9,SP,." */ + for (i = 0; i < lp_len; i++) { + if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') || + (buf[i] == '.')) + continue; + return -EINVAL; + } + /* initialize loadparm with blanks */ + memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN); + /* copy and convert to ebcdic */ + memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len); + ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN); + return len; +} + +/* NSS wrapper */ +static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_nss, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_ccw, buf, len); +} + +static struct kobj_attribute sys_reipl_ccw_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show, + reipl_ccw_loadparm_store); + +static struct attribute *reipl_ccw_attrs_vm[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_vmparm_attr.attr, + NULL, +}; + +static struct attribute *reipl_ccw_attrs_lpar[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_ccw_attr_group_vm = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_vm, +}; + +static struct attribute_group reipl_ccw_attr_group_lpar = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_lpar, +}; + + +/* NSS reipl device attributes */ +static void reipl_get_ascii_nss_name(char *dst, + struct ipl_parameter_block *ipb) +{ + memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + EBCASC(dst, NSS_NAME_SIZE); + dst[NSS_NAME_SIZE] = 0; +} + +static ssize_t reipl_nss_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char nss_name[NSS_NAME_SIZE + 1] = {}; + + reipl_get_ascii_nss_name(nss_name, reipl_block_nss); + return sprintf(page, "%s\n", nss_name); +} + +static ssize_t reipl_nss_name_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int nss_len; + + /* ignore trailing newline */ + nss_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + nss_len--; + + if (nss_len > NSS_NAME_SIZE) + return -EINVAL; + + memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE); + if (nss_len > 0) { + reipl_block_nss->ipl_info.ccw.vm_flags |= + DIAG308_VM_FLAGS_NSS_VALID; + memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len); + ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + } else { + reipl_block_nss->ipl_info.ccw.vm_flags &= + ~DIAG308_VM_FLAGS_NSS_VALID; + } + + return len; +} + +static struct kobj_attribute sys_reipl_nss_name_attr = + __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show, + reipl_nss_name_store); + +static struct kobj_attribute sys_reipl_nss_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show, + reipl_nss_loadparm_store); + +static struct attribute *reipl_nss_attrs[] = { + &sys_reipl_nss_name_attr.attr, + &sys_reipl_nss_loadparm_attr.attr, + &sys_reipl_nss_vmparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nss_attr_group = { + .name = IPL_NSS_STR, + .attrs = reipl_nss_attrs, +}; + +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ + reipl_block_actual = reipl_block; + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block->hdr.len); +} + +/* reipl type */ + +static int reipl_set_type(enum ipl_type type) +{ + if (!(reipl_capabilities & type)) + return -EINVAL; + + switch(type) { + case IPL_TYPE_CCW: + if (diag308_set_works) + reipl_method = REIPL_METHOD_CCW_DIAG; + else if (MACHINE_IS_VM) + reipl_method = REIPL_METHOD_CCW_VM; + else + reipl_method = REIPL_METHOD_CCW_CIO; + set_reipl_block_actual(reipl_block_ccw); + break; + case IPL_TYPE_FCP: + if (diag308_set_works) + reipl_method = REIPL_METHOD_FCP_RW_DIAG; + else if (MACHINE_IS_VM) + reipl_method = REIPL_METHOD_FCP_RO_VM; + else + reipl_method = REIPL_METHOD_FCP_RO_DIAG; + set_reipl_block_actual(reipl_block_fcp); + break; + case IPL_TYPE_FCP_DUMP: + reipl_method = REIPL_METHOD_FCP_DUMP; + break; + case IPL_TYPE_NSS: + if (diag308_set_works) + reipl_method = REIPL_METHOD_NSS_DIAG; + else + reipl_method = REIPL_METHOD_NSS; + set_reipl_block_actual(reipl_block_nss); + break; + case IPL_TYPE_UNKNOWN: + reipl_method = REIPL_METHOD_DEFAULT; + break; + default: + BUG(); + } + reipl_type = type; + return 0; +} + +static ssize_t reipl_type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(reipl_type)); +} + +static ssize_t reipl_type_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NSS); + return (rc != 0) ? rc : len; +} + +static struct kobj_attribute reipl_type_attr = + __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); + +static struct kset *reipl_kset; +static struct kset *reipl_fcp_kset; + +static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, + const enum ipl_method m) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + char nss_name[NSS_NAME_SIZE + 1] = {}; + size_t pos = 0; + + reipl_get_ascii_loadparm(loadparm, ipb); + reipl_get_ascii_nss_name(nss_name, ipb); + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + + switch (m) { + case REIPL_METHOD_CCW_VM: + pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno); + break; + case REIPL_METHOD_NSS: + pos = sprintf(dst, "IPL %s", nss_name); + break; + default: + break; + } + if (strlen(loadparm) > 0) + pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm); + if (strlen(vmparm) > 0) + sprintf(dst + pos, " PARM %s", vmparm); +} + +static void __reipl_run(void *unused) +{ + struct ccw_dev_id devid; + static char buf[128]; + + switch (reipl_method) { + case REIPL_METHOD_CCW_CIO: + devid.devno = reipl_block_ccw->ipl_info.ccw.devno; + devid.ssid = 0; + reipl_ccw_dev(&devid); + break; + case REIPL_METHOD_CCW_VM: + get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM); + __cpcmd(buf, NULL, 0, NULL); + break; + case REIPL_METHOD_CCW_DIAG: + diag308(DIAG308_SET, reipl_block_ccw); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_RW_DIAG: + diag308(DIAG308_SET, reipl_block_fcp); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_RO_DIAG: + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_RO_VM: + __cpcmd("IPL", NULL, 0, NULL); + break; + case REIPL_METHOD_NSS_DIAG: + diag308(DIAG308_SET, reipl_block_nss); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_NSS: + get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); + __cpcmd(buf, NULL, 0, NULL); + break; + case REIPL_METHOD_DEFAULT: + if (MACHINE_IS_VM) + __cpcmd("IPL", NULL, 0, NULL); + diag308(DIAG308_IPL, NULL); + break; + case REIPL_METHOD_FCP_DUMP: + break; + } + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +static void reipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__reipl_run, NULL); +} + +static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) +{ + ipb->hdr.len = IPL_PARM_BLK_CCW_LEN; + ipb->hdr.version = IPL_PARM_BLOCK_VERSION; + ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; + ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW; +} + +static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) +{ + /* LOADPARM */ + /* check if read scp info worked and set loadparm */ + if (sclp_ipl_info.is_valid) + memcpy(ipb->ipl_info.ccw.load_parm, + &sclp_ipl_info.loadparm, LOADPARM_LEN); + else + /* read scp info failed: set empty loadparm (EBCDIC blanks) */ + memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN); + ipb->hdr.flags = DIAG308_FLAGS_LP_VALID; + + /* VM PARM */ + if (MACHINE_IS_VM && diag308_set_works && + (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { + + ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + ipb->ipl_info.ccw.vm_parm_len = + ipl_block.ipl_info.ccw.vm_parm_len; + memcpy(ipb->ipl_info.ccw.vm_parm, + ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE); + } +} + +static int __init reipl_nss_init(void) +{ + int rc; + + if (!MACHINE_IS_VM) + return 0; + + reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nss) + return -ENOMEM; + + if (!diag308_set_works) + sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO; + + rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group); + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_nss); + if (ipl_info.type == IPL_TYPE_NSS) { + memset(reipl_block_nss->ipl_info.ccw.nss_name, + ' ', NSS_NAME_SIZE); + memcpy(reipl_block_nss->ipl_info.ccw.nss_name, + kernel_nss_name, strlen(kernel_nss_name)); + ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + reipl_block_nss->ipl_info.ccw.vm_flags |= + DIAG308_VM_FLAGS_NSS_VALID; + + reipl_block_ccw_fill_parms(reipl_block_nss); + } + + reipl_capabilities |= IPL_TYPE_NSS; + return 0; +} + +static int __init reipl_ccw_init(void) +{ + int rc; + + reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_ccw) + return -ENOMEM; + + if (MACHINE_IS_VM) { + if (!diag308_set_works) + sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, + &reipl_ccw_attr_group_vm); + } else { + if(!diag308_set_works) + sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, + &reipl_ccw_attr_group_lpar); + } + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_ccw); + if (ipl_info.type == IPL_TYPE_CCW) { + reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; + reipl_block_ccw_fill_parms(reipl_block_ccw); + } + + reipl_capabilities |= IPL_TYPE_CCW; + return 0; +} + +static int __init reipl_fcp_init(void) +{ + int rc; + + if (!diag308_set_works) { + if (ipl_info.type == IPL_TYPE_FCP) { + make_attrs_ro(reipl_fcp_attrs); + sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO; + } else + return 0; + } + + reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_fcp) + return -ENOMEM; + + /* sysfs: create fcp kset for mixing attr group and bin attrs */ + reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, + &reipl_kset->kobj); + if (!reipl_fcp_kset) { + free_page((unsigned long) reipl_block_fcp); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + if (rc) { + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; + } + + rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_scp_data_attr); + if (rc) { + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; + } + + if (ipl_info.type == IPL_TYPE_FCP) + memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); + else { + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; + reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_FCP; + return 0; +} + +static int __init reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } +out: + return reipl_set_type(reipl_type); +} + +static int __init reipl_init(void) +{ + int rc; + + reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj); + if (!reipl_kset) + return -ENOMEM; + rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr); + if (rc) { + kset_unregister(reipl_kset); + return rc; + } + rc = reipl_ccw_init(); + if (rc) + return rc; + rc = reipl_fcp_init(); + if (rc) + return rc; + rc = reipl_nss_init(); + if (rc) + return rc; + return reipl_type_init(); +} + +static struct shutdown_action __refdata reipl_action = { + .name = SHUTDOWN_ACTION_REIPL_STR, + .fn = reipl_run, + .init = reipl_init, +}; + +/* + * dump shutdown action: Dump Linux on shutdown. + */ + +/* FCP dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", + dump_block_fcp->ipl_info.fcp.wwpn); +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", + dump_block_fcp->ipl_info.fcp.lun); +DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", + dump_block_fcp->ipl_info.fcp.bootprog); +DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", + dump_block_fcp->ipl_info.fcp.br_lba); +DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_fcp->ipl_info.fcp.devno); + +static struct attribute *dump_fcp_attrs[] = { + &sys_dump_fcp_device_attr.attr, + &sys_dump_fcp_wwpn_attr.attr, + &sys_dump_fcp_lun_attr.attr, + &sys_dump_fcp_bootprog_attr.attr, + &sys_dump_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_fcp_attr_group = { + .name = IPL_FCP_STR, + .attrs = dump_fcp_attrs, +}; + +/* CCW dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_ccw->ipl_info.ccw.devno); + +static struct attribute *dump_ccw_attrs[] = { + &sys_dump_ccw_device_attr.attr, + NULL, +}; + +static struct attribute_group dump_ccw_attr_group = { + .name = IPL_CCW_STR, + .attrs = dump_ccw_attrs, +}; + +/* dump type */ + +static int dump_set_type(enum dump_type type) +{ + if (!(dump_capabilities & type)) + return -EINVAL; + switch (type) { + case DUMP_TYPE_CCW: + if (diag308_set_works) + dump_method = DUMP_METHOD_CCW_DIAG; + else if (MACHINE_IS_VM) + dump_method = DUMP_METHOD_CCW_VM; + else + dump_method = DUMP_METHOD_CCW_CIO; + break; + case DUMP_TYPE_FCP: + dump_method = DUMP_METHOD_FCP_DIAG; + break; + default: + dump_method = DUMP_METHOD_NONE; + } + dump_type = type; + return 0; +} + +static ssize_t dump_type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", dump_type_str(dump_type)); +} + +static ssize_t dump_type_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_NONE); + else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_CCW); + else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_FCP); + return (rc != 0) ? rc : len; +} + +static struct kobj_attribute dump_type_attr = + __ATTR(dump_type, 0644, dump_type_show, dump_type_store); + +static struct kset *dump_kset; + +static void diag308_dump(void *dump_block) +{ + diag308(DIAG308_SET, dump_block); + while (1) { + if (diag308(DIAG308_DUMP, NULL) != 0x302) + break; + udelay_simple(USEC_PER_SEC); + } +} + +static void __dump_run(void *unused) +{ + struct ccw_dev_id devid; + static char buf[100]; + + switch (dump_method) { + case DUMP_METHOD_CCW_CIO: + devid.devno = dump_block_ccw->ipl_info.ccw.devno; + devid.ssid = 0; + reipl_ccw_dev(&devid); + break; + case DUMP_METHOD_CCW_VM: + sprintf(buf, "STORE STATUS"); + __cpcmd(buf, NULL, 0, NULL); + sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); + __cpcmd(buf, NULL, 0, NULL); + break; + case DUMP_METHOD_CCW_DIAG: + diag308_dump(dump_block_ccw); + break; + case DUMP_METHOD_FCP_DIAG: + diag308_dump(dump_block_fcp); + break; + default: + break; + } +} + +static void dump_run(struct shutdown_trigger *trigger) +{ + if (dump_method == DUMP_METHOD_NONE) + return; + smp_send_stop(); + smp_call_ipl_cpu(__dump_run, NULL); +} + +static int __init dump_ccw_init(void) +{ + int rc; + + dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_ccw) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group); + if (rc) { + free_page((unsigned long)dump_block_ccw); + return rc; + } + dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; + dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; + dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; + dump_capabilities |= DUMP_TYPE_CCW; + return 0; +} + +static int __init dump_fcp_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + if (!diag308_set_works) + return 0; + dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_fcp) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group); + if (rc) { + free_page((unsigned long)dump_block_fcp); + return rc; + } + dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; + dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; + dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP; + dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_FCP; + return 0; +} + +static int __init dump_init(void) +{ + int rc; + + dump_kset = kset_create_and_add("dump", NULL, firmware_kobj); + if (!dump_kset) + return -ENOMEM; + rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr); + if (rc) { + kset_unregister(dump_kset); + return rc; + } + rc = dump_ccw_init(); + if (rc) + return rc; + rc = dump_fcp_init(); + if (rc) + return rc; + dump_set_type(DUMP_TYPE_NONE); + return 0; +} + +static struct shutdown_action __refdata dump_action = { + .name = SHUTDOWN_ACTION_DUMP_STR, + .fn = dump_run, + .init = dump_init, +}; + +static void dump_reipl_run(struct shutdown_trigger *trigger) +{ + unsigned long ipib = (unsigned long) reipl_block_actual; + unsigned int csum; + + csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); + mem_assign_absolute(S390_lowcore.ipib, ipib); + mem_assign_absolute(S390_lowcore.ipib_checksum, csum); + dump_run(trigger); +} + +static int __init dump_reipl_init(void) +{ + if (!diag308_set_works) + return -EOPNOTSUPP; + else + return 0; +} + +static struct shutdown_action __refdata dump_reipl_action = { + .name = SHUTDOWN_ACTION_DUMP_REIPL_STR, + .fn = dump_reipl_run, + .init = dump_reipl_init, +}; + +/* + * vmcmd shutdown action: Trigger vm command on shutdown. + */ + +static char vmcmd_on_reboot[128]; +static char vmcmd_on_panic[128]; +static char vmcmd_on_halt[128]; +static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; + +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); + +static struct attribute *vmcmd_attrs[] = { + &sys_vmcmd_on_reboot_attr.attr, + &sys_vmcmd_on_panic_attr.attr, + &sys_vmcmd_on_halt_attr.attr, + &sys_vmcmd_on_poff_attr.attr, + &sys_vmcmd_on_restart_attr.attr, + NULL, +}; + +static struct attribute_group vmcmd_attr_group = { + .attrs = vmcmd_attrs, +}; + +static struct kset *vmcmd_kset; + +static void vmcmd_run(struct shutdown_trigger *trigger) +{ + char *cmd; + + if (strcmp(trigger->name, ON_REIPL_STR) == 0) + cmd = vmcmd_on_reboot; + else if (strcmp(trigger->name, ON_PANIC_STR) == 0) + cmd = vmcmd_on_panic; + else if (strcmp(trigger->name, ON_HALT_STR) == 0) + cmd = vmcmd_on_halt; + else if (strcmp(trigger->name, ON_POFF_STR) == 0) + cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; + else + return; + + if (strlen(cmd) == 0) + return; + __cpcmd(cmd, NULL, 0, NULL); +} + +static int vmcmd_init(void) +{ + if (!MACHINE_IS_VM) + return -EOPNOTSUPP; + vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); + if (!vmcmd_kset) + return -ENOMEM; + return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group); +} + +static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, + vmcmd_run, vmcmd_init}; + +/* + * stop shutdown action: Stop Linux on shutdown. + */ + +static void stop_run(struct shutdown_trigger *trigger) +{ + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) + disabled_wait((unsigned long) __builtin_return_address(0)); + smp_stop_cpu(); +} + +static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, + stop_run, NULL}; + +/* action list */ + +static struct shutdown_action *shutdown_actions_list[] = { + &ipl_action, &reipl_action, &dump_reipl_action, &dump_action, + &vmcmd_action, &stop_action}; +#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *)) + +/* + * Trigger section + */ + +static struct kset *shutdown_actions_kset; + +static int set_trigger(const char *buf, struct shutdown_trigger *trigger, + size_t len) +{ + int i; + + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { + if (sysfs_streq(buf, shutdown_actions_list[i]->name)) { + if (shutdown_actions_list[i]->init_rc) { + return shutdown_actions_list[i]->init_rc; + } else { + trigger->action = shutdown_actions_list[i]; + return len; + } + } + } + return -EINVAL; +} + +/* on reipl */ + +static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR, + &reipl_action}; + +static ssize_t on_reboot_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_reboot_trigger.action->name); +} + +static ssize_t on_reboot_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_reboot_trigger, len); +} + +static struct kobj_attribute on_reboot_attr = + __ATTR(on_reboot, 0644, on_reboot_show, on_reboot_store); + +static void do_machine_restart(char *__unused) +{ + smp_send_stop(); + on_reboot_trigger.action->fn(&on_reboot_trigger); + reipl_run(NULL); +} +void (*_machine_restart)(char *command) = do_machine_restart; + +/* on panic */ + +static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action}; + +static ssize_t on_panic_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_panic_trigger.action->name); +} + +static ssize_t on_panic_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_panic_trigger, len); +} + +static struct kobj_attribute on_panic_attr = + __ATTR(on_panic, 0644, on_panic_show, on_panic_store); + +static void do_panic(void) +{ + lgr_info_log(); + on_panic_trigger.action->fn(&on_panic_trigger); + stop_run(&on_panic_trigger); +} + +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &stop_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} + +static struct kobj_attribute on_restart_attr = + __ATTR(on_restart, 0644, on_restart_show, on_restart_store); + +static void __do_restart(void *ignore) +{ + __arch_local_irq_stosm(0x04); /* enable DAT */ + smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} + +void do_restart(void) +{ + tracing_off(); + debug_locks_off(); + lgr_info_log(); + smp_call_online_cpu(__do_restart, NULL); +} + +/* on halt */ + +static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; + +static ssize_t on_halt_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_halt_trigger.action->name); +} + +static ssize_t on_halt_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_halt_trigger, len); +} + +static struct kobj_attribute on_halt_attr = + __ATTR(on_halt, 0644, on_halt_show, on_halt_store); + + +static void do_machine_halt(void) +{ + smp_send_stop(); + on_halt_trigger.action->fn(&on_halt_trigger); + stop_run(&on_halt_trigger); +} +void (*_machine_halt)(void) = do_machine_halt; + +/* on power off */ + +static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action}; + +static ssize_t on_poff_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_poff_trigger.action->name); +} + +static ssize_t on_poff_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_poff_trigger, len); +} + +static struct kobj_attribute on_poff_attr = + __ATTR(on_poff, 0644, on_poff_show, on_poff_store); + + +static void do_machine_power_off(void) +{ + smp_send_stop(); + on_poff_trigger.action->fn(&on_poff_trigger); + stop_run(&on_poff_trigger); +} +void (*_machine_power_off)(void) = do_machine_power_off; + +static void __init shutdown_triggers_init(void) +{ + shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL, + firmware_kobj); + if (!shutdown_actions_kset) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_reboot_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_panic_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_halt_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_poff_attr.attr)) + goto fail; + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_restart_attr.attr)) + goto fail; + return; +fail: + panic("shutdown_triggers_init failed\n"); +} + +static void __init shutdown_actions_init(void) +{ + int i; + + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { + if (!shutdown_actions_list[i]->init) + continue; + shutdown_actions_list[i]->init_rc = + shutdown_actions_list[i]->init(); + } +} + +static int __init s390_ipl_init(void) +{ + sclp_get_ipl_info(&sclp_ipl_info); + shutdown_actions_init(); + shutdown_triggers_init(); + return 0; +} + +__initcall(s390_ipl_init); + +static void __init strncpy_skip_quote(char *dst, char *src, int n) +{ + int sx, dx; + + dx = 0; + for (sx = 0; src[sx] != 0; sx++) { + if (src[sx] == '"') + continue; + dst[dx++] = src[sx]; + if (dx >= n) + break; + } +} + +static int __init vmcmd_on_reboot_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_reboot, str, 127); + vmcmd_on_reboot[127] = 0; + on_reboot_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmreboot=", vmcmd_on_reboot_setup); + +static int __init vmcmd_on_panic_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_panic, str, 127); + vmcmd_on_panic[127] = 0; + on_panic_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmpanic=", vmcmd_on_panic_setup); + +static int __init vmcmd_on_halt_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_halt, str, 127); + vmcmd_on_halt[127] = 0; + on_halt_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmhalt=", vmcmd_on_halt_setup); + +static int __init vmcmd_on_poff_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_poff, str, 127); + vmcmd_on_poff[127] = 0; + on_poff_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmpoff=", vmcmd_on_poff_setup); + +static int on_panic_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + do_panic(); + return NOTIFY_OK; +} + +static struct notifier_block on_panic_nb = { + .notifier_call = on_panic_notify, + .priority = INT_MIN, +}; + +void __init setup_ipl(void) +{ + ipl_info.type = get_ipl_type(); + switch (ipl_info.type) { + case IPL_TYPE_CCW: + ipl_info.data.ccw.dev_id.devno = ipl_devno; + ipl_info.data.ccw.dev_id.ssid = 0; + break; + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + ipl_info.data.fcp.dev_id.devno = + IPL_PARMBLOCK_START->ipl_info.fcp.devno; + ipl_info.data.fcp.dev_id.ssid = 0; + ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn; + ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun; + break; + case IPL_TYPE_NSS: + strncpy(ipl_info.data.nss.name, kernel_nss_name, + sizeof(ipl_info.data.nss.name)); + break; + case IPL_TYPE_UNKNOWN: + /* We have no info to copy */ + break; + } + atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); +} + +void __init ipl_update_parameters(void) +{ + int rc; + + rc = diag308(DIAG308_STORE, &ipl_block); + if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG)) + diag308_set_works = 1; +} + +void __init ipl_save_parameters(void) +{ + struct cio_iplinfo iplinfo; + void *src, *dst; + + if (cio_get_iplinfo(&iplinfo)) + return; + + ipl_devno = iplinfo.devno; + ipl_flags |= IPL_DEVNO_VALID; + if (!iplinfo.is_qdio) + return; + ipl_flags |= IPL_PARMBLOCK_VALID; + src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr; + dst = (void *)IPL_PARMBLOCK_ORIGIN; + memmove(dst, src, PAGE_SIZE); + S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN; +} + +static LIST_HEAD(rcall); +static DEFINE_MUTEX(rcall_mutex); + +void register_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_add(&reset->list, &rcall); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(register_reset_call); + +void unregister_reset_call(struct reset_call *reset) +{ + mutex_lock(&rcall_mutex); + list_del(&reset->list); + mutex_unlock(&rcall_mutex); +} +EXPORT_SYMBOL_GPL(unregister_reset_call); + +static void do_reset_calls(void) +{ + struct reset_call *reset; + +#ifdef CONFIG_64BIT + if (diag308_set_works) { + diag308_reset(); + return; + } +#endif + list_for_each_entry(reset, &rcall, list) + reset->fn(); +} + +u32 dump_prefix_page; + +void s390_reset_system(void (*func)(void *), void *data) +{ + struct _lowcore *lc; + + lc = (struct _lowcore *)(unsigned long) store_prefix(); + + /* Stack for interrupt/machine check handler */ + lc->panic_stack = S390_lowcore.panic_stack; + + /* Save prefix page address for dump case */ + dump_prefix_page = (u32)(unsigned long) lc; + + /* Disable prefixing */ + set_prefix(0); + + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + + /* Set new machine check handler */ + S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; + S390_lowcore.mcck_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; + + /* Set new program check handler */ + S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; + S390_lowcore.program_new_psw.addr = + PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + + /* Store status at absolute zero */ + store_status(); + + do_reset_calls(); + if (func) + func(data); +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 480b6a5fef3..99b0b09646c 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -1,105 +1,307 @@ /* - * arch/s390/kernel/irq.c - * - * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Copyright IBM Corp. 2004, 2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Thomas Spatzier <tspat@de.ibm.com>, * * This file contains interrupt related functions. */ -#include <linux/module.h> -#include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/profile.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/errno.h> +#include <linux/slab.h> #include <linux/cpu.h> +#include <linux/irq.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> +#include <asm/irq.h> +#include <asm/hw_irq.h> +#include "entry.h" + +DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); +EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); + +struct irq_class { + char *name; + char *desc; +}; + +/* + * The list of "main" irq classes on s390. This is the list of interrupts + * that appear both in /proc/stat ("intr" line) and /proc/interrupts. + * Historically only external and I/O interrupts have been part of /proc/stat. + * We can't add the split external and I/O sub classes since the first field + * in the "intr" line in /proc/stat is supposed to be the sum of all other + * fields. + * Since the external and I/O interrupt fields are already sums we would end + * up with having a sum which accounts each interrupt twice. + */ +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, +}; + +/* + * The list of split external and I/O interrupts that appear only in + * /proc/interrupts. + * In addition this list contains non external / I/O events like NMIs. + */ +static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { + [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, + [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, + [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, + [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, + [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, + [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, + [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, + [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, + [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, + [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, + [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, + [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, + [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, + [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, + [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, + [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, + [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, + [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, + [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, + [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, + [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, + [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, + [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, +}; + +void __init init_IRQ(void) +{ + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} /* * show_interrupts is needed by /proc/interrupts. */ int show_interrupts(struct seq_file *p, void *v) { - static const char *intrclass_names[] = { "EXT", "I/O", }; - int i = *(loff_t *) v, j; + int irq = *(loff_t *) v; + int cpu; - if (i == 0) { + get_online_cpus(); + if (irq == 0) { seq_puts(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } + if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; + seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_putc(p, '\n'); + goto out; + } + for (irq = 0; irq < NR_ARCH_IRQS; irq++) { + seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); + if (irqclass_sub_desc[irq].desc) + seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + seq_putc(p, '\n'); + } +out: + put_online_cpus(); + return 0; +} - if (i < NR_IRQS) { - seq_printf(p, "%s: ", intrclass_names[i]); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_putc(p, '\n'); - - } - - return 0; +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < THIN_INTERRUPT ? THIN_INTERRUPT : from; } /* - * For compatibilty only. S/390 specific setup of interrupts et al. is done - * much later in init_channel_subsystem(). + * Switch to the asynchronous interrupt stack for softirq execution. */ -void __init -init_IRQ(void) +void do_softirq_own_stack(void) { - /* nothing... */ + unsigned long old, new; + + /* Get current stack pointer. */ + asm volatile("la %0,0(15)" : "=a" (old)); + /* Check against async. stack address range. */ + new = S390_lowcore.async_stack; + if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { + /* Need to switch to the async. stack. */ + new -= STACK_FRAME_OVERHEAD; + ((struct stack_frame *) new)->back_chain = old; + asm volatile(" la 15,0(%0)\n" + " basr 14,%2\n" + " la 15,0(%1)\n" + : : "a" (new), "a" (old), + "a" (__do_softirq) + : "0", "1", "2", "3", "4", "5", "14", + "cc", "memory" ); + } else { + /* We are already on the async stack. */ + __do_softirq(); + } } /* - * Switch to the asynchronous interrupt stack for softirq execution. + * ext_int_hash[index] is the list head for all external interrupts that hash + * to this index. */ -extern void __do_softirq(void); +static struct hlist_head ext_int_hash[32] ____cacheline_aligned; + +struct ext_int_info { + ext_int_handler_t handler; + struct hlist_node entry; + struct rcu_head rcu; + u16 code; +}; + +/* ext_int_hash_lock protects the handler lists for external interrupts */ +static DEFINE_SPINLOCK(ext_int_hash_lock); + +static inline int ext_hash(u16 code) +{ + BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash))); + + return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1); +} -asmlinkage void do_softirq(void) +int register_external_irq(u16 code, ext_int_handler_t handler) { - unsigned long flags, old, new; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - account_system_vtime(current); - - local_bh_disable(); - - if (local_softirq_pending()) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); - /* Check against async. stack address range. */ - new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { - /* Need to switch to the async. stack. */ - new -= STACK_FRAME_OVERHEAD; - ((struct stack_frame *) new)->back_chain = old; - - asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" - " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) - : "0", "1", "2", "3", "4", "5", "14", - "cc", "memory" ); - } else - /* We are already on the async stack. */ - __do_softirq(); + struct ext_int_info *p; + unsigned long flags; + int index; + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; + index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_add_head_rcu(&p->entry, &ext_int_hash[index]); + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(register_external_irq); + +int unregister_external_irq(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (p->code == code && p->handler == handler) { + hlist_del_rcu(&p->entry); + kfree_rcu(p, rcu); + } } + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(unregister_external_irq); - account_system_vtime(current); +static irqreturn_t do_ext_interrupt(int irq, void *dummy) +{ + struct pt_regs *regs = get_irq_regs(); + struct ext_code ext_code; + struct ext_int_info *p; + int index; + + ext_code = *(struct ext_code *) ®s->int_code; + if (ext_code.code != EXT_IRQ_CLK_COMP) + __get_cpu_var(s390_idle).nohz_delay = 1; + + index = ext_hash(ext_code.code); + rcu_read_lock(); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (unlikely(p->code != ext_code.code)) + continue; + p->handler(ext_code, regs->int_parm, regs->int_parm_long); + } + rcu_read_unlock(); + return IRQ_HANDLED; +} + +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; + +void __init init_ext_interrupts(void) +{ + int idx; - __local_bh_enable(); + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_HLIST_HEAD(&ext_int_hash[idx]); - local_irq_restore(flags); + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); } -EXPORT_SYMBOL(do_softirq); +static DEFINE_SPINLOCK(irq_subclass_lock); +static unsigned char irq_subclass_refcount[64]; + +void irq_subclass_register(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + if (!irq_subclass_refcount[subclass]) + ctl_set_bit(0, subclass); + irq_subclass_refcount[subclass]++; + spin_unlock(&irq_subclass_lock); +} +EXPORT_SYMBOL(irq_subclass_register); + +void irq_subclass_unregister(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + irq_subclass_refcount[subclass]--; + if (!irq_subclass_refcount[subclass]) + ctl_clear_bit(0, subclass); + spin_unlock(&irq_subclass_lock); +} +EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 00000000000..b987ab2c154 --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,70 @@ +/* + * Jump label s390 support + * + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <linux/jump_label.h> +#include <asm/ipl.h> + +#ifdef HAVE_JUMP_LABEL + +struct insn { + u16 opcode; + s32 offset; +} __packed; + +struct insn_args { + struct jump_entry *entry; + enum jump_label_type type; +}; + +static void __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn insn; + int rc; + + if (type == JUMP_LABEL_ENABLE) { + /* brcl 15,offset */ + insn.opcode = 0xc0f4; + insn.offset = (entry->target - entry->code) >> 1; + } else { + /* brcl 0,0 */ + insn.opcode = 0xc004; + insn.offset = 0; + } + + rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); + WARN_ON_ONCE(rc < 0); +} + +static int __sm_arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + + __jump_label_transform(args->entry, args->type); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + + args.entry = entry; + args.type = type; + + stop_machine(__sm_arch_jump_label_transform, &args, NULL); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __jump_label_transform(entry, type); +} + +#endif diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c new file mode 100644 index 00000000000..bc71a7b95af --- /dev/null +++ b/arch/s390/kernel/kprobes.c @@ -0,0 +1,825 @@ +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corp. 2002, 2006 + * + * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> + */ + +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/preempt.h> +#include <linux/stop_machine.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/hardirq.h> +#include <asm/cacheflush.h> +#include <asm/sections.h> +#include <asm/dis.h> + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = { }; + +DEFINE_INSN_CACHE_OPS(dmainsn); + +static void *alloc_dmainsn_page(void) +{ + return (void *)__get_free_page(GFP_KERNEL | GFP_DMA); +} + +static void free_dmainsn_page(void *page) +{ + free_page((unsigned long)page); +} + +struct kprobe_insn_cache kprobe_dmainsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex), + .alloc = alloc_dmainsn_page, + .free = free_dmainsn_page, + .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages), + .insn_size = MAX_INSN_SIZE, +}; + +static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) +{ + if (!is_known_insn((unsigned char *)insn)) + return -EINVAL; + switch (insn[0] >> 8) { + case 0x0c: /* bassm */ + case 0x0b: /* bsm */ + case 0x83: /* diag */ + case 0x44: /* ex */ + case 0xac: /* stnsm */ + case 0xad: /* stosm */ + return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } + } + switch (insn[0]) { + case 0x0101: /* pr */ + case 0xb25a: /* bsa */ + case 0xb240: /* bakr */ + case 0xb258: /* bsg */ + case 0xb218: /* pc */ + case 0xb228: /* pt */ + case 0xb98d: /* epsw */ + return -EINVAL; + } + return 0; +} + +static int __kprobes get_fixup_type(kprobe_opcode_t *insn) +{ + /* default fixup method */ + int fixup = FIXUP_PSW_NORMAL; + + switch (insn[0] >> 8) { + case 0x05: /* balr */ + case 0x0d: /* basr */ + fixup = FIXUP_RETURN_REGISTER; + /* if r2 = 0, no branch will be taken */ + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x06: /* bctr */ + case 0x07: /* bcr */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x45: /* bal */ + case 0x4d: /* bas */ + fixup = FIXUP_RETURN_REGISTER; + break; + case 0x47: /* bc */ + case 0x46: /* bct */ + case 0x86: /* bxh */ + case 0x87: /* bxle */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x82: /* lpsw */ + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xb2: /* lpswe */ + if ((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xa7: /* bras */ + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xc0: + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xeb: + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + case 0xe3: /* bctg */ + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + } + return fixup; +} + +static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) +{ + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} + +static void __kprobes copy_instruction(struct kprobe *p) +{ + s64 disp, new_disp; + u64 addr, new_addr; + + memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + if (!is_insn_relative_long(p->ainsn.insn)) + return; + /* + * For pc-relative instructions in RIL-b or RIL-c format patch the + * RI2 displacement field. We have already made sure that the insn + * slot for the patched instruction is within the same 2GB area + * as the original instruction (either kernel image or module area). + * Therefore the new displacement will always fit. + */ + disp = *(s32 *)&p->ainsn.insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&p->ainsn.insn[1] = new_disp; +} + +static inline int is_kernel_addr(void *addr) +{ + return addr < (void *)_end; +} + +static inline int is_module_addr(void *addr) +{ +#ifdef CONFIG_64BIT + BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); + if (addr < (void *)MODULES_VADDR) + return 0; + if (addr > (void *)MODULES_END) + return 0; +#endif + return 1; +} + +static int __kprobes s390_get_insn_slot(struct kprobe *p) +{ + /* + * Get an insn slot that is within the same 2GB area like the original + * instruction. That way instructions with a 32bit signed displacement + * field can be patched and executed within the insn slot. + */ + p->ainsn.insn = NULL; + if (is_kernel_addr(p->addr)) + p->ainsn.insn = get_dmainsn_slot(); + else if (is_module_addr(p->addr)) + p->ainsn.insn = get_insn_slot(); + return p->ainsn.insn ? 0 : -ENOMEM; +} + +static void __kprobes s390_free_insn_slot(struct kprobe *p) +{ + if (!p->ainsn.insn) + return; + if (is_kernel_addr(p->addr)) + free_dmainsn_slot(p->ainsn.insn, 0); + else + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + if ((unsigned long) p->addr & 0x01) + return -EINVAL; + /* Make sure the probe isn't going on a difficult instruction */ + if (is_prohibited_opcode(p->addr)) + return -EINVAL; + if (s390_get_insn_slot(p)) + return -ENOMEM; + p->opcode = *p->addr; + copy_instruction(p); + return 0; +} + +struct ins_replace_args { + kprobe_opcode_t *ptr; + kprobe_opcode_t opcode; +}; + +static int __kprobes swap_instruction(void *aref) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long status = kcb->kprobe_status; + struct ins_replace_args *args = aref; + + kcb->kprobe_status = KPROBE_SWAP_INST; + probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); + kcb->kprobe_status = status; + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + struct ins_replace_args args; + + args.ptr = p->addr; + args.opcode = BREAKPOINT_INSTRUCTION; + stop_machine(swap_instruction, &args, NULL); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + struct ins_replace_args args; + + args.ptr = p->addr; + args.opcode = p->opcode; + stop_machine(swap_instruction, &args, NULL); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + s390_free_insn_slot(p); +} + +static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) +{ + struct per_regs per_kprobe; + + /* Set up the PER control registers %cr9-%cr11 */ + per_kprobe.control = PER_EVENT_IFETCH; + per_kprobe.start = ip; + per_kprobe.end = ip; + + /* Save control regs and psw mask */ + __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + kcb->kprobe_saved_imask = regs->psw.mask & + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); + + /* Set PER control regs, turns on single step for the given address */ + __ctl_load(per_kprobe, 9, 11); + regs->psw.mask |= PSW_MASK_PER; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + regs->psw.addr = ip | PSW_ADDR_AMODE; +} + +static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) +{ + /* Restore control regs and psw mask, set new psw address */ + __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + regs->psw.addr = ip | PSW_ADDR_AMODE; +} + +/* + * Activate a kprobe by storing its pointer to current_kprobe. The + * previous kprobe is stored in kcb->prev_kprobe. A stack of up to + * two kprobes can be active, see KPROBE_REENTER. + */ +static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) +{ + kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.status = kcb->kprobe_status; + __get_cpu_var(current_kprobe) = p; +} + +/* + * Deactivate a kprobe by backing up to the previous state. If the + * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL, + * for any other state prev_kprobe.kp will be NULL. + */ +static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) +{ + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; + + /* Replace the return addr with trampoline addr */ + regs->gprs[14] = (unsigned long) &kretprobe_trampoline; +} + +static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb, + struct kprobe *p) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + default: + /* + * A kprobe on the code path to single step an instruction + * is a BUG. The code path resides in the .kprobes.text + * section and is executed with interrupts disabled. + */ + printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + } +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb; + struct kprobe *p; + + /* + * We want to disable preemption for the entire duration of kprobe + * processing. That includes the calls to the pre/post handlers + * and single stepping the kprobe instruction. + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2)); + + if (p) { + if (kprobe_running()) { + /* + * We have hit a kprobe while another is still + * active. This can happen in the pre and post + * handler. Single step the instruction of the + * new probe but do not call any handler function + * of this secondary kprobe. + * push_kprobe and pop_kprobe saves and restores + * the currently active kprobe. + */ + kprobe_reenter_check(kcb, p); + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + /* + * If we have no pre-handler or it returned 0, we + * continue with single stepping. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry + * for jprobe processing, so get out doing nothing + * more here. + */ + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) + return 1; + kcb->kprobe_status = KPROBE_HIT_SS; + } + enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); + return 1; + } else if (kprobe_running()) { + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + /* + * Continuation after the jprobe completed and + * caused the jprobe_return trap. The jprobe + * break_handler "returns" to the original + * function that still has the kprobe breakpoint + * installed. We continue with single stepping. + */ + kcb->kprobe_status = KPROBE_HIT_SS; + enable_singlestep(kcb, regs, + (unsigned long) p->ainsn.insn); + return 1; + } /* else: + * No kprobe at this address and the current kprobe + * has no break handler (no jprobe!). The kernel just + * exploded, let the standard trap handler pick up the + * pieces. + */ + } /* else: + * No kprobe at this address and no active kprobe. The trap has + * not been caused by a kprobe breakpoint. The race of breakpoint + * vs. kprobe remove does not exist because on s390 as we use + * stop_machine to arm/disarm the breakpoints. + */ + preempt_enable_no_resched(); + return 0; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile(".global kretprobe_trampoline\n" + "kretprobe_trampoline: bcr 0,0\n"); +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address; + unsigned long trampoline_address; + kprobe_opcode_t *correct_ret_addr; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because an multiple functions in the call path + * have a return probe installed on them, and/or more than one return + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + ri = NULL; + orig_ret_address = 0; + correct_ret_addr = NULL; + trampoline_address = (unsigned long) &kretprobe_trampoline; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long) ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long) ri->ret_addr; + + if (ri->rp && ri->rp->handler) { + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; + + pop_kprobe(get_kprobe_ctlblk()); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. + */ +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; + int fixup = get_fixup_type(p->ainsn.insn); + + if (fixup & FIXUP_PSW_NORMAL) + ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; + + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(p->ainsn.insn[0] >> 8); + if (ip - (unsigned long) p->ainsn.insn == ilen) + ip = (unsigned long) p->addr + ilen; + } + + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (p->ainsn.insn[0] & 0xf0) >> 4; + regs->gprs[reg] += (unsigned long) p->addr - + (unsigned long) p->ainsn.insn; + } + + disable_singlestep(kcb, regs, ip); +} + +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); + + if (!p) + return 0; + + if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + + resume_execution(p, regs); + pop_kprobe(kcb); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, psw mask + * will have PER set, in which case, continue the remaining processing + * of do_single_step, as if this is not a probe hit. + */ + if (regs->psw.mask & PSW_MASK_PER) + return 0; + + return 1; +} + +static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_SWAP_INST: + /* We are here because the instruction replacement failed */ + return 0; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + disable_singlestep(kcb, regs, (unsigned long) p->addr); + pop_kprobe(kcb); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(p); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (p->fault_handler && p->fault_handler(p, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + if (entry) { + regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + return 0; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + int ret; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + ret = kprobe_trap_handler(regs, trapnr); + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; +} + +/* + * Wrapper routine to for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *) data; + struct pt_regs *regs = args->regs; + int ret = NOTIFY_DONE; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + + switch (val) { + case DIE_BPT: + if (kprobe_handler(regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(regs)) + ret = NOTIFY_STOP; + break; + case DIE_TRAP: + if (!preemptible() && kprobe_running() && + kprobe_trap_handler(regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack; + + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); + + /* setup return addr to the jprobe handler routine */ + regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + + /* r15 is the stack pointer */ + stack = (unsigned long) regs->gprs[15]; + + memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack)); + return 1; +} + +void __kprobes jprobe_return(void) +{ + asm volatile(".word 0x0002"); +} + +static void __used __kprobes jprobe_return_end(void) +{ + asm volatile("bcr 0,0"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack; + + stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15]; + + /* Put the regs back */ + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* put the stack back */ + memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack)); + preempt_enable_no_resched(); + return 1; +} + +static struct kprobe trampoline = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline; +} diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 00000000000..6ea6d69339b --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,186 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/facility.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE); +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = (void *) lgr_page; + + if (stsi(si, 1, 1, 1)) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = (void *) lgr_page; + + if (stsi(si, 2, 2, 2)) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + memcpy(&lgr_info->lpar_number, &si->lpar_number, + sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = (void *) lgr_page; + int i; + + if (stsi(si, 3, 2, 2)) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + int level; + + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + level = stsi(NULL, 0, 0, 0); + lgr_info->level = level; + if (level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer = + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) + return -ENOMEM; + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + lgr_timer_set(); + return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index f0ed5c642c7..719e27b2cf2 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,105 +1,272 @@ /* - * arch/s390/kernel/machine_kexec.c - * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Copyright IBM Corp. 2005, 2011 * + * Author(s): Rolf Adelsberger, + * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ -/* - * s390_machine_kexec.c - handle the transition of Linux booting another kernel - * on the S390 architecture. - */ - -#include <asm/cio.h> -#include <asm/setup.h> #include <linux/device.h> #include <linux/mm.h> #include <linux/kexec.h> #include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/ftrace.h> +#include <linux/debug_locks.h> +#include <linux/suspend.h> +#include <asm/cio.h> +#include <asm/setup.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> +#include <asm/smp.h> +#include <asm/reset.h> +#include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/elf.h> +#include <asm/asm-offsets.h> +#include <asm/os_info.h> + +typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); + +extern const unsigned char relocate_kernel[]; +extern const unsigned long long relocate_kernel_len; + +#ifdef CONFIG_CRASH_DUMP + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Initialize CPU ELF notes + */ +static void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu; + + this_cpu = smp_find_processor_id(stap()); + add_elf_notes(this_cpu); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + add_elf_notes(cpu); + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} -static void kexec_halt_all_cpus(void *); +/* + * PM notifier callback for kdump + */ +static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + if (crashk_res.start) + crash_map_reserved_pages(); + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + if (crashk_res.start) + crash_unmap_reserved_pages(); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} -typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long); +static int __init machine_kdump_pm_init(void) +{ + pm_notifier(machine_kdump_pm_cb, 0); + return 0; +} +arch_initcall(machine_kdump_pm_init); +#endif -const extern unsigned char relocate_kernel[]; -const extern unsigned long long relocate_kernel_len; +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + setup_regs(); + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + start_kdump(1); +#endif +} -int -machine_kexec_prepare(struct kimage *image) +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) { - unsigned long reboot_code_buffer; +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else { + vmem_remove_mapping(crashk_res.start, size); + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); + } +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + +int machine_kexec_prepare(struct kimage *image) +{ + void *reboot_code_buffer; + + /* Can't replace kernel image since it is read-only. */ + if (ipl_flags & IPL_NSS_VALID) + return -EOPNOTSUPP; + + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; /* Get the destination where the assembler code should be copied to.*/ - reboot_code_buffer = page_to_pfn(image->control_code_page)<<PAGE_SHIFT; + reboot_code_buffer = (void *) page_to_phys(image->control_code_page); /* Then copy it */ - memcpy((void *) reboot_code_buffer, relocate_kernel, - relocate_kernel_len); + memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len); return 0; } -void -machine_kexec_cleanup(struct kimage *image) +void machine_kexec_cleanup(struct kimage *image) { } -void -machine_shutdown(void) +void arch_crash_save_vmcoreinfo(void) { - printk(KERN_INFO "kexec: machine_shutdown called\n"); + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); } -NORET_TYPE void -machine_kexec(struct kimage *image) +void machine_shutdown(void) { - clear_all_subchannels(); - - /* Disable lowcore protection */ - ctl_clear_bit(0,28); - - on_each_cpu(kexec_halt_all_cpus, image, 0, 0); - for (;;); } -extern void pfault_fini(void); +void machine_crash_shutdown(struct pt_regs *regs) +{ +} -static void -kexec_halt_all_cpus(void *kernel_image) +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { - static atomic_t cpuid = ATOMIC_INIT(-1); - int cpu; - struct kimage *image; relocate_kernel_t data_mover; + struct kimage *image = data; -#ifdef CONFIG_PFAULT - if (MACHINE_IS_VM) - pfault_fini(); -#endif + data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); + + /* Call the moving routine */ + (*data_mover)(&image->head, image->start); +} - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while (!smp_cpu_not_running(cpu)) - cpu_relax(); + __arch_local_irq_stosm(0x04); /* enable DAT */ + pfault_fini(); + tracing_off(); + debug_locks_off(); + if (image->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); + s390_reset_system(__do_machine_kdump, data); + } else { + s390_reset_system(__do_machine_kexec, data); } + disabled_wait((unsigned long) __builtin_return_address(0)); +} - image = (struct kimage *) kernel_image; - data_mover = (relocate_kernel_t) - (page_to_pfn(image->control_code_page) << PAGE_SHIFT); - - /* Call the moving routine */ - (*data_mover) (&image->head, image->start); +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ +void machine_kexec(struct kimage *image) +{ + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; + tracer_disable(); + smp_send_stop(); + smp_call_ipl_cpu(__machine_kexec, image); } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S new file mode 100644 index 00000000000..08dcf21cb8d --- /dev/null +++ b/arch/s390/kernel/mcount.S @@ -0,0 +1,73 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + br %r14 + +ENTRY(_mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + +ENTRY(ftrace_caller) +#endif + stm %r2,%r5,16(%r15) + bras %r1,2f +0: .long ftrace_trace_function +1: .long function_trace_stop +2: l %r2,1b-0b(%r1) + icm %r2,0xf,0(%r2) + jnz 3f + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + l %r3,100(%r15) + la %r2,0(%r14) + st %r0,__SF_BACKCHAIN(%r15) + la %r3,0(%r3) + ahi %r2,-MCOUNT_INSN_SIZE + l %r14,0b-0b(%r1) + l %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + l %r2,100(%r15) + l %r3,152(%r15) +ENTRY(ftrace_graph_caller) +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: st %r2,100(%r15) +#endif + ahi %r15,96 + l %r14,56(%r15) +3: lm %r2,%r5,16(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +ENTRY(return_to_handler) + stm %r2,%r5,16(%r15) + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + st %r0,__SF_BACKCHAIN(%r15) + bras %r1,0f + .long ftrace_return_to_handler +0: l %r2,0b-0b(%r1) + basr %r14,%r2 + lr %r14,%r2 + ahi %r15,96 + lm %r2,%r5,16(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S new file mode 100644 index 00000000000..1c52eae3396 --- /dev/null +++ b/arch/s390/kernel/mcount64.S @@ -0,0 +1,65 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + br %r14 + +ENTRY(_mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + +ENTRY(ftrace_caller) +#endif + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) + aghi %r2,-MCOUNT_INSN_SIZE + larl %r14,ftrace_trace_function + lg %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + lg %r2,168(%r15) + lg %r3,272(%r15) +ENTRY(ftrace_graph_caller) +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: stg %r2,168(%r15) +#endif + aghi %r15,160 + lmg %r2,%r5,32(%r15) + lg %r14,112(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +ENTRY(return_to_handler) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,160 + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index c271cdab58e..b89b59158b9 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/module.c - Kernel module help for s390. + * Kernel module help for s390. * * S390 version - * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 2002, 2003 * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -30,6 +29,8 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/moduleloader.h> +#include <linux/bug.h> #if 0 #define DEBUGP printk @@ -43,23 +44,28 @@ #define PLT_ENTRY_SIZE 20 #endif /* CONFIG_64BIT */ +#ifdef CONFIG_64BIT void *module_alloc(unsigned long size) { - if (size == 0) + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return vmalloc(size); + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); } +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) { + if (mod) { + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; + } vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } -static inline void -check_rela(Elf_Rela *rela, struct module *me) +static void check_rela(Elf_Rela *rela, struct module *me) { struct mod_arch_syminfo *info; @@ -108,9 +114,8 @@ check_rela(Elf_Rela *rela, struct module *me) * Account for GOT and PLT relocations. We can't add sections for * got and plt but we can increase the core module size. */ -int -module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, - char *secstrings, struct module *me) +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *me) { Elf_Shdr *symtab; Elf_Sym *symbols; @@ -119,7 +124,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, int nrela, i, j; /* Find symbol table and string table. */ - symtab = 0; + symtab = NULL; for (i = 0; i < hdr->e_shnum; i++) switch (sechdrs[i].sh_type) { case SHT_SYMTAB: @@ -172,22 +177,52 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, return 0; } -int -apply_relocate(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, - unsigned int relsec, struct module *me) +static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, + int sign, int bits, int shift) { - printk(KERN_ERR "module %s: RELOCATION unsupported\n", - me->name); - return -ENOEXEC; + unsigned long umax; + long min, max; + + if (val & ((1UL << shift) - 1)) + return -ENOEXEC; + if (sign) { + val = (Elf_Addr)(((long) val) >> shift); + min = -(1L << (bits - 1)); + max = (1L << (bits - 1)) - 1; + if ((long) val < min || (long) val > max) + return -ENOEXEC; + } else { + val >>= shift; + umax = ((1UL << (bits - 1)) << 1) - 1; + if ((unsigned long) val > umax) + return -ENOEXEC; + } + + if (bits == 8) + *(unsigned char *) loc = val; + else if (bits == 12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (bits == 16) + *(unsigned short *) loc = val; + else if (bits == 20) + *(unsigned int *) loc = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | + (*(unsigned int *) loc & 0xf00000ff); + else if (bits == 32) + *(unsigned int *) loc = val; + else if (bits == 64) + *(unsigned long *) loc = val; + return 0; } -static inline int -apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, - struct module *me) +static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, + const char *strtab, struct module *me) { struct mod_arch_syminfo *info; Elf_Addr loc, val; int r_type, r_sym; + int rc = -ENOEXEC; /* This is where to make the change */ loc = base + rela->r_offset; @@ -199,6 +234,9 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = symtab[r_sym].st_value; switch (r_type) { + case R_390_NONE: /* No relocation. */ + rc = 0; + break; case R_390_8: /* Direct 8 bit. */ case R_390_12: /* Direct 12 bit. */ case R_390_16: /* Direct 16 bit. */ @@ -207,20 +245,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_64: /* Direct 64 bit. */ val += rela->r_addend; if (r_type == R_390_8) - *(unsigned char *) loc = val; + rc = apply_rela_bits(loc, val, 0, 8, 0); else if (r_type == R_390_12) - *(unsigned short *) loc = (val & 0xfff) | - (*(unsigned short *) loc & 0xf000); + rc = apply_rela_bits(loc, val, 0, 12, 0); else if (r_type == R_390_16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_20) - *(unsigned int *) loc = - (*(unsigned int *) loc & 0xf00000ff) | - (val & 0xfff) << 16 | (val & 0xff000) >> 4; + rc = apply_rela_bits(loc, val, 1, 20, 0); else if (r_type == R_390_32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_PC16: /* PC relative 16 bit. */ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ @@ -229,15 +264,15 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PC64: /* PC relative 64 bit. */ val += rela->r_addend - loc; if (r_type == R_390_PC16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 1, 16, 0); else if (r_type == R_390_PC16DBL) - *(unsigned short *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 16, 1); else if (r_type == R_390_PC32DBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); else if (r_type == R_390_PC32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 1, 32, 0); else if (r_type == R_390_PC64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 1, 64, 0); break; case R_390_GOT12: /* 12 bit GOT offset. */ case R_390_GOT16: /* 16 bit GOT offset. */ @@ -262,26 +297,24 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = info->got_offset + rela->r_addend; if (r_type == R_390_GOT12 || r_type == R_390_GOTPLT12) - *(unsigned short *) loc = (val & 0xfff) | - (*(unsigned short *) loc & 0xf000); + rc = apply_rela_bits(loc, val, 0, 12, 0); else if (r_type == R_390_GOT16 || r_type == R_390_GOTPLT16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOT20 || r_type == R_390_GOTPLT20) - *(unsigned int *) loc = - (*(unsigned int *) loc & 0xf00000ff) | - (val & 0xfff) << 16 | (val & 0xff000) >> 4; + rc = apply_rela_bits(loc, val, 1, 20, 0); else if (r_type == R_390_GOT32 || r_type == R_390_GOTPLT32) - *(unsigned int *) loc = val; - else if (r_type == R_390_GOTENT || - r_type == R_390_GOTPLTENT) - *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) { + val += (Elf_Addr) me->module_core - loc; + rc = apply_rela_bits(loc, val, 1, 32, 1); + } break; case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ @@ -308,27 +341,32 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || - r_type == R_390_PLTOFF32 - || r_type == R_390_PLTOFF64 - ) + r_type == R_390_PLTOFF32 || + r_type == R_390_PLTOFF64) val = me->arch.plt_offset - me->arch.got_offset + info->plt_offset + rela->r_addend; - else - val = (Elf_Addr) me->module_core + - me->arch.plt_offset + info->plt_offset + - rela->r_addend - loc; + else { + if (!((r_type == R_390_PLT16DBL && + val - loc + 0xffffUL < 0x1ffffeUL) || + (r_type == R_390_PLT32DBL && + val - loc + 0xffffffffULL < 0x1fffffffeULL))) + val = (Elf_Addr) me->module_core + + me->arch.plt_offset + + info->plt_offset; + val += rela->r_addend - loc; + } if (r_type == R_390_PLT16DBL) - *(unsigned short *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 16, 1); else if (r_type == R_390_PLTOFF16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_PLT32DBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); else if (r_type == R_390_PLT32 || r_type == R_390_PLTOFF32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_PLT64 || r_type == R_390_PLTOFF64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_GOTOFF16: /* 16 bit offset to GOT. */ case R_390_GOTOFF32: /* 32 bit offset to GOT. */ @@ -336,20 +374,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = val + rela->r_addend - ((Elf_Addr) me->module_core + me->arch.got_offset); if (r_type == R_390_GOTOFF16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_GOTOFF64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ val = (Elf_Addr) me->module_core + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 1, 32, 0); else if (r_type == R_390_GOTPCDBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); break; case R_390_COPY: case R_390_GLOB_DAT: /* Create GOT entry. */ @@ -357,19 +395,25 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_RELATIVE: /* Adjust by program base. */ /* Only needed if we want to support loading of modules linked with -shared. */ - break; + return -ENOEXEC; default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", + printk(KERN_ERR "module %s: unknown relocation: %u\n", me->name, r_type); return -ENOEXEC; } + if (rc) { + printk(KERN_ERR "module %s: relocation error for symbol %s " + "(r_type %i, value 0x%lx)\n", + me->name, strtab + symtab[r_sym].st_name, + r_type, (unsigned long) val); + return rc; + } return 0; } -int -apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, - unsigned int symindex, unsigned int relsec, - struct module *me) +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) { Elf_Addr base; Elf_Sym *symtab; @@ -385,7 +429,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); for (i = 0; i < n; i++, rela++) { - rc = apply_rela(rela, base, symtab, me); + rc = apply_rela(rela, base, symtab, strtab, me); if (rc) return rc; } @@ -397,9 +441,6 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { vfree(me->arch.syminfo); + me->arch.syminfo = NULL; return 0; } - -void module_arch_cleanup(struct module *mod) -{ -} diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c new file mode 100644 index 00000000000..210e1285f75 --- /dev/null +++ b/arch/s390/kernel/nmi.c @@ -0,0 +1,372 @@ +/* + * Machine check handler + * + * Copyright IBM Corp. 2000, 2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#include <linux/kernel_stat.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/time.h> +#include <linux/module.h> +#include <asm/lowcore.h> +#include <asm/smp.h> +#include <asm/etr.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/crw.h> + +struct mcck_struct { + int kill_task; + int channel_report; + int warning; + unsigned long long mcck_code; +}; + +static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); + +static void s390_handle_damage(char *msg) +{ + smp_send_stop(); + disabled_wait((unsigned long) __builtin_return_address(0)); + while (1); +} + +/* + * Main machine check handler function. Will be called with interrupts enabled + * or disabled and machine checks enabled or disabled. + */ +void s390_handle_mcck(void) +{ + unsigned long flags; + struct mcck_struct mcck; + + /* + * Disable machine checks and get the current state of accumulated + * machine checks. Afterwards delete the old state and enable machine + * checks again. + */ + local_irq_save(flags); + local_mcck_disable(); + mcck = __get_cpu_var(cpu_mcck); + memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + clear_cpu_flag(CIF_MCCK_PENDING); + local_mcck_enable(); + local_irq_restore(flags); + + if (mcck.channel_report) + crw_handle_channel_report(); + /* + * A warning may remain for a prolonged period on the bare iron. + * (actually until the machine is powered off, or the problem is gone) + * So we just stop listening for the WARNING MCH and avoid continuously + * being interrupted. One caveat is however, that we must do this per + * processor and cannot use the smp version of ctl_clear_bit(). + * On VM we only get one interrupt per virtally presented machinecheck. + * Though one suffices, we may get one interrupt per (virtual) cpu. + */ + if (mcck.warning) { /* WARNING pending ? */ + static int mchchk_wng_posted = 0; + + /* Use single cpu clear, as we cannot handle smp here. */ + __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + if (xchg(&mchchk_wng_posted, 1) == 0) + kill_cad_pid(SIGPWR, 1); + } + if (mcck.kill_task) { + local_irq_enable(); + printk(KERN_EMERG "mcck: Terminating task because of machine " + "malfunction (code 0x%016llx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); + do_exit(SIGSEGV); + } +} +EXPORT_SYMBOL_GPL(s390_handle_mcck); + +/* + * returns 0 if all registers could be validated + * returns 1 otherwise + */ +static int notrace s390_revalidate_registers(struct mci *mci) +{ + int kill_task; + u64 zero; + void *fpt_save_area, *fpt_creg_save_area; + + kill_task = 0; + zero = 0; + + if (!mci->gr) { + /* + * General purpose registers couldn't be restored and have + * unknown contents. Process needs to be terminated. + */ + kill_task = 1; + } + if (!mci->fp) { + /* + * Floating point registers can't be restored and + * therefore the process needs to be terminated. + */ + kill_task = 1; + } +#ifndef CONFIG_64BIT + asm volatile( + " ld 0,0(%0)\n" + " ld 2,8(%0)\n" + " ld 4,16(%0)\n" + " ld 6,24(%0)" + : : "a" (&S390_lowcore.floating_pt_save_area)); +#endif + + if (MACHINE_HAS_IEEE) { +#ifdef CONFIG_64BIT + fpt_save_area = &S390_lowcore.floating_pt_save_area; + fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; +#else + fpt_save_area = (void *) S390_lowcore.extended_save_area_addr; + fpt_creg_save_area = fpt_save_area + 128; +#endif + if (!mci->fc) { + /* + * Floating point control register can't be restored. + * Task will be terminated. + */ + asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero)); + kill_task = 1; + + } else + asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); + + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } + /* Revalidate access registers */ + asm volatile( + " lam 0,15,0(%0)" + : : "a" (&S390_lowcore.access_regs_save_area)); + if (!mci->ar) { + /* + * Access registers have unknown contents. + * Terminating task. + */ + kill_task = 1; + } + /* Revalidate control registers */ + if (!mci->cr) { + /* + * Control registers have unknown contents. + * Can't recover and therefore stopping machine. + */ + s390_handle_damage("invalid control registers."); + } else { +#ifdef CONFIG_64BIT + asm volatile( + " lctlg 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#else + asm volatile( + " lctl 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#endif + } + /* + * We don't even try to revalidate the TOD register, since we simply + * can't write something sensible into that register. + */ +#ifdef CONFIG_64BIT + /* + * See if we can revalidate the TOD programmable register with its + * old contents (should be zero) otherwise set it to zero. + */ + if (!mci->pr) + asm volatile( + " sr 0,0\n" + " sckpf" + : : : "0", "cc"); + else + asm volatile( + " l 0,0(%0)\n" + " sckpf" + : : "a" (&S390_lowcore.tod_progreg_save_area) + : "0", "cc"); +#endif + /* Revalidate clock comparator register */ + set_clock_comparator(S390_lowcore.clock_comparator); + /* Check if old PSW is valid */ + if (!mci->wp) + /* + * Can't tell if we come from user or kernel mode + * -> stopping machine. + */ + s390_handle_damage("old psw invalid."); + + if (!mci->ms || !mci->pm || !mci->ia) + kill_task = 1; + + return kill_task; +} + +#define MAX_IPD_COUNT 29 +#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ + +#define ED_STP_ISLAND 6 /* External damage STP island check */ +#define ED_STP_SYNC 7 /* External damage STP sync check */ +#define ED_ETR_SYNC 12 /* External damage ETR sync check */ +#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ + +/* + * machine check handler. + */ +void notrace s390_do_machine_check(struct pt_regs *regs) +{ + static int ipd_count; + static DEFINE_SPINLOCK(ipd_lock); + static unsigned long long last_ipd; + struct mcck_struct *mcck; + unsigned long long tmp; + struct mci *mci; + int umode; + + nmi_enter(); + inc_irq_stat(NMI_NMI); + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + mcck = &__get_cpu_var(cpu_mcck); + umode = user_mode(regs); + + if (mci->sd) { + /* System damage -> stopping machine */ + s390_handle_damage("received system damage machine check."); + } + if (mci->pd) { + if (mci->b) { + /* Processing backup -> verify if we can survive this */ + u64 z_mcic, o_mcic, t_mcic; +#ifdef CONFIG_64BIT + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | + 1ULL<<16); +#else + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 | + 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); +#endif + t_mcic = *(u64 *)mci; + + if (((t_mcic & z_mcic) != 0) || + ((t_mcic & o_mcic) != o_mcic)) { + s390_handle_damage("processing backup machine " + "check with damage."); + } + + /* + * Nullifying exigent condition, therefore we might + * retry this instruction. + */ + spin_lock(&ipd_lock); + tmp = get_tod_clock(); + if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME) + ipd_count++; + else + ipd_count = 1; + last_ipd = tmp; + if (ipd_count == MAX_IPD_COUNT) + s390_handle_damage("too many ipd retries."); + spin_unlock(&ipd_lock); + } else { + /* Processing damage -> stopping machine */ + s390_handle_damage("received instruction processing " + "damage machine check."); + } + } + if (s390_revalidate_registers(mci)) { + if (umode) { + /* + * Couldn't restore all register contents while in + * user mode -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = *(unsigned long long *) mci; + set_cpu_flag(CIF_MCCK_PENDING); + } else { + /* + * Couldn't restore all register contents while in + * kernel mode -> stopping machine. + */ + s390_handle_damage("unable to revalidate registers."); + } + } + if (mci->cd) { + /* Timing facility damage */ + s390_handle_damage("TOD clock damaged"); + } + if (mci->ed && mci->ec) { + /* External damage */ + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) + etr_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) + etr_switch_to_local(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) + stp_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) + stp_island_check(); + } + if (mci->se) + /* Storage error uncorrected */ + s390_handle_damage("received storage error uncorrected " + "machine check."); + if (mci->ke) + /* Storage key-error uncorrected */ + s390_handle_damage("received storage key-error uncorrected " + "machine check."); + if (mci->ds && mci->fa) + /* Storage degradation */ + s390_handle_damage("received storage degradation machine " + "check."); + if (mci->cp) { + /* Channel report word pending */ + mcck->channel_report = 1; + set_cpu_flag(CIF_MCCK_PENDING); + } + if (mci->w) { + /* Warning pending */ + mcck->warning = 1; + set_cpu_flag(CIF_MCCK_PENDING); + } + nmi_exit(); +} + +static int __init machine_check_init(void) +{ + ctl_set_bit(14, 25); /* enable external damage MCH */ + ctl_set_bit(14, 27); /* enable system recovery MCH */ + ctl_set_bit(14, 24); /* enable warning MCH */ + return 0; +} +arch_initcall(machine_check_init); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 00000000000..d112fc66f99 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,168 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <asm/checksum.h> +#include <asm/lowcore.h> +#include <asm/os_info.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].size = size; + os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info struture and set lowcore pointer + */ +void __init os_info_init(void) +{ + void *ptr = &os_info; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = os_info_csum(&os_info); + mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = PTR_ALIGN(buf, align); + if (copy_from_oldmem(buf_align, (void *) addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!OLDMEM_BASE) + goto fail; + if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); + pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 00000000000..ea75d011a6f --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,696 @@ +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <asm/ctl_reg.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> + +/* CPU-measurement counter facility supports these CPU counter sets: + * For CPU counter sets: + * Basic counter set: 0-31 + * Problem-state counter set: 32-63 + * Crypto-activity counter set: 64-127 + * Extented counter set: 128-159 + */ +enum cpumf_ctr_set { + /* CPU counter sets */ + CPUMF_CTR_SET_BASIC = 0, + CPUMF_CTR_SET_USER = 1, + CPUMF_CTR_SET_CRYPTO = 2, + CPUMF_CTR_SET_EXT = 3, + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, +}; + +static void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +/* Local CPUMF event structure */ +struct cpu_hw_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + }, + .state = 0, + .flags = 0, +}; + +static int get_counter_set(u64 event) +{ + int set = -1; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 256) + set = CPUMF_CTR_SET_EXT; + + return set; +} + +static int validate_event(const struct hw_perf_event *hwc) +{ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + /* check for reserved counters */ + if ((hwc->config >= 6 && hwc->config <= 31) || + (hwc->config >= 38 && hwc->config <= 63) || + (hwc->config >= 80 && hwc->config <= 127)) + return -EOPNOTSUPP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int validate_ctr_version(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check required version for counter sets */ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpuhw->info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + if (cpuhw->info.csvn < 1) + err = -EOPNOTSUPP; + if ((cpuhw->info.csvn == 1 && hwc->config > 159) || + (cpuhw->info.csvn == 2 && hwc->config > 175) || + (cpuhw->info.csvn > 2 && hwc->config > 255)) + err = -EOPNOTSUPP; + break; + } + + put_cpu_var(cpu_hw_events); + return err; +} + +static int validate_ctr_auth(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + u64 ctrs_state; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check authorization for cpu counter sets */ + ctrs_state = cpumf_state_ctl[hwc->config_base]; + if (!(ctrs_state & cpuhw->info.auth_ctl)) + err = -EPERM; + + put_cpu_var(cpu_hw_events); + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + u64 inactive; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +/* Initialize the CPU-measurement facility */ +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +/* Release the CPU-measurement facility */ +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + u64 ev; + + switch (attr->type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + + /* No support for kernel space counters only */ + } else if (!attr->exclude_kernel && attr->exclude_user) { + return -EOPNOTSUPP; + + /* Count user and kernel space */ + } else { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev >= PERF_CPUM_CF_MAX_CTR) + return -EINVAL; + + /* Use the hardware perf event structure to store the counter number + * in 'config' member and the counter set to which the counter belongs + * in the 'config_base'. The counter set (config_base) is then used + * to enable/disable the counters. + */ + hwc->config = ev; + hwc->config_base = get_counter_set(ev); + + /* Validate the counter that is assigned to this event. + * Because the counter facility can use numerous counters at the + * same time without constraints, it is not necessary to explicity + * validate event groups (event->group_leader != event). + */ + err = validate_event(hwc); + if (err) + return err; + + /* Initialize for using the CPU-measurement counter facility */ + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + /* Finally, validate version and authorization of the counter set */ + err = validate_ctr_auth(hwc); + if (!err) + err = validate_ctr_version(hwc); + + return err; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + int err; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + /* The CPU measurement counter facility does not have overflow + * interrupts to do sampling. Sampling must be provided by + * external means, for example, by timers. + */ + if (is_sampling_event(event)) + return -ENOENT; + err = __hw_perf_event_init(event); + break; + default: + return -ENOENT; + } + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. + */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static int hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + goto out; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +out: + return err; +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->config == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + hw_perf_event_reset(event); + + /* increment refcount for this counter set */ + atomic_inc(&cpuhw->ctr_set[hwc->config_base]); +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. + */ + if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) + ctr_set_stop(&cpuhw->state, hwc->config_base); + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Check authorization for the counter set to which this + * counter belongs. + * For group events transaction, the authorization check is + * done in cpumf_pmu_commit_txn(). + */ + if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (validate_ctr_auth(&event->hw)) + return -EPERM; + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) + ctr_set_disable(&cpuhw->state, event->hw.config_base); + + perf_event_update_userpage(event); +} + +/* + * Start group events scheduling transaction. + * Set flags to perform a single test at commit time. + */ +static void cpumf_pmu_start_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + perf_pmu_disable(pmu); + cpuhw->flags |= PERF_EVENT_TXN; + cpuhw->tx_state = cpuhw->state; +} + +/* + * Stop and cancel a group events scheduling tranctions. + * Assumes cpumf_pmu_del() is called for each successful added + * cpumf_pmu_add() during the transaction. + */ +static void cpumf_pmu_cancel_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + WARN_ON(cpuhw->tx_state != cpuhw->state); + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); +} + +/* + * Commit the group events scheduling transaction. On success, the + * transaction is closed. On error, the transaction is kept open + * until cpumf_pmu_cancel_txn() is called. + */ +static int cpumf_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u64 state; + + /* check if the updated state can be scheduled */ + state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + state >>= CPUMF_LCCTL_ENABLE_SHIFT; + if ((state & cpuhw->info.auth_ctl) != state) + return -EPERM; + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); + return 0; +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, + .start_txn = cpumf_pmu_start_txn, + .commit_txn = cpumf_pmu_commit_txn, + .cancel_txn = cpumf_pmu_cancel_txn, +}; + +static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + goto out; + } + + cpumf_pmu.attr_groups = cpumf_cf_event_group(); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return rc; +} +early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 00000000000..4554a4bae39 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,322 @@ +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include <linux/slab.h> +#include <linux/perf_event.h> + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); + +static struct attribute *cpumcf_pmu_event_attr[] = { + CPUMF_EVENT_PTR(cf, CPU_CYCLES), + CPUMF_EVENT_PTR(cf, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_CYCLES), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumcf_pmu_event_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model; + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + default: + model = NULL; + break; + }; + + if (!model) + goto out; + + combined = merge_attr(cpumcf_pmu_event_attr, model); + if (combined) + cpumsf_pmu_events_group.attrs = combined; +out: + return cpumsf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 00000000000..ea0c7b2ef03 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,1643 @@ +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_sf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> +#include <asm/debug.h> +#include <asm/timex.h> + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. + */ +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) + +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. + * + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs + */ +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; + +struct sf_buffer { + unsigned long *sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return !!cpuhw->sfb.sdbt; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long *sdbt, *curr; + + if (!sfb->sdbt) + return; + + sdbt = sfb->sdbt; + curr = sdbt; + + /* Free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* Process table-link entries */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page((unsigned long) sdbt); + + /* If the origin is reached, sampling buffer is freed */ + if (curr == sfb->sdbt) + break; + else + sdbt = curr; + } else { + /* Process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + } + + debug_sprintf_event(sfdbg, 5, + "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + memset(sfb, 0, sizeof(*sfb)); +} + +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + unsigned long sdb, *trailer; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + trailer = trailer_entry_ptr(sdb); + *trailer = SDB_TE_ALERT_REQ_MASK; + + /* Link SDB into the sample-data-block-table */ + *sdbt = sdb; + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " + "sampling buffer is not linked: origin=%p" + "tail=%p\n", + (void *) sfb->sdbt, (void *) tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. + */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) + break; + sfb->num_sdb++; + tail++; + } + + /* Link sampling buffer to its origin */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" + " settings: sdbt=%lu sdb=%lu\n", + sfb->num_sdbt, sfb->num_sdb); + return rc; +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int rc; + + if (sfb->sdbt) + return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; + sfb->num_sdb = 0; + sfb->num_sdbt = 1; + + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; + + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) { + free_sampling_buffer(sfb); + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " + "realloc_sampling_buffer failed with rc=%i\n", rc); + } else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%p dear=%p\n", + sfb->sdbt, (void *) *sfb->sdbt); + return rc; +} + +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + struct hws_qsi_info_block si; + + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; +} + +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} + +static int sfb_has_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + return sfb_pending_allocs(sfb, hwc) > 0; +} + +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; +} + +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static size_t event_sample_size(struct hw_perf_event *hwc) +{ + struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + size_t sample_size; + + /* The sample size depends on the sampling function: The basic-sampling + * function must be always enabled, diagnostic-sampling function is + * optional. + */ + sample_size = sfr->bsdes; + if (SAMPL_DIAG_MODE(hwc)) + sample_size += sfr->dsdes; + + return sample_size; +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (cpuhw->sfb.sdbt) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq, factor; + size_t sfr_size, sample_size; + struct sf_raw_sample *sfr; + + /* Allocate raw sample buffer + * + * The raw sample buffer is used to temporarily store sampling data + * entries for perf raw sample processing. The buffer size mainly + * depends on the size of diagnostic-sampling data entries which is + * machine-specific. The exact size calculation includes: + * 1. The first 4 bytes of diagnostic-sampling data entries are + * already reflected in the sf_raw_sample structure. Subtract + * these bytes. + * 2. The perf raw sample data must be 8-byte aligned (u64) and + * perf's internal data size must be considered too. So add + * an additional u32 for correct alignment and subtract before + * allocating the buffer. + * 3. Store the raw sample buffer pointer in the perf event + * hardware structure. + */ + sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + + sizeof(u32), sizeof(u64)); + sfr_size -= sizeof(u32); + sfr = kzalloc(sfr_size, GFP_KERNEL); + if (!sfr) + return -ENOMEM; + sfr->size = sfr_size; + sfr->bsdes = cpuhw->qsi.bsdes; + sfr->dsdes = cpuhw->qsi.dsdes; + RAWSAMPLE_REG(hwc) = (unsigned long) sfr; + + /* Calculate sampling buffers using 4K pages + * + * 1. Determine the sample data size which depends on the used + * sampling functions, for example, basic-sampling or + * basic-sampling with diagnostic-sampling. + * + * 2. Use the sampling frequency as input. The sampling buffer is + * designed for almost one second. This can be adjusted through + * the "factor" variable. + * In any case, alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger a measurement-alert to harvest + * sample-data-blocks (sdb). + * + * 3. Compute the number of sample-data-blocks and ensure a minimum + * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not + * exceed a "calculated" maximum. The symbolic maximum is + * designed for basic-sampling only and needs to be increased if + * diagnostic-sampling is active. + * See also the remarks for these symbolic constants. + * + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). + */ + sample_size = event_sample_size(hwc); + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + factor = 1; + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); + if (n_sdb < CPUM_SF_MIN_SDB) + n_sdb = CPUM_SF_MIN_SDB; + + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. + */ + sfb_init_allocs(n_sdb, hwc); + if (sf_buffer_available(cpuhw)) + return 0; + + debug_sprintf_event(sfdbg, 3, + "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" + " sample_size=%lu cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), + sample_size, cpuhw); + + return alloc_sampling_buffer(&cpuhw->sfb, + sfb_pending_allocs(&cpuhw->sfb, hwc)); +} + +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} + +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} + +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" + " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. + */ +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + unsigned long num, num_old; + int rc; + + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + num_old = sfb->num_sdb; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. + */ + rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); + if (rc) + debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " + "failed with rc=%i\n", rc); + + if (sfb_has_pending_allocs(sfb, hwc)) + debug_sprintf_event(sfdbg, 5, "sfb: extend: " + "req=%lu alloc=%lu remaining=%lu\n", + num, sfb->num_sdb - num_old, + sfb_pending_allocs(sfb, hwc)); +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +#define PMC_FAILURE 2 +static void setup_pmc_cpu(void *flags) +{ + int err; + struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + + err = 0; + switch (*((int *) flags)) { + case PMC_INIT: + memset(cpusf, 0, sizeof(*cpusf)); + err = qsi(&cpusf->qsi); + if (err) + break; + cpusf->flags |= PMU_F_RESERVED; + err = sf_disable(); + if (err) + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + break; + case PMC_RELEASE: + cpusf->flags &= ~PMU_F_RESERVED; + err = sf_disable(); + if (err) { + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + } else + deallocate_buffers(cpusf); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + break; + } + if (err) + *((int *) flags) |= PMC_FAILURE; +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); + perf_release_sampling(); +} + +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + int err; + + err = perf_reserve_sampling(); + if (err) + return err; + on_each_cpu(setup_pmc_cpu, &flags, 1); + if (flags & PMC_FAILURE) { + release_pmc_hardware(); + return -ENODEV; + } + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Free raw sample buffer */ + if (RAWSAMPLE_REG(&event->hw)) + kfree((void *) RAWSAMPLE_REG(&event->hw)); + + /* Release PMC if this is the last perf event */ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static void hw_reset_registers(struct hw_perf_event *hwc, + unsigned long *sdbt_origin) +{ + struct sf_raw_sample *sfr; + + /* (Re)set to first sample-data-block-table */ + TEAR_REG(hwc) = (unsigned long) sdbt_origin; + + /* (Re)set raw sampling buffer register */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + memset(&sfr->basic, 0, sizeof(sfr->basic)); + memset(&sfr->diag, 0, sfr->dsdes); +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + int cpu, err; + + /* Reserve CPU-measurement sampling facility */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + goto out; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) + qsi(&si); + else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + + /* Check and set other sampling flags */ + if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; + + /* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + */ + rate = 0; + if (attr->freq) { + rate = freq_to_sample_rate(&si, attr->sample_freq); + rate = hw_limit_rate(&si, rate); + attr->freq = 0; + attr->sample_period = rate; + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(&si, hwc->sample_period); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(&si, rate) > + sysctl_perf_event_sample_rate) { + err = -EINVAL; + debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); + goto out; + } + } + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + + /* Initialize sample data overflow accounting */ + hwc->extra_reg.reg = REG_OVERFLOW; + OVERFLOW_REG(hwc) = 0; + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. + */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_buffers(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_buffers(cpuhw, hwc); + if (err) + break; + } + } +out: + return err; +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + /* No support for taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: + return -ENOENT; + } + + /* Check online status of the CPU to which the event is pinned */ + if (event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + if (event->destroy) + event->destroy(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hw_perf_event *hwc; + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). + */ + if (cpuhw->event) { + hwc = &cpuhw->event->hw; + /* Account number of overflow-designated buffer extents */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + + /* (Re)enable the PMU and sampling facility */ + cpuhw->flags |= PMU_F_ENABLED; + barrier(); + + err = lsctl(&cpuhw->lsctl); + if (err) { + cpuhw->flags &= ~PMU_F_ENABLED; + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 1, err); + return; + } + + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " + "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, + cpuhw->lsctl.ed, cpuhw->lsctl.cd, + (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + inactive.cd = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 2, err); + return; + } + + /* Save state of TEAR and DEAR register contents */ + if (!qsi(&si)) { + /* TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + } else + debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " + "qsi() failed with err=%i\n", err); + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_exclude_event() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) +{ + int overflow; + struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; + struct perf_sample_data data; + struct perf_raw_record raw; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + raw.size = sfr->size; + raw.data = sfr; + data.raw = &raw; + + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ + memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + + regs.psw.addr = sfr->basic.ia; + if (sfr->basic.T) + regs.psw.mask |= PSW_MASK_DAT; + if (sfr->basic.W) + regs.psw.mask |= PSW_MASK_WAIT; + if (sfr->basic.P) + regs.psw.mask |= PSW_MASK_PSTATE; + switch (sfr->basic.AS) { + case 0x0: + regs.psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x1: + regs.psw.mask |= PSW_ASC_ACCREG; + break; + case 0x2: + regs.psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x3: + regs.psw.mask |= PSW_ASC_HOME; + break; + } + + /* The host-program-parameter (hpp) contains the sie control + * block that is set by sie64a() in entry64.S. Check if hpp + * refers to a valid control block and set sde_regs flags + * accordingly. This would allow to use hpp values for other + * purposes too. + * For now, simply use a non-zero value as guest indicator. + */ + if (sfr->basic.hpp) + sde_regs->in_guest = 1; + + overflow = 0; + if (perf_exclude_event(event, ®s, sde_regs)) + goto out; + if (perf_event_overflow(event, &data, ®s)) { + overflow = 1; + event->pmu->stop(event, 0); + } + perf_event_update_userpage(event); +out: + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +static int sample_format_is_valid(struct hws_combined_entry *sample, + unsigned int flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + /* Only basic-sampling data entries with data-entry-format + * version of 0x0001 can be processed. + */ + if (sample->basic.def != 0x0001) + return 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + /* The data-entry-format number of diagnostic-sampling data + * entries can vary. Because diagnostic data is just passed + * through, do only a sanity check on the DEF. + */ + if (sample->diag.def < 0x8001) + return 0; + return 1; +} + +static int sample_is_consistent(struct hws_combined_entry *sample, + unsigned long flags) +{ + /* This check applies only to basic-sampling data entries of potentially + * combined-sampling data entries. Invalid entries cannot be processed + * by the PMU and, thus, do not deliver an associated + * diagnostic-sampling data entry. + */ + if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) + return 0; + /* + * Samples are skipped, if they are invalid or for which the + * instruction address is not predictable, i.e., the wait-state bit is + * set. + */ + if (sample->basic.I || sample->basic.W) + return 0; + return 1; +} + +static void reset_sample_slot(struct hws_combined_entry *sample, + unsigned long flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + sample->basic.def = 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + sample->diag.def = 0; +} + +static void sfr_store_sample(struct sf_raw_sample *sfr, + struct hws_combined_entry *sample) +{ + if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) + sfr->basic = sample->basic; + if (sfr->format & PERF_CPUM_SF_DIAG_MODE) + memcpy(&sfr->diag, &sample->diag, sfr->dsdes); +} + +static void debug_sample_entry(struct hws_combined_entry *sample, + struct hws_trailer_entry *te, + unsigned long flags) +{ + debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " + "sampling data entry: te->f=%i basic.def=%04x (%p)" + " diag.def=%04x (%p)\n", te->f, + sample->basic.def, &sample->basic, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? sample->diag.def : 0xFFFF, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? &sample->diag : NULL); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + unsigned long flags = SAMPL_FLAGS(&event->hw); + struct hws_combined_entry *sample; + struct hws_trailer_entry *te; + struct sf_raw_sample *sfr; + size_t sample_size; + + /* Prepare and initialize raw sample data */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); + sfr->format = flags & PERF_CPUM_SF_MODE_MASK; + + sample_size = event_sample_size(&event->hw); + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + sample = (struct hws_combined_entry *) *sdbt; + while ((unsigned long *) sample < (unsigned long *) te) { + /* Check for an empty sample */ + if (!sample->basic.def) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check sampling data entry */ + if (sample_format_is_valid(sample, flags)) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. + */ + if (!*overflow) { + if (sample_is_consistent(sample, flags)) { + /* Deliver sample data to perf */ + sfr_store_sample(sfr, sample); + *overflow = perf_push_sample(event, sfr); + } + } else + /* Count discarded samples */ + *overflow += 1; + } else { + debug_sample_entry(sample, te, flags); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->f) + break; + } + + /* Reset sample slot and advance to next sample */ + reset_sample_slot(sample, flags); + sample += sample_size; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flash_all flag + * to also walk through partially filled sample-data-blocks. It is ignored + * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag + * enforces the processing of full sample-data-blocks only (trailer entries + * with the block-full-indicator bit set). + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt; + unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; + int done; + + if (flush_all && SDB_FULL_BLOCKS(hwc)) + flush_all = 0; + + sdbt = (unsigned long *) TEAR_REG(hwc); + done = event_overflow = sampl_overflow = num_sdb = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->f) { + done = 1; + if (!flush_all) + break; + } + + /* Check the sample overflow count */ + if (te->overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). + */ + sampl_overflow += te->overflow; + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " + "overflow=%llu timestamp=0x%llx\n", + sdbt, te->overflow, + (te->f) ? trailer_timestamp(te) : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. + */ + hw_collect_samples(event, sdbt, &event_overflow); + num_sdb++; + + /* Reset trailer (using compare-double-and-swap) */ + do { + te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te_flags |= SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + te->flags, te->overflow, + te_flags, 0ULL)); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long) sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + + /* If an event overflow happened, discard samples by + * processing any remaining sample-data-blocks. + */ + if (event_overflow) + flush_all = 1; + } + + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); + if (sampl_overflow || event_overflow) + debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " + "overflow stats: sample=%llu event=%llu\n", + sampl_overflow, event_overflow); +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!cpuhw->sfb.sdbt) + return -EINVAL; + + err = 0; + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. */ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { + err = -EAGAIN; + goto out; + } + cpuhw->lsctl.es = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.ed = 1; + + /* Set in_use flag and store event */ + event->hw.idx = 0; /* only one sampling event per CPU supported */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); + +static struct attribute *cpumsf_pmu_events_attr[] = { + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .event_idx = cpumsf_pmu_event_idx, + .attr_groups = cpumsf_pmu_attr_groups, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = &__get_cpu_var(cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) + hw_perf_event_update(cpuhw->event, 0); + else + WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!atomic_read(&num_events)) + return NOTIFY_OK; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("The sampling buffer limits have changed to: " + "min=%lu max=%lu (diag=x%lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + +#define RS_INIT_FAILURE_QSI 0x0001 +#define RS_INIT_FAILURE_BSDES 0x0002 +#define RS_INIT_FAILURE_ALRT 0x0003 +#define RS_INIT_FAILURE_PERF 0x0004 +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason=%04x\n", reason); +} + +static int __init init_cpum_sampling_pmu(void) +{ + struct hws_qsi_info_block si; + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_cpumsf_err(RS_INIT_FAILURE_QSI); + return -ENODEV; + } + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + if (!sfdbg) + pr_err("Registering for s390dbf failed\n"); + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_PERF); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return err; +} +arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 00000000000..61595c1f0a0 --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,324 @@ +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 2012, 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "perf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/kvm_host.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sysinfo.h> + +const char *perf_pmu_name(void) +{ + if (cpum_cf_avail() || cpum_sf_avail()) + return "CPU-Measurement Facilities (CPU-MF)"; + return "pmu"; +} +EXPORT_SYMBOL(perf_pmu_name); + +int perf_num_counters(void) +{ + int num = 0; + + if (cpum_cf_avail()) + num += PERF_CPUM_CF_MAX_CTR; + if (cpum_sf_avail()) + num += PERF_CPUM_SF_MAX_CTR; + + return num; +} +EXPORT_SYMBOL(perf_num_counters); + +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; +#if IS_ENABLED(CONFIG_KVM) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + +static void print_debug_cf(void) +{ + struct cpumf_ctr_info cf_info; + int cpu = smp_processor_id(); + + memset(&cf_info, 0, sizeof(cf_info)); + if (!qctri(&cf_info)) + pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", + cpu, cf_info.cfvn, cf_info.csvn, + cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); +} + +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); + local_irq_restore(flags); +} + +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) +{ + struct cpumf_ctr_info ci; + + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) + return; + + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} + +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) + return; + + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + +/* See also arch/s390/kernel/traps.c */ +static unsigned long __store_trace(struct perf_callchain_entry *entry, + unsigned long sp, + unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, + sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + unsigned long head; + struct stack_frame *head_sf; + + if (user_mode(regs)) + return; + + head = regs->gprs[15]; + head_sf = (struct stack_frame *) head; + + if (!head_sf || !head_sf->back_chain) + return; + + head = head_sf->back_chain; + head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + + __store_trace(entry, head, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); +} + +/* Perf defintions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx,name=%s\n", + pmu_attr->id, attr->attr.name); +} + +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(perf_hw_owner_lock); +static void *perf_sampling_owner; + +int perf_reserve_sampling(void) +{ + int err; + + err = 0; + spin_lock(&perf_hw_owner_lock); + if (perf_sampling_owner) { + pr_warn("The sampling facility is already reserved by %p\n", + perf_sampling_owner); + err = -EBUSY; + } else + perf_sampling_owner = __builtin_return_address(0); + spin_unlock(&perf_hw_owner_lock); + return err; +} +EXPORT_SYMBOL(perf_reserve_sampling); + +void perf_release_sampling(void) +{ + spin_lock(&perf_hw_owner_lock); + WARN_ON(!perf_sampling_owner); + perf_sampling_owner = NULL; + spin_unlock(&perf_hw_owner_lock); +} +EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S new file mode 100644 index 00000000000..813ec726087 --- /dev/null +++ b/arch/s390/kernel/pgm_check.S @@ -0,0 +1,152 @@ +/* + * Program check table. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +#ifdef CONFIG_32BIT +#define PGM_CHECK_64BIT(handler) .long default_trap_handler +#else +#define PGM_CHECK_64BIT(handler) .long handler +#endif + +#define PGM_CHECK(handler) .long handler +#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler) + +/* + * The program check table contains exactly 128 (0x00-0x7f) entries. Each + * line defines the 31 and/or 64 bit function to be called corresponding + * to the program check interruption code. + */ +.section .rodata, "a" +ENTRY(pgm_check_table) +PGM_CHECK_DEFAULT /* 00 */ +PGM_CHECK(illegal_op) /* 01 */ +PGM_CHECK(privileged_op) /* 02 */ +PGM_CHECK(execute_exception) /* 03 */ +PGM_CHECK(do_protection_exception) /* 04 */ +PGM_CHECK(addressing_exception) /* 05 */ +PGM_CHECK(specification_exception) /* 06 */ +PGM_CHECK(data_exception) /* 07 */ +PGM_CHECK(overflow_exception) /* 08 */ +PGM_CHECK(divide_exception) /* 09 */ +PGM_CHECK(overflow_exception) /* 0a */ +PGM_CHECK(divide_exception) /* 0b */ +PGM_CHECK(hfp_overflow_exception) /* 0c */ +PGM_CHECK(hfp_underflow_exception) /* 0d */ +PGM_CHECK(hfp_significance_exception) /* 0e */ +PGM_CHECK(hfp_divide_exception) /* 0f */ +PGM_CHECK(do_dat_exception) /* 10 */ +PGM_CHECK(do_dat_exception) /* 11 */ +PGM_CHECK(translation_exception) /* 12 */ +PGM_CHECK(special_op_exception) /* 13 */ +PGM_CHECK_DEFAULT /* 14 */ +PGM_CHECK(operand_exception) /* 15 */ +PGM_CHECK_DEFAULT /* 16 */ +PGM_CHECK_DEFAULT /* 17 */ +PGM_CHECK_64BIT(transaction_exception) /* 18 */ +PGM_CHECK_DEFAULT /* 19 */ +PGM_CHECK_DEFAULT /* 1a */ +PGM_CHECK_DEFAULT /* 1b */ +PGM_CHECK(space_switch_exception) /* 1c */ +PGM_CHECK(hfp_sqrt_exception) /* 1d */ +PGM_CHECK_DEFAULT /* 1e */ +PGM_CHECK_DEFAULT /* 1f */ +PGM_CHECK_DEFAULT /* 20 */ +PGM_CHECK_DEFAULT /* 21 */ +PGM_CHECK_DEFAULT /* 22 */ +PGM_CHECK_DEFAULT /* 23 */ +PGM_CHECK_DEFAULT /* 24 */ +PGM_CHECK_DEFAULT /* 25 */ +PGM_CHECK_DEFAULT /* 26 */ +PGM_CHECK_DEFAULT /* 27 */ +PGM_CHECK_DEFAULT /* 28 */ +PGM_CHECK_DEFAULT /* 29 */ +PGM_CHECK_DEFAULT /* 2a */ +PGM_CHECK_DEFAULT /* 2b */ +PGM_CHECK_DEFAULT /* 2c */ +PGM_CHECK_DEFAULT /* 2d */ +PGM_CHECK_DEFAULT /* 2e */ +PGM_CHECK_DEFAULT /* 2f */ +PGM_CHECK_DEFAULT /* 30 */ +PGM_CHECK_DEFAULT /* 31 */ +PGM_CHECK_DEFAULT /* 32 */ +PGM_CHECK_DEFAULT /* 33 */ +PGM_CHECK_DEFAULT /* 34 */ +PGM_CHECK_DEFAULT /* 35 */ +PGM_CHECK_DEFAULT /* 36 */ +PGM_CHECK_DEFAULT /* 37 */ +PGM_CHECK_64BIT(do_dat_exception) /* 38 */ +PGM_CHECK_64BIT(do_dat_exception) /* 39 */ +PGM_CHECK_64BIT(do_dat_exception) /* 3a */ +PGM_CHECK_64BIT(do_dat_exception) /* 3b */ +PGM_CHECK_DEFAULT /* 3c */ +PGM_CHECK_DEFAULT /* 3d */ +PGM_CHECK_DEFAULT /* 3e */ +PGM_CHECK_DEFAULT /* 3f */ +PGM_CHECK_DEFAULT /* 40 */ +PGM_CHECK_DEFAULT /* 41 */ +PGM_CHECK_DEFAULT /* 42 */ +PGM_CHECK_DEFAULT /* 43 */ +PGM_CHECK_DEFAULT /* 44 */ +PGM_CHECK_DEFAULT /* 45 */ +PGM_CHECK_DEFAULT /* 46 */ +PGM_CHECK_DEFAULT /* 47 */ +PGM_CHECK_DEFAULT /* 48 */ +PGM_CHECK_DEFAULT /* 49 */ +PGM_CHECK_DEFAULT /* 4a */ +PGM_CHECK_DEFAULT /* 4b */ +PGM_CHECK_DEFAULT /* 4c */ +PGM_CHECK_DEFAULT /* 4d */ +PGM_CHECK_DEFAULT /* 4e */ +PGM_CHECK_DEFAULT /* 4f */ +PGM_CHECK_DEFAULT /* 50 */ +PGM_CHECK_DEFAULT /* 51 */ +PGM_CHECK_DEFAULT /* 52 */ +PGM_CHECK_DEFAULT /* 53 */ +PGM_CHECK_DEFAULT /* 54 */ +PGM_CHECK_DEFAULT /* 55 */ +PGM_CHECK_DEFAULT /* 56 */ +PGM_CHECK_DEFAULT /* 57 */ +PGM_CHECK_DEFAULT /* 58 */ +PGM_CHECK_DEFAULT /* 59 */ +PGM_CHECK_DEFAULT /* 5a */ +PGM_CHECK_DEFAULT /* 5b */ +PGM_CHECK_DEFAULT /* 5c */ +PGM_CHECK_DEFAULT /* 5d */ +PGM_CHECK_DEFAULT /* 5e */ +PGM_CHECK_DEFAULT /* 5f */ +PGM_CHECK_DEFAULT /* 60 */ +PGM_CHECK_DEFAULT /* 61 */ +PGM_CHECK_DEFAULT /* 62 */ +PGM_CHECK_DEFAULT /* 63 */ +PGM_CHECK_DEFAULT /* 64 */ +PGM_CHECK_DEFAULT /* 65 */ +PGM_CHECK_DEFAULT /* 66 */ +PGM_CHECK_DEFAULT /* 67 */ +PGM_CHECK_DEFAULT /* 68 */ +PGM_CHECK_DEFAULT /* 69 */ +PGM_CHECK_DEFAULT /* 6a */ +PGM_CHECK_DEFAULT /* 6b */ +PGM_CHECK_DEFAULT /* 6c */ +PGM_CHECK_DEFAULT /* 6d */ +PGM_CHECK_DEFAULT /* 6e */ +PGM_CHECK_DEFAULT /* 6f */ +PGM_CHECK_DEFAULT /* 70 */ +PGM_CHECK_DEFAULT /* 71 */ +PGM_CHECK_DEFAULT /* 72 */ +PGM_CHECK_DEFAULT /* 73 */ +PGM_CHECK_DEFAULT /* 74 */ +PGM_CHECK_DEFAULT /* 75 */ +PGM_CHECK_DEFAULT /* 76 */ +PGM_CHECK_DEFAULT /* 77 */ +PGM_CHECK_DEFAULT /* 78 */ +PGM_CHECK_DEFAULT /* 79 */ +PGM_CHECK_DEFAULT /* 7a */ +PGM_CHECK_DEFAULT /* 7b */ +PGM_CHECK_DEFAULT /* 7c */ +PGM_CHECK_DEFAULT /* 7d */ +PGM_CHECK_DEFAULT /* 7e */ +PGM_CHECK_DEFAULT /* 7f */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2ff90a1a105..93b9ca42e5c 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -1,52 +1,40 @@ /* - * arch/s390/kernel/process.c + * This file handles the architecture dependent parts of process handling. * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Hartmut Penner (hp@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * - * Derived from "arch/i386/kernel/process.c" - * Copyright (C) 1995, Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of process handling.. + * Copyright IBM Corp. 1999, 2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Hartmut Penner <hp@de.ibm.com>, + * Denis Joseph Barrow, */ -#include <linux/config.h> #include <linux/compiler.h> #include <linux/cpu.h> -#include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/elfcore.h> #include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> #include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/user.h> -#include <linux/a.out.h> #include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> +#include <linux/tick.h> +#include <linux/personality.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <linux/kprobes.h> +#include <linux/random.h> #include <linux/module.h> -#include <linux/notifier.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/system.h> #include <asm/io.h> #include <asm/processor.h> +#include <asm/vtimer.h> +#include <asm/exec.h> #include <asm/irq.h> -#include <asm/timer.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include <asm/switch_to.h> +#include <asm/runtime_instr.h> +#include "entry.h" -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); /* * Return saved PC of a blocked thread. used in kernel/sched. @@ -58,298 +46,178 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); */ unsigned long thread_saved_pc(struct task_struct *tsk) { - struct stack_frame *sf; + struct stack_frame *sf, *low, *high; - sf = (struct stack_frame *) tsk->thread.ksp; - sf = (struct stack_frame *) sf->back_chain; + if (!tsk || !task_stack_page(tsk)) + return 0; + low = task_stack_page(tsk); + high = (struct stack_frame *) task_pt_regs(tsk); + sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN); + if (sf <= low || sf > high) + return 0; + sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); + if (sf <= low || sf > high) + return 0; return sf->gprs[8]; } -/* - * Need to know about CPUs going idle? - */ -static struct notifier_block *idle_chain; - -int register_idle_notifier(struct notifier_block *nb) -{ - return notifier_chain_register(&idle_chain, nb); -} -EXPORT_SYMBOL(register_idle_notifier); - -int unregister_idle_notifier(struct notifier_block *nb) -{ - return notifier_chain_unregister(&idle_chain, nb); -} -EXPORT_SYMBOL(unregister_idle_notifier); - -void do_monitor_call(struct pt_regs *regs, long interruption_code) +void arch_cpu_idle(void) { - /* disable monitor call class 0 */ - __ctl_clear_bit(8, 15); - - notifier_call_chain(&idle_chain, CPU_NOT_IDLE, - (void *)(long) smp_processor_id()); -} - -extern void s390_handle_mcck(void); -/* - * The idle loop on a S390... - */ -void default_idle(void) -{ - int cpu, rc; - - /* CPU is going idle. */ - cpu = smp_processor_id(); - - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } - - rc = notifier_call_chain(&idle_chain, CPU_IDLE, (void *)(long) cpu); - if (rc != NOTIFY_OK && rc != NOTIFY_DONE) - BUG(); - if (rc != NOTIFY_OK) { - local_irq_enable(); - return; - } - - /* enable monitor call class 0 */ - __ctl_set_bit(8, 15); - -#ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(cpu)) - cpu_die(); -#endif - local_mcck_disable(); - if (test_thread_flag(TIF_MCCK_PENDING)) { + if (test_cpu_flag(CIF_MCCK_PENDING)) { local_mcck_enable(); local_irq_enable(); - s390_handle_mcck(); return; } - - /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT); + /* Halt the cpu and keep track of cpu time accounting. */ + vtime_stop_cpu(); + local_irq_enable(); } -void cpu_idle(void) +void arch_cpu_idle_exit(void) { - for (;;) { - while (!need_resched()) - default_idle(); - - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); } -void show_regs(struct pt_regs *regs) +void arch_cpu_idle_dead(void) { - struct task_struct *tsk = current; - - printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted()); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, (void *) tsk, - (void *) tsk->thread.ksp); - - show_registers(regs); - /* Show stack backtrace if pt_regs is from kernel mode */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - show_trace(0,(unsigned long *) regs->gprs[15]); + cpu_die(); } -extern void kernel_thread_starter(void); - -__asm__(".align 4\n" - "kernel_thread_starter:\n" - " la 2,0(10)\n" - " basr 14,9\n" - " la 2,0\n" - " br 11\n"); - -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; - regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; - regs.gprs[9] = (unsigned long) fn; - regs.gprs[10] = (unsigned long) arg; - regs.gprs[11] = (unsigned long) do_exit; - regs.orig_gpr2 = -1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, - 0, ®s, 0, NULL, NULL); -} +extern void __kprobes kernel_thread_starter(void); /* * Free current thread data structures etc.. */ void exit_thread(void) { + exit_thread_runtime_instr(); } void flush_thread(void) { - clear_used_math(); - clear_tsk_thread_flag(current, TIF_USEDFPU); } void release_thread(struct task_struct *dead_task) { } -int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) +int copy_thread(unsigned long clone_flags, unsigned long new_stackp, + unsigned long arg, struct task_struct *p) { - struct fake_frame - { - struct stack_frame sf; - struct pt_regs childregs; - } *frame; + struct thread_info *ti; + struct fake_frame + { + struct stack_frame sf; + struct pt_regs childregs; + } *frame; + + frame = container_of(task_pt_regs(p), struct fake_frame, childregs); + p->thread.ksp = (unsigned long) frame; + /* Save access registers to new thread structure. */ + save_access_regs(&p->thread.acrs[0]); + /* start new process with ar4 pointing to the correct address space */ + p->thread.mm_segment = get_fs(); + /* Don't copy debug registers */ + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); + clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + /* Initialize per thread user and system timer values */ + ti = task_thread_info(p); + ti->user_timer = 0; + ti->system_timer = 0; + + frame->sf.back_chain = 0; + /* new return point is ret_from_fork */ + frame->sf.gprs[8] = (unsigned long) ret_from_fork; + /* fake return stack for resume(), don't go back to schedule */ + frame->sf.gprs[9] = (unsigned long) frame; - frame = container_of(task_pt_regs(p), struct fake_frame, childregs); - p->thread.ksp = (unsigned long) frame; /* Store access registers to kernel stack of new process. */ - frame->childregs = *regs; - frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ - frame->childregs.gprs[15] = new_stackp; - frame->sf.back_chain = 0; - - /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(&frame->childregs, 0, sizeof(struct pt_regs)); + frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + frame->childregs.psw.addr = PSW_ADDR_AMODE | + (unsigned long) kernel_thread_starter; + frame->childregs.gprs[9] = new_stackp; /* function */ + frame->childregs.gprs[10] = arg; + frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.orig_gpr2 = -1; - /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + return 0; + } + frame->childregs = *current_pt_regs(); + frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; - /* Save access registers to new thread structure. */ - save_access_regs(&p->thread.acrs[0]); + /* Don't copy runtime instrumentation info */ + p->thread.ri_cb = NULL; + p->thread.ri_signum = 0; + frame->childregs.psw.mask &= ~PSW_MASK_RI; #ifndef CONFIG_64BIT - /* + /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the child. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(&p->thread.fp_regs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); - p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _SEGMENT_TABLE; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) - p->thread.acrs[0] = regs->gprs[6]; + p->thread.acrs[0] = frame->childregs.gprs[6]; #else /* CONFIG_64BIT */ /* Save the fpu registers to new thread structure. */ - save_fp_regs(&p->thread.fp_regs); - p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _REGION_TABLE; + save_fp_ctl(&p->thread.fp_regs.fpc); + save_fp_regs(p->thread.fp_regs.fprs); + p->thread.fp_regs.pad = 0; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { - if (test_thread_flag(TIF_31BIT)) { - p->thread.acrs[0] = (unsigned int) regs->gprs[6]; + unsigned long tls = frame->childregs.gprs[6]; + if (is_compat_task()) { + p->thread.acrs[0] = (unsigned int)tls; } else { - p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32); - p->thread.acrs[1] = (unsigned int) regs->gprs[6]; + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; } } #endif /* CONFIG_64BIT */ - /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); - /* Don't copy debug registers */ - memset(&p->thread.per_info,0,sizeof(p->thread.per_info)); - - return 0; -} - -asmlinkage long sys_fork(struct pt_regs regs) -{ - return do_fork(SIGCHLD, regs.gprs[15], ®s, 0, NULL, NULL); -} - -asmlinkage long sys_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs.gprs[3]; - newsp = regs.orig_gpr2; - parent_tidptr = (int __user *) regs.gprs[4]; - child_tidptr = (int __user *) regs.gprs[5]; - if (!newsp) - newsp = regs.gprs[15]; - return do_fork(clone_flags, newsp, ®s, 0, - parent_tidptr, child_tidptr); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage long sys_vfork(struct pt_regs regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, - regs.gprs[15], ®s, 0, NULL, NULL); + return 0; } -/* - * sys_execve() executes a new program. - */ -asmlinkage long sys_execve(struct pt_regs regs) +asmlinkage void execve_tail(void) { - int error; - char * filename; - - filename = getname((char __user *) regs.orig_gpr2); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, (char __user * __user *) regs.gprs[3], - (char __user * __user *) regs.gprs[4], ®s); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - current->thread.fp_regs.fpc = 0; - if (MACHINE_HAS_IEEE) - asm volatile("sfpc %0,%0" : : "d" (0)); - } - putname(filename); -out: - return error; + current->thread.fp_regs.fpc = 0; + if (MACHINE_HAS_IEEE) + asm volatile("sfpc %0,%0" : : "d" (0)); } - /* * fill in the FPU structure for a core dump. */ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) { #ifndef CONFIG_64BIT - /* + /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the dump. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(fpregs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); #else /* CONFIG_64BIT */ - save_fp_regs(fpregs); + save_fp_ctl(&fpregs->fpc); + save_fp_regs(fpregs->fprs); #endif /* CONFIG_64BIT */ return 1; } +EXPORT_SYMBOL(dump_fpu); unsigned long get_wchan(struct task_struct *p) { @@ -375,3 +243,36 @@ unsigned long get_wchan(struct task_struct *p) return 0; } +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static inline unsigned long brk_rnd(void) +{ + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; + else + return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long ret; + + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + unsigned long ret; + + if (!(current->flags & PF_RANDOMIZE)) + return base; + ret = PAGE_ALIGN(base + brk_rnd()); + return (ret > base) ? ret : base; +} diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c new file mode 100644 index 00000000000..24612029f45 --- /dev/null +++ b/arch/s390/kernel/processor.c @@ -0,0 +1,97 @@ +/* + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <asm/elf.h> +#include <asm/lowcore.h> +#include <asm/param.h> + +static DEFINE_PER_CPU(struct cpuid, cpu_id); + +/* + * cpu_init - initializes state that is per-CPU. + */ +void cpu_init(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct cpuid *id = &__get_cpu_var(cpu_id); + + get_cpu_id(id); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); + memset(idle, 0, sizeof(*idle)); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + static const char *hwcap_str[] = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", + "edat", "etf3eh", "highgprs", "te" + }; + unsigned long n = (unsigned long) v - 1; + int i; + + if (!n) { + s390_adjust_jiffies(); + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + show_cacheinfo(m); + } + get_online_cpus(); + if (cpu_online(n)) { + struct cpuid *id = &per_cpu(cpu_id, n); + seq_printf(m, "processor %li: " + "version = %02X, " + "identification = %06X, " + "machine = %04X\n", + n, id->version, id->ident, id->machine); + } + put_online_cpus(); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + diff --git a/arch/s390/kernel/profile.c b/arch/s390/kernel/profile.c deleted file mode 100644 index 7ba777eec1a..00000000000 --- a/arch/s390/kernel/profile.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * arch/s390/kernel/profile.c - * - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Thomas Spatzier (tspat@de.ibm.com) - * - */ -#include <linux/proc_fs.h> -#include <linux/profile.h> - -static struct proc_dir_entry * root_irq_dir; - -void init_irq_proc(void) -{ - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", 0); - - /* create /proc/irq/prof_cpu_mask */ - create_prof_cpu_mask(root_irq_dir); -} diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 37dfe33dab7..5dc7ad9e2fb 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1,115 +1,143 @@ /* - * arch/s390/kernel/ptrace.c + * Ptrace user space interface. * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Copyright IBM Corp. 1999, 2010 + * Author(s): Denis Joseph Barrow * Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * Based on PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/m68k/kernel/ptrace.c" - * Copyright (C) 1994 by Hamish Macdonald - * Taken from linux/kernel/ptrace.c and modified for M680x0. - * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds - * - * Modified by Cort Dougan (cort@cs.nmt.edu) - * - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/signal.h> - +#include <linux/elf.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/seccomp.h> +#include <linux/compat.h> +#include <trace/syscall.h> #include <asm/segment.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/unistd.h> +#include <asm/switch_to.h> +#include "entry.h" #ifdef CONFIG_COMPAT #include "compat_ptrace.h" #endif -static void -FixPerRegisters(struct task_struct *task) +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +enum s390_regset { + REGSET_GENERAL, + REGSET_FP, + REGSET_LAST_BREAK, + REGSET_TDB, + REGSET_SYSTEM_CALL, + REGSET_GENERAL_EXTENDED, +}; + +void update_cr_regs(struct task_struct *task) { - struct pt_regs *regs; - per_struct *per_info; + struct pt_regs *regs = task_pt_regs(task); + struct thread_struct *thread = &task->thread; + struct per_regs old, new; - regs = task_pt_regs(task); - per_info = (per_struct *) &task->thread.per_info; - per_info->control_regs.bits.em_instruction_fetch = - per_info->single_step | per_info->instruction_fetch; - - if (per_info->single_step) { - per_info->control_regs.bits.starting_addr = 0; -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - per_info->control_regs.bits.ending_addr = 0x7fffffffUL; +#ifdef CONFIG_64BIT + /* Take care of the enable/disable of transactional execution. */ + if (MACHINE_HAS_TE) { + unsigned long cr, cr_new; + + __ctl_store(cr, 0, 0); + /* Set or clear transaction execution TXC bit 8. */ + cr_new = cr | (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); + if (cr_new != cr) + __ctl_load(cr_new, 0, 0); + /* Set or clear transaction execution TDC bits 62 and 63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; + } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); + } +#endif + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) + new.control |= PER_EVENT_BRANCH; else + new.control |= PER_EVENT_IFETCH; +#ifdef CONFIG_64BIT + new.control |= PER_CONTROL_SUSPENSION; + new.control |= PER_EVENT_TRANSACTION_END; #endif - per_info->control_regs.bits.ending_addr = PSW_ADDR_INSN; - } else { - per_info->control_regs.bits.starting_addr = - per_info->starting_addr; - per_info->control_regs.bits.ending_addr = - per_info->ending_addr; + new.start = 0; + new.end = PSW_ADDR_INSN; } - /* - * if any of the control reg tracing bits are on - * we switch on per in the psw - */ - if (per_info->control_regs.words.cr[0] & PER_EM_MASK) - regs->psw.mask |= PSW_MASK_PER; - else + + /* Take care of the PER enablement bit in the PSW. */ + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; + return; + } + regs->psw.mask |= PSW_MASK_PER; + __ctl_store(old, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); +} - if (per_info->control_regs.bits.em_storage_alteration) - per_info->control_regs.bits.storage_alt_space_ctl = 1; - else - per_info->control_regs.bits.storage_alt_space_ctl = 0; +void user_enable_single_step(struct task_struct *task) +{ + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); + set_tsk_thread_flag(task, TIF_SINGLE_STEP); } -void -set_single_step(struct task_struct *task) +void user_disable_single_step(struct task_struct *task) { - task->thread.per_info.single_step = 1; - FixPerRegisters(task); + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); } -void -clear_single_step(struct task_struct *task) +void user_enable_block_step(struct task_struct *task) { - task->thread.per_info.single_step = 0; - FixPerRegisters(task); + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + set_tsk_thread_flag(task, TIF_BLOCK_STEP); } /* * Called by kernel/ptrace.c when detaching.. * - * Make sure single step bits etc are not set. + * Clear all debugging related fields. */ -void -ptrace_disable(struct task_struct *child) +void ptrace_disable(struct task_struct *task) { - /* make sure the single step bit is not set. */ - clear_single_step(child); + memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); + memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); + task->thread.per_flags = 0; } #ifndef CONFIG_64BIT @@ -118,6 +146,47 @@ ptrace_disable(struct task_struct *child) # define __ADDR_MASK 7 #endif +static inline unsigned long __peek_user_per(struct task_struct *child, + addr_t addr) +{ + struct per_struct_kernel *dummy = NULL; + + if (addr == (addr_t) &dummy->cr9) + /* Control bits of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy->cr10) + /* Start address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy->bits) + /* Single-step bit. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + (1UL << (BITS_PER_LONG - 1)) : 0; + else if (addr == (addr_t) &dummy->starting_addr) + /* Start address of the user specified per set. */ + return child->thread.per_user.start; + else if (addr == (addr_t) &dummy->ending_addr) + /* End address of the user specified per set. */ + return child->thread.per_user.end; + else if (addr == (addr_t) &dummy->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (unsigned long) + child->thread.per_event.cause << (BITS_PER_LONG - 16); + else if (addr == (addr_t) &dummy->address) + /* Address of the last PER trap */ + return child->thread.per_event.address; + else if (addr == (addr_t) &dummy->access_id) + /* Access id of the last PER trap */ + return (unsigned long) + child->thread.per_event.paid << (BITS_PER_LONG - 8); + return 0; +} + /* * Read the word at offset addr from the user area of a process. The * trouble here is that the information is littered over different @@ -127,33 +196,21 @@ ptrace_disable(struct task_struct *child) * struct user contain pad bytes that should be read as zeroes. * Lovely... */ -static int -peek_user(struct task_struct *child, addr_t addr, addr_t data) +static unsigned long __peek_user(struct task_struct *child, addr_t addr) { struct user *dummy = NULL; - addr_t offset, tmp, mask; - - /* - * Stupid gdb peeks/pokes the access registers in 64 bit with - * an alignment of 4. Programmers from hell... - */ - mask = __ADDR_MASK; -#ifdef CONFIG_64BIT - if (addr >= (addr_t) &dummy->regs.acrs && - addr < (addr_t) &dummy->regs.orig_gpr2) - mask = 3; -#endif - if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) - return -EIO; + addr_t offset, tmp; if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + if (addr == (addr_t) &dummy->regs.psw.mask) { + /* Return a clean psw mask. */ + tmp &= PSW_MASK_USER | PSW_MASK_RI; + tmp |= PSW_USER_BITS; + } } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -178,6 +235,13 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure @@ -185,64 +249,101 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) offset = addr - (addr_t) &dummy->regs.fp_regs; tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset); if (addr == (addr_t) &dummy->regs.fp_regs.fpc) - tmp &= (unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32); + tmp <<= BITS_PER_LONG - 32; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy->regs.per_info; - tmp = *(addr_t *)((addr_t) &child->thread.per_info + offset); + addr -= (addr_t) &dummy->regs.per_info; + tmp = __peek_user_per(child, addr); } else tmp = 0; - return put_user(tmp, (addr_t __user *) data); + return tmp; } -/* - * Write a word to the user area of a process at location addr. This - * operation does have an additional problem compared to peek_user. - * Stores to the program status word and on the floating point - * control register needs to get checked for validity. - */ static int -poke_user(struct task_struct *child, addr_t addr, addr_t data) +peek_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; - addr_t offset, mask; + addr_t tmp, mask; /* * Stupid gdb peeks/pokes the access registers in 64 bit with - * an alignment of 4. Programmers from hell indeed... + * an alignment of 4. Programmers from hell... */ mask = __ADDR_MASK; #ifdef CONFIG_64BIT - if (addr >= (addr_t) &dummy->regs.acrs && - addr < (addr_t) &dummy->regs.orig_gpr2) + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) mask = 3; #endif if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; + tmp = __peek_user(child, addr); + return put_user(tmp, (addr_t __user *) data); +} + +static inline void __poke_user_per(struct task_struct *child, + addr_t addr, addr_t data) +{ + struct per_struct_kernel *dummy = NULL; + + /* + * There are only three fields in the per_info struct that the + * debugger user can write to. + * 1) cr9: the debugger wants to set a new PER event mask + * 2) starting_addr: the debugger wants to set a new starting + * address to use with the PER event mask. + * 3) ending_addr: the debugger wants to set a new ending + * address to use with the PER event mask. + * The user specified PER event mask and the start and end + * addresses are used only if single stepping is not in effect. + * Writes to any other field in per_info are ignored. + */ + if (addr == (addr_t) &dummy->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* + * Write a word to the user area of a process at location addr. This + * operation does have an additional problem compared to peek_user. + * Stores to the program status word and on the floating point + * control register needs to get checked for validity. + */ +static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + struct user *dummy = NULL; + addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(PSW_USER32_BITS, data) && -#endif - data != PSW_MASK_MERGE(PSW_USER_BITS, data)) - /* Invalid psw mask. */ - return -EINVAL; -#ifndef CONFIG_64BIT - if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif + if (addr == (addr_t) &dummy->regs.psw.mask) { + unsigned long mask = PSW_MASK_USER; + + mask |= is_ri_task(child) ? PSW_MASK_RI : 0; + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ + return -EINVAL; + } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -269,69 +370,74 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) */ task_pt_regs(child)->orig_gpr2 = data; + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure */ - if (addr == (addr_t) &dummy->regs.fp_regs.fpc && - (data & ~((unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32))) != 0) - return -EINVAL; + if (addr == (addr_t) &dummy->regs.fp_regs.fpc) + if ((unsigned int) data != 0 || + test_fp_ctl(data >> (BITS_PER_LONG - 32))) + return -EINVAL; offset = addr - (addr_t) &dummy->regs.fp_regs; *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy->regs.per_info; - *(addr_t *)((addr_t) &child->thread.per_info + offset) = data; + addr -= (addr_t) &dummy->regs.per_info; + __poke_user_per(child, addr, data); } - FixPerRegisters(child); return 0; } -static int -do_ptrace_normal(struct task_struct *child, long request, long addr, long data) +static int poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t mask; + + /* + * Stupid gdb peeks/pokes the access registers in 64 bit with + * an alignment of 4. Programmers from hell indeed... + */ + mask = __ADDR_MASK; +#ifdef CONFIG_64BIT + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + mask = 3; +#endif + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) + return -EIO; + + return __poke_user(child, addr, data); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { - unsigned long tmp; ptrace_area parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* Remove high order bit from address (only for 31 bit). */ - addr &= PSW_ADDR_INSN; - /* read word at location addr. */ - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - if (copied != sizeof(tmp)) - return -EIO; - return put_user(tmp, (unsigned long __user *) data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ return peek_user(child, addr, data); - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* Remove high order bit from address (only for 31 bit). */ - addr &= PSW_ADDR_INSN; - /* write the word at location addr. */ - copied = access_process_vm(child, addr, &data, sizeof(data),1); - if (copied != sizeof(data)) - return -EIO; - return 0; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ return poke_user(child, addr, data); case PTRACE_PEEKUSR_AREA: case PTRACE_POKEUSR_AREA: - if (copy_from_user(&parea, (void __user *) addr, + if (copy_from_user(&parea, (void __force __user *) addr, sizeof(parea))) return -EFAULT; addr = parea.kernel_addr; @@ -341,10 +447,11 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data) if (request == PTRACE_PEEKUSR_AREA) ret = peek_user(child, addr, data); else { - addr_t tmp; - if (get_user (tmp, (addr_t __user *) data)) + addr_t utmp; + if (get_user(utmp, + (addr_t __force __user *) data)) return -EFAULT; - ret = poke_user(child, addr, tmp); + ret = poke_user(child, addr, utmp); } if (ret) return ret; @@ -353,8 +460,45 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned long); } return 0; + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned long __user *) data); + return 0; + case PTRACE_ENABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags &= ~PER_FLAG_NO_TE; + return 0; + case PTRACE_DISABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + return 0; + case PTRACE_TE_ABORT_RAND: + if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: + return -EINVAL; + } + return 0; + default: + /* Removing high order bit from addr (only for 31 bit). */ + addr &= PSW_ADDR_INSN; + return ptrace_request(child, request, addr, data); } - return ptrace_request(child, request, addr, data); } #ifdef CONFIG_COMPAT @@ -373,36 +517,73 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data) */ /* + * Same as peek_user_per but for a 31 bit program. + */ +static inline __u32 __peek_user_per_compat(struct task_struct *child, + addr_t addr) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* Control bits of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy32->cr10) + /* Start address of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW32_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->bits) + /* Single-step bit. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0x80000000 : 0; + else if (addr == (addr_t) &dummy32->starting_addr) + /* Start address of the user specified per set. */ + return (__u32) child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->ending_addr) + /* End address of the user specified per set. */ + return (__u32) child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (__u32) child->thread.per_event.cause << 16; + else if (addr == (addr_t) &dummy32->address) + /* Address of the last PER trap */ + return (__u32) child->thread.per_event.address; + else if (addr == (addr_t) &dummy32->access_id) + /* Access id of the last PER trap */ + return (__u32) child->thread.per_event.paid << 24; + return 0; +} + +/* * Same as peek_user but for a 31 bit program. */ -static int -peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) +static u32 __peek_user_compat(struct task_struct *child, addr_t addr) { - struct user32 *dummy32 = NULL; - per_struct32 *dummy_per32 = NULL; + struct compat_user *dummy32 = NULL; addr_t offset; __u32 tmp; - if (!test_thread_flag(TIF_31BIT) || - (addr & 3) || addr > sizeof(struct user) - 3) - return -EIO; - if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(PSW32_USER_BITS, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp &= PSW32_MASK_USER | PSW32_MASK_RI; + tmp |= PSW32_USER_BITS; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -417,6 +598,13 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) */ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure @@ -426,62 +614,87 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy32->regs.per_info; - /* This is magic. See per_struct and per_struct32. */ - if ((offset >= (addr_t) &dummy_per32->control_regs && - offset < (addr_t) (&dummy_per32->control_regs + 1)) || - (offset >= (addr_t) &dummy_per32->starting_addr && - offset <= (addr_t) &dummy_per32->ending_addr) || - offset == (addr_t) &dummy_per32->lowcore.words.address) - offset = offset*2 + 4; - else - offset = offset*2; - tmp = *(__u32 *)((addr_t) &child->thread.per_info + offset); + addr -= (addr_t) &dummy32->regs.per_info; + tmp = __peek_user_per_compat(child, addr); } else tmp = 0; + return tmp; +} + +static int peek_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + __u32 tmp; + + if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) + return -EIO; + + tmp = __peek_user_compat(child, addr); return put_user(tmp, (__u32 __user *) data); } /* + * Same as poke_user_per but for a 31 bit program. + */ +static inline void __poke_user_per_compat(struct task_struct *child, + addr_t addr, __u32 data) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy32->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy32->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* * Same as poke_user but for a 31 bit program. */ -static int -poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) +static int __poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) { - struct user32 *dummy32 = NULL; - per_struct32 *dummy_per32 = NULL; + struct compat_user *dummy32 = NULL; + __u32 tmp = (__u32) data; addr_t offset; - __u32 tmp; - - if (!test_thread_flag(TIF_31BIT) || - (addr & 3) || addr > sizeof(struct user32) - 3) - return -EIO; - - tmp = (__u32) data; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { + __u32 mask = PSW32_MASK_USER; + + mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(PSW32_USER_BITS, tmp)) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(PSW_USER32_BITS, (__u64) tmp << 32); + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & mask) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -496,82 +709,64 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) */ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent writess of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure */ if (addr == (addr_t) &dummy32->regs.fp_regs.fpc && - (tmp & ~FPC_VALID_MASK) != 0) - /* Invalid floating point control. */ + test_fp_ctl(tmp)) return -EINVAL; offset = addr - (addr_t) &dummy32->regs.fp_regs; *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp; } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* - * per_info is found in the thread structure. - */ - offset = addr - (addr_t) &dummy32->regs.per_info; - /* - * This is magic. See per_struct and per_struct32. - * By incident the offsets in per_struct are exactly - * twice the offsets in per_struct32 for all fields. - * The 8 byte fields need special handling though, - * because the second half (bytes 4-7) is needed and - * not the first half. + * Handle access to the per_info structure. */ - if ((offset >= (addr_t) &dummy_per32->control_regs && - offset < (addr_t) (&dummy_per32->control_regs + 1)) || - (offset >= (addr_t) &dummy_per32->starting_addr && - offset <= (addr_t) &dummy_per32->ending_addr) || - offset == (addr_t) &dummy_per32->lowcore.words.address) - offset = offset*2 + 4; - else - offset = offset*2; - *(__u32 *)((addr_t) &child->thread.per_info + offset) = tmp; - + addr -= (addr_t) &dummy32->regs.per_info; + __poke_user_per_compat(child, addr, data); } - FixPerRegisters(child); return 0; } -static int -do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) +static int poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) { - unsigned int tmp; /* 4 bytes !! */ - ptrace_area_emu31 parea; + if (!is_compat_task() || (addr & 3) || + addr > sizeof(struct compat_user) - 3) + return -EIO; + + return __poke_user_compat(child, addr, data); +} + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + compat_ptrace_area parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* read word at location addr. */ - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - if (copied != sizeof(tmp)) - return -EIO; - return put_user(tmp, (unsigned int __user *) data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ - return peek_user_emu31(child, addr, data); - - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* write the word at location addr. */ - tmp = data; - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1); - if (copied != sizeof(tmp)) - return -EIO; - return 0; + return peek_user_compat(child, addr, data); case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - return poke_user_emu31(child, addr, data); + return poke_user_compat(child, addr, data); case PTRACE_PEEKUSR_AREA: case PTRACE_POKEUSR_AREA: - if (copy_from_user(&parea, (void __user *) addr, + if (copy_from_user(&parea, (void __force __user *) addr, sizeof(parea))) return -EFAULT; addr = parea.kernel_addr; @@ -579,12 +774,13 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied = 0; while (copied < parea.len) { if (request == PTRACE_PEEKUSR_AREA) - ret = peek_user_emu31(child, addr, data); + ret = peek_user_compat(child, addr, data); else { - __u32 tmp; - if (get_user (tmp, (__u32 __user *) data)) + __u32 utmp; + if (get_user(utmp, + (__u32 __force __user *) data)) return -EFAULT; - ret = poke_user_emu31(child, addr, tmp); + ret = poke_user_compat(child, addr, utmp); } if (ret) return ret; @@ -593,176 +789,582 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned int); } return 0; - case PTRACE_GETEVENTMSG: - return put_user((__u32) child->ptrace_message, - (unsigned int __user *) data); - case PTRACE_GETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_to_user32((compat_siginfo_t __user *) data, - child->last_siginfo); - case PTRACE_SETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_from_user32(child->last_siginfo, - (compat_siginfo_t __user *) data); + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned int __user *) data); + return 0; } - return ptrace_request(child, request, addr, data); + return compat_ptrace_request(child, request, addr, data); } #endif -#define PT32_IEEE_IP 0x13c - -static int -do_ptrace(struct task_struct *child, long request, long addr, long data) +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { - int ret; + long ret = 0; - if (request == PTRACE_ATTACH) - return ptrace_attach(child); + /* Do the secure computing check first. */ + if (secure_computing(regs->gprs[2])) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1; + goto out; + } /* - * Special cases to get/store the ieee instructions pointer. + * The sysc_tracesys code in entry.S stored the system + * call number to gprs[2]. */ - if (child == current) { - if (request == PTRACE_PEEKUSR && addr == PT_IEEE_IP) - return peek_user(child, addr, data); - if (request == PTRACE_POKEUSR && addr == PT_IEEE_IP) - return poke_user(child, addr, data); -#ifdef CONFIG_COMPAT - if (request == PTRACE_PEEKUSR && - addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) - return peek_user_emu31(child, addr, data); - if (request == PTRACE_POKEUSR && - addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) - return poke_user_emu31(child, addr, data); -#endif + if (test_thread_flag(TIF_SYSCALL_TRACE) && + (tracehook_report_syscall_entry(regs) || + regs->gprs[2] >= NR_syscalls)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + ret = -1; } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - return ret; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gprs[2]); - switch (request) { - case PTRACE_SYSCALL: - /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - /* restart after signal. */ - if (!valid_signal(data)) - return -EIO; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - return 0; + audit_syscall_entry(is_compat_task() ? + AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); +out: + return ret ?: regs->gprs[2]; +} - case PTRACE_KILL: - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - return 0; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_single_step(child); - wake_up_process(child); - return 0; +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) +{ + audit_syscall_exit(regs); - case PTRACE_SINGLESTEP: - /* set the trap flag. */ - if (!valid_signal(data)) - return -EIO; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - if (data) - set_tsk_thread_flag(child, TIF_SINGLE_STEP); - else - set_single_step(child); - /* give it a chance to run. */ - wake_up_process(child); - return 0; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->gprs[2]); - case PTRACE_DETACH: - /* detach a process that was attached. */ - return ptrace_detach(child, data); + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} +/* + * user_regset definitions. + */ - /* Do requests that differ for 31/64 bit */ - default: -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - return do_ptrace_emu31(child, request, addr, data); -#endif - return do_ptrace_normal(child, request, addr, data); +static int s390_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + unsigned long *k = kbuf; + while (count > 0) { + *k++ = __peek_user(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + unsigned long __user *u = ubuf; + while (count > 0) { + if (__put_user(__peek_user(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } } - /* Not reached. */ - return -EIO; + return 0; } -asmlinkage long -sys_ptrace(long request, long pid, long addr, long data) +static int s390_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - struct task_struct *child; - int ret; + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); - lock_kernel(); - if (request == PTRACE_TRACEME) { - ret = ptrace_traceme(); - goto out; + if (kbuf) { + const unsigned long *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } } - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) { - ret = PTR_ERR(child); - goto out; + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_fpregs_get(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, void *kbuf, void __user *ubuf) +{ + if (target == current) { + save_fp_ctl(&target->thread.fp_regs.fpc); + save_fp_regs(target->thread.fp_regs.fprs); } - ret = do_ptrace(child, request, addr, data); - put_task_struct(child); -out: - unlock_kernel(); - return ret; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_regs, 0, -1); } -asmlinkage void -syscall_trace(struct pt_regs *regs, int entryexit) +static int s390_fpregs_set(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, + const void __user *ubuf) { - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); + int rc = 0; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - goto out; - if (!(current->ptrace & PT_PTRACED)) - goto out; - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); + if (target == current) { + save_fp_ctl(&target->thread.fp_regs.fpc); + save_fp_regs(target->thread.fp_regs.fprs); + } - /* - * If the debuffer has set an invalid system call number, - * we prepare to skip the system call restart handling. - */ - if (!entryexit && regs->gprs[2] >= NR_syscalls) - regs->trap = -1; + /* If setting FPC, must validate it first. */ + if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { + u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 }; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, + 0, offsetof(s390_fp_regs, fprs)); + if (rc) + return rc; + if (ufpc[1] != 0 || test_fp_ctl(ufpc[0])) + return -EINVAL; + target->thread.fp_regs.fpc = ufpc[0]; + } - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; + if (rc == 0 && count > 0) + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.fp_regs.fprs, + offsetof(s390_fp_regs, fprs), -1); + + if (rc == 0 && target == current) { + restore_fp_ctl(&target->thread.fp_regs.fpc); + restore_fp_regs(target->thread.fp_regs.fprs); + } + + return rc; +} + +#ifdef CONFIG_64BIT + +static int s390_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (count > 0) { + if (kbuf) { + unsigned long *k = kbuf; + *k = task_thread_info(target)->last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(task_thread_info(target)->last_break, u)) + return -EFAULT; + } + } + return 0; +} + +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static int s390_tdb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned char *data; + + if (!(regs->int_code & 0x200)) + return -ENODATA; + data = target->thread.trap_tdb; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); +} + +static int s390_tdb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +#endif + +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static const struct user_regset s390_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = s390_regs_get, + .set = s390_regs_set, + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = s390_fpregs_get, + .set = s390_fpregs_set, + }, +#ifdef CONFIG_64BIT + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_last_break_get, + .set = s390_last_break_set, + }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, +#endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, +}; + +static const struct user_regset_view user_s390_view = { + .name = UTS_MACHINE, + .e_machine = EM_S390, + .regsets = s390_regsets, + .n = ARRAY_SIZE(s390_regsets) +}; + +#ifdef CONFIG_COMPAT +static int s390_compat_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count > 0) { + *k++ = __peek_user_compat(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count > 0) { + if (__put_user(__peek_user_compat(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return 0; +} + +static int s390_compat_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user_compat(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + compat_ulong_t word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user_compat(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_compat_regs_high_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t *gprs_high; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count > 0) { + *k++ = *gprs_high; + gprs_high += 2; + count -= sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count > 0) { + if (__put_user(*gprs_high, u++)) + return -EFAULT; + gprs_high += 2; + count -= sizeof(*u); + } + } + return 0; +} + +static int s390_compat_regs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + compat_ulong_t *gprs_high; + int rc = 0; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0) { + *gprs_high = *k++; + *gprs_high += 2; + count -= sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + *gprs_high = word; + *gprs_high += 2; + count -= sizeof(*u); + } } - out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(current, - test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5]); + + return rc; +} + +static int s390_compat_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t last_break; + + if (count > 0) { + last_break = task_thread_info(target)->last_break; + if (kbuf) { + unsigned long *k = kbuf; + *k = last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(last_break, u)) + return -EFAULT; + } + } + return 0; +} + +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static const struct user_regset s390_compat_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_compat_regs_get, + .set = s390_compat_regs_set, + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_fpregs_get, + .set = s390_fpregs_set, + }, + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, + }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, + [REGSET_GENERAL_EXTENDED] = { + .core_note_type = NT_S390_HIGH_GPRS, + .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_compat_regs_high_get, + .set = s390_compat_regs_high_set, + }, +}; + +static const struct user_regset_view user_s390_compat_view = { + .name = "s390", + .e_machine = EM_S390, + .regsets = s390_compat_regsets, + .n = ARRAY_SIZE(s390_compat_regsets) +}; +#endif + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + return &user_s390_compat_view; +#endif + return &user_s390_view; +} + +static const char *gpr_names[NUM_GPRS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", +}; + +unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +int regs_query_register_offset(const char *name) +{ + unsigned long offset; + + if (!name || *name != 'r') + return -EINVAL; + if (kstrtoul(name + 1, 10, &offset)) + return -EINVAL; + if (offset >= NUM_GPRS) + return -EINVAL; + return offset; +} + +const char *regs_query_register_name(unsigned int offset) +{ + if (offset >= NUM_GPRS) + return NULL; + return gpr_names[offset]; +} + +static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return *(unsigned long *)addr; } diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 658e5ac484f..dd8016b0477 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -1,70 +1,87 @@ /* - * arch/s390/kernel/reipl.S - * * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000 * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) */ -#include <asm/lowcore.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/sigp.h> - .globl do_reipl -do_reipl: basr %r13,0 +# +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + +# +# do_reipl_asm +# Parameter: r2 = schid of reipl device +# +ENTRY(do_reipl_asm) + basr %r13,0 .Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) -.Lpg1: lctl %c6,%c6,.Lall-.Lpg0(%r13) - stctl %c0,%c0,.Lctlsave-.Lpg0(%r13) - ni .Lctlsave-.Lpg0(%r13),0xef - lctl %c0,%c0,.Lctlsave-.Lpg0(%r13) - lr %r1,%r2 - mvc __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13) - stsch .Lschib-.Lpg0(%r13) - oi .Lschib+5-.Lpg0(%r13),0x84 -.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 - msch .Lschib-.Lpg0(%r13) - lhi %r0,5 -.Lssch: ssch .Liplorb-.Lpg0(%r13) +.Lpg1: # do store status of all registers + + stm %r0,%r15,__LC_GPREGS_SAVE_AREA + stctl %c0,%c15,__LC_CREGS_SAVE_AREA + stam %a0,%a15,__LC_AREGS_SAVE_AREA + l %r10,.Ldump_pfx-.Lpg0(%r13) + mvc __LC_PREFIX_SAVE_AREA(4),0(%r10) + stckc .Lclkcmp-.Lpg0(%r13) + mvc __LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13) + stpt __LC_CPU_TIMER_SAVE_AREA + st %r13, __LC_PSW_SAVE_AREA+4 + lctl %c6,%c6,.Lall-.Lpg0(%r13) + lr %r1,%r2 + mvc __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13) + stsch .Lschib-.Lpg0(%r13) + oi .Lschib+5-.Lpg0(%r13),0x84 +.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 + msch .Lschib-.Lpg0(%r13) + lhi %r0,5 +.Lssch: ssch .Liplorb-.Lpg0(%r13) jz .L001 - brct %r0,.Lssch + brct %r0,.Lssch bas %r14,.Ldisab-.Lpg0(%r13) -.L001: mvc __LC_IO_NEW_PSW(8),.Lionew-.Lpg0(%r13) -.Ltpi: lpsw .Lwaitpsw-.Lpg0(%r13) +.L001: mvc __LC_IO_NEW_PSW(8),.Lionew-.Lpg0(%r13) +.Ltpi: lpsw .Lwaitpsw-.Lpg0(%r13) .Lcont: c %r1,__LC_SUBCHANNEL_ID jnz .Ltpi clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13) jnz .Ltpi - tsch .Liplirb-.Lpg0(%r13) + tsch .Liplirb-.Lpg0(%r13) tm .Liplirb+9-.Lpg0(%r13),0xbf - jz .L002 - bas %r14,.Ldisab-.Lpg0(%r13) -.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 - jz .L003 - bas %r14,.Ldisab-.Lpg0(%r13) -.L003: spx .Lnull-.Lpg0(%r13) - st %r1,__LC_SUBCHANNEL_ID - lpsw 0 - sigp 0,0,0(6) -.Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) + jz .L002 + bas %r14,.Ldisab-.Lpg0(%r13) +.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 + jz .L003 + bas %r14,.Ldisab-.Lpg0(%r13) +.L003: st %r1,__LC_SUBCHANNEL_ID + lpsw 0 + sigp 0,0,SIGP_RESTART +.Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) lpsw .Ldispsw-.Lpg0(%r13) - .align 8 + .align 8 +.Lclkcmp: .quad 0x0000000000000000 .Lall: .long 0xff000000 -.Lnull: .long 0x00000000 -.Lctlsave: .long 0x00000000 - .align 8 -.Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 -.Lpcnew: .long 0x00080000,0x80000000+.Lecs -.Lionew: .long 0x00080000,0x80000000+.Lcont +.Ldump_pfx: .long dump_prefix_page + .align 8 +.Lnewpsw: .long 0x00080000,0x80000000+.Lpg1 +.Lpcnew: .long 0x00080000,0x80000000+.Lecs +.Lionew: .long 0x00080000,0x80000000+.Lcont .Lwaitpsw: .long 0x020a0000,0x00000000+.Ltpi -.Ldispsw: .long 0x000a0000,0x00000000 -.Liplccws: .long 0x02000000,0x60000018 - .long 0x08000008,0x20000001 +.Ldispsw: .long 0x000a0000,0x00000000 +.Liplccws: .long 0x02000000,0x60000018 + .long 0x08000008,0x20000001 .Liplorb: .long 0x0049504c,0x0040ff80 .long 0x00000000+.Liplccws -.Lschib: .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 +.Lschib: .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 .Liplirb: .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 @@ -73,6 +90,3 @@ do_reipl: basr %r13,0 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 - - - diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 4d090d60f3e..dc3b1273c4d 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -1,59 +1,121 @@ /* - * arch/s390/kernel/reipl.S - * - * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) - Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * Copyright IBM Corp 2000, 2011 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Denis Joseph Barrow, */ -#include <asm/lowcore.h> - .globl do_reipl -do_reipl: basr %r13,0 -.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: lctlg %c6,%c6,.Lall-.Lpg0(%r13) - stctg %c0,%c0,.Lctlsave-.Lpg0(%r13) - ni .Lctlsave+4-.Lpg0(%r13),0xef - lctlg %c0,%c0,.Lctlsave-.Lpg0(%r13) - lgr %r1,%r2 - mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) - stsch .Lschib-.Lpg0(%r13) - oi .Lschib+5-.Lpg0(%r13),0x84 -.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 - msch .Lschib-.Lpg0(%r13) - lghi %r0,5 -.Lssch: ssch .Liplorb-.Lpg0(%r13) +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/sigp.h> + +# +# store_status +# +# Prerequisites to run this function: +# - Prefix register is set to zero +# - Original prefix register is stored in "dump_prefix_page" +# - Lowcore protection is off +# +ENTRY(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_RESTART + lghi %r1,SAVE_AREA_BASE + /* General purpose registers */ + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lg %r2,__LC_SAVE_AREA_RESTART + stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Access registers */ + stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point registers */ + std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point control register */ + stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* CPU timer */ + stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Saved prefix register */ + larl %r2,dump_prefix_page + mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + /* Clock comparator - seven bytes */ + larl %r2,.Lclkcmp + stckc 0(%r2) + mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + /* Program status word */ + epsw %r2,%r3 + st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) + st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) + larl %r2,store_status + stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) + br %r14 + + .section .bss + .align 8 +.Lclkcmp: .quad 0x0000000000000000 + .previous + +# +# do_reipl_asm +# Parameter: r2 = schid of reipl device +# + +ENTRY(do_reipl_asm) + basr %r13,0 +.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) +.Lpg1: brasl %r14,store_status + + lctlg %c6,%c6,.Lall-.Lpg0(%r13) + lgr %r1,%r2 + mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) + stsch .Lschib-.Lpg0(%r13) + oi .Lschib+5-.Lpg0(%r13),0x84 +.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01 + msch .Lschib-.Lpg0(%r13) + lghi %r0,5 +.Lssch: ssch .Liplorb-.Lpg0(%r13) jz .L001 - brct %r0,.Lssch + brct %r0,.Lssch bas %r14,.Ldisab-.Lpg0(%r13) -.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13) -.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13) +.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13) +.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13) .Lcont: c %r1,__LC_SUBCHANNEL_ID jnz .Ltpi clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13) jnz .Ltpi - tsch .Liplirb-.Lpg0(%r13) + tsch .Liplirb-.Lpg0(%r13) tm .Liplirb+9-.Lpg0(%r13),0xbf - jz .L002 - bas %r14,.Ldisab-.Lpg0(%r13) -.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 - jz .L003 - bas %r14,.Ldisab-.Lpg0(%r13) -.L003: spx .Lnull-.Lpg0(%r13) - st %r1,__LC_SUBCHANNEL_ID - lhi %r1,0 # mode 0 = esa - slr %r0,%r0 # set cpuid to zero - sigp %r1,%r0,0x12 # switch to esa mode - lpsw 0 -.Ldisab: sll %r14,1 - srl %r14,1 # need to kill hi bit to avoid specification exceptions. - st %r14,.Ldispsw+12-.Lpg0(%r13) + jz .L002 + bas %r14,.Ldisab-.Lpg0(%r13) +.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3 + jz .L003 + bas %r14,.Ldisab-.Lpg0(%r13) +.L003: st %r1,__LC_SUBCHANNEL_ID + lhi %r1,0 # mode 0 = esa + slr %r0,%r0 # set cpuid to zero + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode + lpsw 0 +.Ldisab: sll %r14,1 + srl %r14,1 # need to kill hi bit to avoid specification exceptions. + st %r14,.Ldispsw+12-.Lpg0(%r13) lpswe .Ldispsw-.Lpg0(%r13) - .align 8 + .align 8 .Lall: .quad 0x00000000ff000000 -.Lctlsave: .quad 0x0000000000000000 -.Lnull: .long 0x0000000000000000 - .align 16 + .align 16 /* * These addresses have to be 31 bit otherwise * the sigp will throw a specifcation exception @@ -61,28 +123,28 @@ do_reipl: basr %r13,0 * in the ESA psw. * Bit 31 of the addresses has to be 0 for the * 31bit lpswe instruction a fact they appear to have - * ommited from the pop. + * omitted from the pop. */ -.Lnewpsw: .quad 0x0000000080000000 - .quad .Lpg1 -.Lpcnew: .quad 0x0000000080000000 - .quad .Lecs -.Lionew: .quad 0x0000000080000000 - .quad .Lcont +.Lnewpsw: .quad 0x0000000080000000 + .quad .Lpg1 +.Lpcnew: .quad 0x0000000080000000 + .quad .Lecs +.Lionew: .quad 0x0000000080000000 + .quad .Lcont .Lwaitpsw: .quad 0x0202000080000000 - .quad .Ltpi -.Ldispsw: .quad 0x0002000080000000 - .quad 0x0000000000000000 -.Liplccws: .long 0x02000000,0x60000018 - .long 0x08000008,0x20000001 + .quad .Ltpi +.Ldispsw: .quad 0x0002000080000000 + .quad 0x0000000000000000 +.Liplccws: .long 0x02000000,0x60000018 + .long 0x08000008,0x20000001 .Liplorb: .long 0x0049504c,0x0040ff80 .long 0x00000000+.Liplccws -.Lschib: .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 +.Lschib: .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 + .long 0x00000000,0x00000000 .Liplirb: .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 @@ -91,6 +153,3 @@ do_reipl: basr %r13,0 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 .long 0x00000000,0x00000000 - - - diff --git a/arch/s390/kernel/reipl_diag.c b/arch/s390/kernel/reipl_diag.c deleted file mode 100644 index 1f33951ba43..00000000000 --- a/arch/s390/kernel/reipl_diag.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * This file contains the implementation of the - * Linux re-IPL support - * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Volker Sameske (sameske@de.ibm.com) - * - */ - -#include <linux/kernel.h> - -static unsigned int reipl_diag_rc1; -static unsigned int reipl_diag_rc2; - -/* - * re-IPL the system using the last used IPL parameters - */ -void reipl_diag(void) -{ - asm volatile ( - " la %%r4,0\n" - " la %%r5,0\n" - " diag %%r4,%2,0x308\n" - "0:\n" - " st %%r4,%0\n" - " st %%r5,%1\n" - ".section __ex_table,\"a\"\n" -#ifdef CONFIG_64BIT - " .align 8\n" - " .quad 0b, 0b\n" -#else - " .align 4\n" - " .long 0b, 0b\n" -#endif - ".previous\n" - : "=m" (reipl_diag_rc1), "=m" (reipl_diag_rc2) - : "d" (3) : "cc", "4", "5" ); -} diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index 2a25ec7147f..f4e6f20e117 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -1,13 +1,14 @@ /* - * arch/s390/kernel/relocate_kernel.S + * Copyright IBM Corp. 2005 * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> * */ +#include <linux/linkage.h> +#include <asm/sigp.h> + /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t @@ -22,16 +23,14 @@ */ .text - .globl relocate_kernel - relocate_kernel: - basr %r13,0 #base address +ENTRY(relocate_kernel) + basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xf8 #disable DAT and IRQ (external) - spx zero64-.base(%r13) #absolute addressing mode + stnsm sys_msk-.base(%r13),0xfb # disable DAT stctl %c0,%c15,ctlregs-.base(%r13) stm %r0,%r15,gprregs-.base(%r13) la %r1,load_psw-.base(%r13) - mvc 0(8,%r0),0(%r1) + mvc 0(8,%r0),0(%r1) la %r0,.back-.base(%r13) st %r0,4(%r0) oi 4(%r0),0x80 @@ -51,54 +50,52 @@ .back_pgm: lm %r0,%r15,gprregs-.base(%r13) .start_reloc: - lhi %r10,-1 #preparing the mask - sll %r10,12 #shift it such that it becomes 0xf000 + lhi %r10,-1 # preparing the mask + sll %r10,12 # shift it such that it becomes 0xf000 .top: - lhi %r7,4096 #load PAGE_SIZE in r7 - lhi %r9,4096 #load PAGE_SIZE in r9 - l %r5,0(%r2) #read another word for indirection page - ahi %r2,4 #increment pointer - tml %r5,0x1 #is it a destination page? - je .indir_check #NO, goto "indir_check" - lr %r6,%r5 #r6 = r5 - nr %r6,%r10 #mask it out and... - j .top #...next iteration + lhi %r7,4096 # load PAGE_SIZE in r7 + lhi %r9,4096 # load PAGE_SIZE in r9 + l %r5,0(%r2) # read another word for indirection page + ahi %r2,4 # increment pointer + tml %r5,0x1 # is it a destination page? + je .indir_check # NO, goto "indir_check" + lr %r6,%r5 # r6 = r5 + nr %r6,%r10 # mask it out and... + j .top # ...next iteration .indir_check: - tml %r5,0x2 #is it a indirection page? - je .done_test #NO, goto "done_test" - nr %r5,%r10 #YES, mask out, - lr %r2,%r5 #move it into the right register, - j .top #and read next... + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nr %r5,%r10 # YES, mask out, + lr %r2,%r5 # move it into the right register, + j .top # and read next... .done_test: - tml %r5,0x4 #is it the done indicator? - je .source_test #NO! Well, then it should be the source indicator... - j .done #ok, lets finish it here... + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... .source_test: - tml %r5,0x8 #it should be a source indicator... - je .top #NO, ignore it... - lr %r8,%r5 #r8 = r5 - nr %r8,%r10 #masking - 0: mvcle %r6,%r8,0x0 #copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + tml %r5,0x8 # it should be a source indicator... + je .top # NO, ignore it... + lr %r8,%r5 # r8 = r5 + nr %r8,%r10 # masking + 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 jo 0b j .top .done: - sr %r0,%r0 #clear register r0 - la %r4,load_psw-.base(%r13) #load psw-address into the register - o %r3,4(%r4) #or load address into psw + sr %r0,%r0 # clear register r0 + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw st %r3,4(%r4) - mvc 0(8,%r0),0(%r4) #copy psw to absolute address 0 + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 tm have_diag308-.base(%r13),0x01 jno .no_diag308 diag %r0,%r0,0x308 .no_diag308: - sr %r1,%r1 #clear %r1 - sr %r2,%r2 #clear %r2 - sigp %r1,%r2,0x12 #set cpuid to zero - lpsw 0 #hopefully start new kernel... + sr %r1,%r1 # clear %r1 + sr %r2,%r2 # clear %r2 + sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero + lpsw 0 # hopefully start new kernel... .align 8 - zero64: - .quad 0 load_psw: .long 0x00080000,0x80000000 sys_msk: @@ -115,6 +112,7 @@ .byte 0 .align 8 relocate_kernel_end: + .align 8 .globl relocate_kernel_len relocate_kernel_len: .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S index 8cdb86e8911..cfac28330b0 100644 --- a/arch/s390/kernel/relocate_kernel64.S +++ b/arch/s390/kernel/relocate_kernel64.S @@ -1,13 +1,14 @@ /* - * arch/s390/kernel/relocate_kernel64.S + * Copyright IBM Corp. 2005 * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> * */ +#include <linux/linkage.h> +#include <asm/sigp.h> + /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t @@ -23,12 +24,10 @@ */ .text - .globl relocate_kernel - relocate_kernel: - basr %r13,0 #base address +ENTRY(relocate_kernel) + basr %r13,0 # base address .base: - stnsm sys_msk-.base(%r13),0xf8 #disable DAT and IRQs - spx zero64-.base(%r13) #absolute addressing mode + stnsm sys_msk-.base(%r13),0xfb # disable DAT stctg %c0,%c15,ctlregs-.base(%r13) stmg %r0,%r15,gprregs-.base(%r13) lghi %r0,3 @@ -37,16 +36,16 @@ la %r0,.back_pgm-.base(%r13) stg %r0,0x1d8(%r0) la %r1,load_psw-.base(%r13) - mvc 0(8,%r0),0(%r1) + mvc 0(8,%r0),0(%r1) la %r0,.back-.base(%r13) st %r0,4(%r0) oi 4(%r0),0x80 lghi %r0,0 diag %r0,%r0,0x308 .back: - lhi %r1,1 #mode 1 = esame - sigp %r1,%r0,0x12 #switch to esame mode - sam64 #switch to 64 bit addressing mode + lhi %r1,1 # mode 1 = esame + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode + sam64 # switch to 64 bit addressing mode basr %r13,0 .back_base: oi have_diag308-.back_base(%r13),0x01 @@ -56,52 +55,50 @@ .back_pgm: lmg %r0,%r15,gprregs-.base(%r13) .top: - lghi %r7,4096 #load PAGE_SIZE in r7 - lghi %r9,4096 #load PAGE_SIZE in r9 - lg %r5,0(%r2) #read another word for indirection page - aghi %r2,8 #increment pointer - tml %r5,0x1 #is it a destination page? - je .indir_check #NO, goto "indir_check" - lgr %r6,%r5 #r6 = r5 - nill %r6,0xf000 #mask it out and... - j .top #...next iteration + lghi %r7,4096 # load PAGE_SIZE in r7 + lghi %r9,4096 # load PAGE_SIZE in r9 + lg %r5,0(%r2) # read another word for indirection page + aghi %r2,8 # increment pointer + tml %r5,0x1 # is it a destination page? + je .indir_check # NO, goto "indir_check" + lgr %r6,%r5 # r6 = r5 + nill %r6,0xf000 # mask it out and... + j .top # ...next iteration .indir_check: - tml %r5,0x2 #is it a indirection page? - je .done_test #NO, goto "done_test" - nill %r5,0xf000 #YES, mask out, - lgr %r2,%r5 #move it into the right register, - j .top #and read next... + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nill %r5,0xf000 # YES, mask out, + lgr %r2,%r5 # move it into the right register, + j .top # and read next... .done_test: - tml %r5,0x4 #is it the done indicator? - je .source_test #NO! Well, then it should be the source indicator... - j .done #ok, lets finish it here... + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... .source_test: - tml %r5,0x8 #it should be a source indicator... - je .top #NO, ignore it... - lgr %r8,%r5 #r8 = r5 - nill %r8,0xf000 #masking - 0: mvcle %r6,%r8,0x0 #copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + tml %r5,0x8 # it should be a source indicator... + je .top # NO, ignore it... + lgr %r8,%r5 # r8 = r5 + nill %r8,0xf000 # masking + 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 jo 0b - j .top + j .top .done: - sgr %r0,%r0 #clear register r0 - la %r4,load_psw-.base(%r13) #load psw-address into the register - o %r3,4(%r4) #or load address into psw + sgr %r0,%r0 # clear register r0 + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw st %r3,4(%r4) - mvc 0(8,%r0),0(%r4) #copy psw to absolute address 0 + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 tm have_diag308-.base(%r13),0x01 jno .no_diag308 diag %r0,%r0,0x308 .no_diag308: - sam31 #31 bit mode - sr %r1,%r1 #erase register r1 - sr %r2,%r2 #erase register r2 - sigp %r1,%r2,0x12 #set cpuid to zero - lpsw 0 #hopefully start new kernel... + sam31 # 31 bit mode + sr %r1,%r1 # erase register r1 + sr %r2,%r2 # erase register r2 + sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero + lpsw 0 # hopefully start new kernel... - .align 8 - zero64: - .quad 0 + .align 8 load_psw: .long 0x00080000,0x80000000 sys_msk: @@ -118,6 +115,7 @@ .byte 0 .align 8 relocate_kernel_end: + .align 8 .globl relocate_kernel_len relocate_kernel_len: .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c new file mode 100644 index 00000000000..26b4ae96fdd --- /dev/null +++ b/arch/s390/kernel/runtime_instr.c @@ -0,0 +1,149 @@ +/* + * Copyright IBM Corp. 2012 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <asm/runtime_instr.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +/* empty control block to disable RI by loading it */ +struct runtime_instr_cb runtime_instr_empty_cb; + +static int runtime_instr_avail(void) +{ + return test_facility(64); +} + +static void disable_runtime_instr(void) +{ + struct pt_regs *regs = task_pt_regs(current); + + load_runtime_instr_cb(&runtime_instr_empty_cb); + + /* + * Make sure the RI bit is deleted from the PSW. If the user did not + * switch off RI before the system call the process will get a + * specification exception otherwise. + */ + regs->psw.mask &= ~PSW_MASK_RI; +} + +static void init_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + cb->buf_limit = 0xfff; + cb->int_requested = 1; + cb->pstate = 1; + cb->pstate_set_buf = 1; + cb->pstate_sample = 1; + cb->pstate_collect = 1; + cb->key = PAGE_DEFAULT_KEY; + cb->valid = 1; +} + +void exit_thread_runtime_instr(void) +{ + struct task_struct *task = current; + + if (!task->thread.ri_cb) + return; + disable_runtime_instr(); + kfree(task->thread.ri_cb); + task->thread.ri_signum = 0; + task->thread.ri_cb = NULL; +} + +static void runtime_instr_int_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct siginfo info; + + if (!(param32 & CPU_MF_INT_RI_MASK)) + return; + + inc_irq_stat(IRQEXT_CMR); + + if (!current->thread.ri_cb) + return; + if (current->thread.ri_signum < SIGRTMIN || + current->thread.ri_signum > SIGRTMAX) { + WARN_ON_ONCE(1); + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = current->thread.ri_signum; + info.si_code = SI_QUEUE; + if (param32 & CPU_MF_INT_RI_BUF_FULL) + info.si_int = ENOBUFS; + else if (param32 & CPU_MF_INT_RI_HALTED) + info.si_int = ECANCELED; + else + return; /* unknown reason */ + + send_sig_info(current->thread.ri_signum, &info, current); +} + +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +{ + struct runtime_instr_cb *cb; + + if (!runtime_instr_avail()) + return -EOPNOTSUPP; + + if (command == S390_RUNTIME_INSTR_STOP) { + preempt_disable(); + exit_thread_runtime_instr(); + preempt_enable(); + return 0; + } + + if (command != S390_RUNTIME_INSTR_START || + (signum < SIGRTMIN || signum > SIGRTMAX)) + return -EINVAL; + + if (!current->thread.ri_cb) { + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + } else { + cb = current->thread.ri_cb; + memset(cb, 0, sizeof(*cb)); + } + + init_runtime_instr_cb(cb); + current->thread.ri_signum = signum; + + /* now load the control block to make it available */ + preempt_disable(); + current->thread.ri_cb = cb; + load_runtime_instr_cb(cb); + preempt_enable(); + return 0; +} + +static int __init runtime_instr_init(void) +{ + int rc; + + if (!runtime_instr_avail()) + return 0; + + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + runtime_instr_int_handler); + if (rc) + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + else + pr_info("Runtime instrumentation facility initialized\n"); + return rc; +} +device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c deleted file mode 100644 index 207bc511a6e..00000000000 --- a/arch/s390/kernel/s390_ext.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * arch/s390/kernel/s390_ext.c - * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/errno.h> -#include <linux/kernel_stat.h> -#include <linux/interrupt.h> - -#include <asm/lowcore.h> -#include <asm/s390_ext.h> -#include <asm/irq.h> - -/* - * ext_int_hash[index] is the start of the list for all external interrupts - * that hash to this index. With the current set of external interrupts - * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000 - * iucv and 0x2603 pfault) this is always the first element. - */ -ext_int_info_t *ext_int_hash[256] = { 0, }; - -static inline int ext_hash(__u16 code) -{ - return (code + (code >> 9)) & 0xff; -} - -int register_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - ext_int_info_t *p; - int index; - - p = (ext_int_info_t *) kmalloc(sizeof(ext_int_info_t), GFP_ATOMIC); - if (p == NULL) - return -ENOMEM; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} - -int register_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *p) -{ - int index; - - if (p == NULL) - return -EINVAL; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} - -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - ext_int_info_t *p, *q; - int index; - - index = ext_hash(code); - q = NULL; - p = ext_int_hash[index]; - while (p != NULL) { - if (p->code == code && p->handler == handler) - break; - q = p; - p = p->next; - } - if (p == NULL) - return -ENOENT; - if (q != NULL) - q->next = p->next; - else - ext_int_hash[index] = p->next; - kfree(p); - return 0; -} - -int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *p) -{ - ext_int_info_t *q; - int index; - - if (p == NULL || p->code != code || p->handler != handler) - return -EINVAL; - index = ext_hash(code); - q = ext_int_hash[index]; - if (p != q) { - while (q != NULL) { - if (q->next == p) - break; - q = q->next; - } - if (q == NULL) - return -ENOENT; - q->next = p->next; - } else - ext_int_hash[index] = p->next; - return 0; -} - -void do_extint(struct pt_regs *regs, unsigned short code) -{ - ext_int_info_t *p; - int index; - - irq_enter(); - asm volatile ("mc 0,0"); - if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) - /** - * Make sure that the i/o interrupt did not "overtake" - * the last HZ timer interrupt. - */ - account_ticks(regs); - kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; - index = ext_hash(code); - for (p = ext_int_hash[index]; p; p = p->next) { - if (likely(p->code == code)) { - if (likely(p->handler)) - p->handler(regs, code); - } - } - irq_exit(); -} - -EXPORT_SYMBOL(register_external_interrupt); -EXPORT_SYMBOL(unregister_external_interrupt); - diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 4176c77670c..9f60467938d 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -1,63 +1,13 @@ -/* - * arch/s390/kernel/s390_ksyms.c - * - * S390 version - */ -#include <linux/config.h> -#include <linux/highuid.h> #include <linux/module.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/syscalls.h> -#include <linux/interrupt.h> -#include <asm/checksum.h> -#include <asm/cpcmd.h> -#include <asm/delay.h> -#include <asm/pgalloc.h> -#include <asm/setup.h> -#ifdef CONFIG_IP_MULTICAST -#include <net/arp.h> -#endif - -/* - * memory management - */ -EXPORT_SYMBOL(_oi_bitmap); -EXPORT_SYMBOL(_ni_bitmap); -EXPORT_SYMBOL(_zb_findmap); -EXPORT_SYMBOL(_sb_findmap); -EXPORT_SYMBOL(__copy_from_user_asm); -EXPORT_SYMBOL(__copy_to_user_asm); -EXPORT_SYMBOL(__copy_in_user_asm); -EXPORT_SYMBOL(__clear_user_asm); -EXPORT_SYMBOL(__strncpy_from_user_asm); -EXPORT_SYMBOL(__strnlen_user_asm); -EXPORT_SYMBOL(diag10); - -/* - * semaphore ops - */ -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); +#include <linux/kvm_host.h> +#include <asm/ftrace.h> -/* - * binfmt_elf loader - */ -extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(overflowuid); -EXPORT_SYMBOL(overflowgid); -EXPORT_SYMBOL(empty_zero_page); - -/* - * misc. - */ -EXPORT_SYMBOL(machine_flags); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(csum_fold); -EXPORT_SYMBOL(console_mode); -EXPORT_SYMBOL(console_devno); -EXPORT_SYMBOL(console_irq); -EXPORT_SYMBOL(sys_wait4); +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif +#if IS_ENABLED(CONFIG_KVM) +EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); +#endif +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S new file mode 100644 index 00000000000..a41f2c99dcc --- /dev/null +++ b/arch/s390/kernel/sclp.S @@ -0,0 +1,360 @@ +/* + * Mini SCLP driver. + * + * Copyright IBM Corp. 2004, 2009 + * + * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/irq.h> + +LC_EXT_NEW_PSW = 0x58 # addr of ext int handler +LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit +LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter +LC_EXT_INT_CODE = 0x86 # addr of ext int code +LC_AR_MODE_ID = 0xa3 + +# +# Subroutine which waits synchronously until either an external interruption +# or a timeout occurs. +# +# Parameters: +# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds +# +# Returns: +# R2 = 0 on interrupt, 2 on timeout +# R3 = external interruption parameter if R2=0 +# + +_sclp_wait_int: + stm %r6,%r15,24(%r15) # save registers + basr %r13,0 # get base register +.LbaseS1: + ahi %r15,-96 # create stack frame + la %r8,LC_EXT_NEW_PSW # register int handler + la %r9,.LextpswS1-.LbaseS1(%r13) +#ifdef CONFIG_64BIT + tm LC_AR_MODE_ID,1 + jno .Lesa1 + la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit + la %r9,.LextpswS1_64-.LbaseS1(%r13) +.Lesa1: +#endif + mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8) + mvc 0(16,%r8),0(%r9) +#ifdef CONFIG_64BIT + epsw %r6,%r7 # set current addressing mode + nill %r6,0x1 # in new psw (31 or 64 bit mode) + nilh %r7,0x8000 + stm %r6,%r7,0(%r8) +#endif + lhi %r6,0x0200 # cr mask for ext int (cr0.54) + ltr %r2,%r2 + jz .LsetctS1 + ahi %r6,0x0800 # cr mask for clock int (cr0.52) + stck .LtimeS1-.LbaseS1(%r13) # initiate timeout + al %r2,.LtimeS1-.LbaseS1(%r13) + st %r2,.LtimeS1-.LbaseS1(%r13) + sckc .LtimeS1-.LbaseS1(%r13) + +.LsetctS1: + stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts + l %r0,.LctlS1-.LbaseS1(%r13) + lhi %r1,~(0x200 | 0x800) # clear old values + nr %r1,%r0 + or %r1,%r6 # set new value + st %r1,.LctlS1-.LbaseS1(%r13) + lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) + st %r0,.LctlS1-.LbaseS1(%r13) + lhi %r2,2 # return code for timeout +.LloopS1: + lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt +.LwaitS1: + lh %r7,LC_EXT_INT_CODE + chi %r7,EXT_IRQ_CLK_COMP # timeout? + je .LtimeoutS1 + chi %r7,EXT_IRQ_SERVICE_SIG # service int? + jne .LloopS1 + sr %r2,%r2 + l %r3,LC_EXT_INT_PARAM +.LtimeoutS1: + lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting + # restore old handler + mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13) + lm %r6,%r15,120(%r15) # restore registers + br %r14 # return to caller + + .align 8 +.LoldpswS1: + .long 0, 0, 0, 0 # old ext int PSW +.LextpswS1: + .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int +#ifdef CONFIG_64BIT +.LextpswS1_64: + .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit +#endif +.LwaitpswS1: + .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int +.LtimeS1: + .quad 0 # current time +.LctlS1: + .long 0 # CT0 contents + +# +# Subroutine to synchronously issue a service call. +# +# Parameters: +# R2 = command word +# R3 = sccb address +# +# Returns: +# R2 = 0 on success, 1 on failure +# R3 = sccb response code if R2 = 0 +# + +_sclp_servc: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + lr %r6,%r2 # save command word + lr %r7,%r3 # save sccb address +.LretryS2: + lhi %r2,1 # error return code + .insn rre,0xb2200000,%r6,%r7 # servc + brc 1,.LendS2 # exit if not operational + brc 8,.LnotbusyS2 # go on if not busy + sr %r2,%r2 # wait until no longer busy + bras %r14,_sclp_wait_int + j .LretryS2 # retry +.LnotbusyS2: + sr %r2,%r2 # wait until result + bras %r14,_sclp_wait_int + sr %r2,%r2 + lh %r3,6(%r7) +.LendS2: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +# +# Subroutine to set up the SCLP interface. +# +# Parameters: +# R2 = 0 to activate, non-zero to deactivate +# +# Returns: +# R2 = 0 on success, non-zero on failure +# + +_sclp_setup: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + basr %r13,0 # get base register +.LbaseS3: + l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb + mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13) + ltr %r2,%r2 # initialization? + jz .LdoinitS3 # go ahead + # clear masks + xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6) +.LdoinitS3: + l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word + lr %r3,%r6 # get sccb address + bras %r14,_sclp_servc # issue service call + ltr %r2,%r2 # servc successful? + jnz .LerrorS3 + chi %r3,0x20 # write mask successful? + jne .LerrorS3 + # check masks + la %r2,.LinitmaskS3-.LinitsccbS3(%r6) + l %r1,0(%r2) # receive mask ok? + n %r1,12(%r2) + cl %r1,0(%r2) + jne .LerrorS3 + l %r1,4(%r2) # send mask ok? + n %r1,8(%r2) + cl %r1,4(%r2) + sr %r2,%r2 + je .LendS3 +.LerrorS3: + lhi %r2,1 # error return code +.LendS3: + lm %r6,%r15,120(%r15) # restore registers + br %r14 +.LwritemaskS3: + .long 0x00780005 # SCLP command for write mask +.LinitsccbS3: + .word .LinitendS3-.LinitsccbS3 + .byte 0,0,0,0 + .word 0 + .word 0 + .word 4 +.LinitmaskS3: + .long 0x80000000 + .long 0x40000000 + .long 0 + .long 0 +.LinitendS3: + +# +# Subroutine which prints a given text to the SCLP console. +# +# Parameters: +# R2 = address of nil-terminated ASCII text +# +# Returns: +# R2 = 0 on success, 1 on failure +# + +_sclp_print: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + basr %r13,0 # get base register +.LbaseS4: + l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb + mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13) + la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr + sr %r0,%r0 + l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table +.LinitmtoS4: + # initialize mto + mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13) + lhi %r6,.LmtoendS4-.LmtoS4 # current mto length +.LloopS4: + ic %r0,0(%r2) # get character + ahi %r2,1 + ltr %r0,%r0 # end of string? + jz .LfinalizemtoS4 + chi %r0,0x0a # end of line (NL)? + jz .LfinalizemtoS4 + stc %r0,0(%r6,%r7) # copy to mto + la %r11,0(%r6,%r7) + tr 0(1,%r11),0(%r10) # translate to EBCDIC + ahi %r6,1 + j .LloopS4 +.LfinalizemtoS4: + sth %r6,0(%r7) # update mto length + lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length + ar %r9,%r6 + sth %r9,.LmdbS4-.LwritesccbS4(%r8) + lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length + ar %r9,%r6 + sth %r9,.LevbufS4-.LwritesccbS4(%r8) + lh %r9,0(%r8) # update sccb length + ar %r9,%r6 + sth %r9,0(%r8) + ar %r7,%r6 # update current mto address + ltr %r0,%r0 # more characters? + jnz .LinitmtoS4 + l %r2,.LwritedataS4-.LbaseS4(%r13)# write data + lr %r3,%r8 + bras %r14,_sclp_servc + ltr %r2,%r2 # servc successful? + jnz .LendS4 + chi %r3,0x20 # write data successful? + je .LendS4 + lhi %r2,1 # error return code +.LendS4: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +# +# Function which prints a given text to the SCLP console. +# +# Parameters: +# R2 = address of nil-terminated ASCII text +# +# Returns: +# R2 = 0 on success, 1 on failure +# + +ENTRY(_sclp_print_early) + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame +#ifdef CONFIG_64BIT + tm LC_AR_MODE_ID,1 + jno .Lesa2 + ahi %r15,-80 + stmh %r6,%r15,96(%r15) # store upper register halves +.Lesa2: +#endif + lr %r10,%r2 # save string pointer + lhi %r2,0 + bras %r14,_sclp_setup # enable console + ltr %r2,%r2 + jnz .LendS5 + lr %r2,%r10 + bras %r14,_sclp_print # print string + ltr %r2,%r2 + jnz .LendS5 + lhi %r2,1 + bras %r14,_sclp_setup # disable console +.LendS5: +#ifdef CONFIG_64BIT + tm LC_AR_MODE_ID,1 + jno .Lesa3 + lmh %r6,%r15,96(%r15) # store upper register halves + ahi %r15,80 +.Lesa3: +#endif + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +.LwritedataS4: + .long 0x00760005 # SCLP command for write data +.LwritesccbS4: + # sccb + .word .LmtoS4-.LwritesccbS4 + .byte 0 + .byte 0,0,0 + .word 0 + + # evbuf +.LevbufS4: + .word .LmtoS4-.LevbufS4 + .byte 0x02 + .byte 0 + .word 0 + +.LmdbS4: + # mdb + .word .LmtoS4-.LmdbS4 + .word 1 + .long 0xd4c4c240 + .long 1 + + # go +.LgoS4: + .word .LmtoS4-.LgoS4 + .word 1 + .long 0 + .byte 0,0,0,0,0,0,0,0 + .byte 0,0,0 + .byte 0 + .byte 0,0,0,0,0,0,0 + .byte 0 + .word 0 + .byte 0,0,0,0,0,0,0,0,0,0 + .byte 0,0,0,0,0,0,0,0 + .byte 0,0,0,0,0,0,0,0 + +.LmtoS4: + .word .LmtoendS4-.LmtoS4 + .word 4 + .word 0x1000 + .byte 0 + .byte 0,0,0 +.LmtoendS4: + + # Global constants +.LsccbS0: + .long _sclp_work_area +.Lascebc: + .long _ascebc + +.section .data,"aw",@progbits + .balign 4096 +_sclp_work_area: + .fill 4096 +.previous diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c deleted file mode 100644 index 8dfb690c159..00000000000 --- a/arch/s390/kernel/semaphore.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * linux/arch/s390/kernel/semaphore.c - * - * S390 version - * Copyright (C) 1998-2000 IBM Corporation - * Author(s): Martin Schwidefsky - * - * Derived from "linux/arch/i386/kernel/semaphore.c - * Copyright (C) 1999, Linus Torvalds - * - */ -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/init.h> - -#include <asm/semaphore.h> - -/* - * Atomically update sem->count. Equivalent to: - * old_val = sem->count.counter; - * new_val = ((old_val >= 0) ? old_val : 0) + incr; - * sem->count.counter = new_val; - * return old_val; - */ -static inline int __sem_update_count(struct semaphore *sem, int incr) -{ - int old_val, new_val; - - __asm__ __volatile__(" l %0,0(%3)\n" - "0: ltr %1,%0\n" - " jhe 1f\n" - " lhi %1,0\n" - "1: ar %1,%4\n" - " cs %0,%1,0(%3)\n" - " jl 0b\n" - : "=&d" (old_val), "=&d" (new_val), - "=m" (sem->count) - : "a" (&sem->count), "d" (incr), "m" (sem->count) - : "cc" ); - return old_val; -} - -/* - * The inline function up() incremented count but the result - * was <= 0. This indicates that some process is waiting on - * the semaphore. The semaphore is free and we'll wake the - * first sleeping process, so we set count to 1 unless some - * other cpu has called up in the meantime in which case - * we just increment count by 1. - */ -void __up(struct semaphore *sem) -{ - __sem_update_count(sem, 1); - wake_up(&sem->wait); -} - -/* - * The inline function down() decremented count and the result - * was < 0. The wait loop will atomically test and update the - * semaphore counter following the rules: - * count > 0: decrement count, wake up queue and exit. - * count <= 0: set count to -1, go to sleep. - */ -void __sched __down(struct semaphore * sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); - add_wait_queue_exclusive(&sem->wait, &wait); - while (__sem_update_count(sem, -1) <= 0) { - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - remove_wait_queue(&sem->wait, &wait); - __set_task_state(tsk, TASK_RUNNING); - wake_up(&sem->wait); -} - -/* - * Same as __down() with an additional test for signals. - * If a signal is pending the count is updated as follows: - * count > 0: wake up queue and exit. - * count <= 0: set count to 0, wake up queue and exit. - */ -int __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - __set_task_state(tsk, TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&sem->wait, &wait); - while (__sem_update_count(sem, -1) <= 0) { - if (signal_pending(current)) { - __sem_update_count(sem, 0); - retval = -EINTR; - break; - } - schedule(); - set_task_state(tsk, TASK_INTERRUPTIBLE); - } - remove_wait_queue(&sem->wait, &wait); - __set_task_state(tsk, TASK_RUNNING); - wake_up(&sem->wait); - return retval; -} - diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b03847d100d..1e2264b46e4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/setup.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2012 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -14,32 +12,42 @@ * This file handles the architecture-dependent parts of initialization */ +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/errno.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> -#include <linux/a.out.h> #include <linux/tty.h> #include <linux/ioport.h> #include <linux/delay.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/initrd.h> #include <linux/bootmem.h> #include <linux/root_dev.h> #include <linux/console.h> -#include <linux/seq_file.h> #include <linux/kernel_stat.h> #include <linux/device.h> - -#include <asm/uaccess.h> -#include <asm/system.h> +#include <linux/notifier.h> +#include <linux/pfn.h> +#include <linux/ctype.h> +#include <linux/reboot.h> +#include <linux/topology.h> +#include <linux/ftrace.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> +#include <linux/compat.h> + +#include <asm/ipl.h> +#include <asm/facility.h> #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> @@ -47,114 +55,58 @@ #include <asm/irq.h> #include <asm/page.h> #include <asm/ptrace.h> +#include <asm/sections.h> +#include <asm/ebcdic.h> +#include <asm/kvm_virtio.h> +#include <asm/diag.h> +#include <asm/os_info.h> +#include <asm/sclp.h> +#include "entry.h" /* * Machine setup.. */ unsigned int console_mode = 0; +EXPORT_SYMBOL(console_mode); + unsigned int console_devno = -1; +EXPORT_SYMBOL(console_devno); + unsigned int console_irq = -1; -unsigned long memory_size = 0; -unsigned long machine_flags = 0; -struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS] = { { 0 } }; -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 -volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ -unsigned long __initdata zholes_size[MAX_NR_ZONES]; -static unsigned long __initdata memory_end; +EXPORT_SYMBOL(console_irq); -/* - * Setup options - */ -extern int _text,_etext, _edata, _end; +unsigned long elf_hwcap = 0; +char elf_platform[ELF_PLATFORM_SIZE]; -/* - * This is set up by the setup-routine at boot-time - * for S390 need to find out, what we have to setup - * using address 0x10400 ... - */ +int __initdata memory_end_set; +unsigned long __initdata memory_end; +unsigned long __initdata max_physmem_end; -#include <asm/setup.h> +unsigned long VMALLOC_START; +EXPORT_SYMBOL(VMALLOC_START); -static char command_line[COMMAND_LINE_SIZE] = { 0, }; +unsigned long VMALLOC_END; +EXPORT_SYMBOL(VMALLOC_END); -static struct resource code_resource = { - .name = "Kernel code", - .start = (unsigned long) &_text, - .end = (unsigned long) &_etext - 1, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); -static struct resource data_resource = { - .name = "Kernel data", - .start = (unsigned long) &_etext, - .end = (unsigned long) &_edata - 1, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; - -/* - * cpu_init() initializes state that is per-CPU. - */ -void __devinit cpu_init (void) -{ - int addr = hard_smp_processor_id(); - - /* - * Store processor id in lowcore (used e.g. in timer_interrupt) - */ - asm volatile ("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id)); - S390_lowcore.cpu_data.cpu_addr = addr; +#ifdef CONFIG_64BIT +unsigned long MODULES_VADDR; +unsigned long MODULES_END; +#endif - /* - * Force FPU initialization: - */ - clear_thread_flag(TIF_USEDFPU); - clear_used_math(); - - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if (current->mm) - BUG(); - enter_lazy_tlb(&init_mm, current); -} +/* An array with a pointer to the lowcore of every CPU. */ +struct _lowcore *lowcore_ptr[NR_CPUS]; +EXPORT_SYMBOL(lowcore_ptr); /* - * VM halt and poweroff setup routines + * This is set up by the setup-routine at boot-time + * for S390 need to find out, what we have to setup + * using address 0x10400 ... */ -char vmhalt_cmd[128] = ""; -char vmpoff_cmd[128] = ""; - -static inline void strncpy_skip_quote(char *dst, char *src, int n) -{ - int sx, dx; - - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { - if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) break; - } -} - -static int __init vmhalt_setup(char *str) -{ - strncpy_skip_quote(vmhalt_cmd, str, 127); - vmhalt_cmd[127] = 0; - return 1; -} - -__setup("vmhalt=", vmhalt_setup); - -static int __init vmpoff_setup(char *str) -{ - strncpy_skip_quote(vmpoff_cmd, str, 127); - vmpoff_cmd[127] = 0; - return 1; -} -__setup("vmpoff=", vmpoff_setup); +#include <asm/setup.h> /* * condev= and conmode= setup parameter. @@ -174,9 +126,24 @@ static int __init condev_setup(char *str) __setup("condev=", condev_setup); +static void __init set_preferred_console(void) +{ + if (MACHINE_IS_KVM) { + if (sclp_has_vt220()) + add_preferred_console("ttyS", 1, NULL); + else if (sclp_has_linemode()) + add_preferred_console("ttyS", 0, NULL); + else + add_preferred_console("hvc", 0, NULL); + } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + add_preferred_console("ttyS", 0, NULL); + else if (CONSOLE_IS_3270) + add_preferred_console("tty3270", 0, NULL); +} + static int __init conmode_setup(char *str) { -#if defined(CONFIG_SCLP_CONSOLE) +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0) SET_CONSOLE_SCLP; #endif @@ -188,6 +155,7 @@ static int __init conmode_setup(char *str) if (strncmp(str, "3270", 5) == 0) SET_CONSOLE_3270; #endif + set_preferred_console(); return 1; } @@ -199,11 +167,11 @@ static void __init conmode_default(void) char *ptr; if (MACHINE_IS_VM) { - __cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); + cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); console_irq = simple_strtoul(ptr + 13, NULL, 16); - __cpcmd("QUERY TERM", query_buffer, 1024, NULL); + cpcmd("QUERY TERM", query_buffer, 1024, NULL); ptr = strstr(query_buffer, "CONMODE"); /* * Set the conmode to 3215 so that the device recognition @@ -212,9 +180,9 @@ static void __init conmode_default(void) * 3215 and the 3270 driver will try to access the console * device (3215 as console and 3270 as normal tty). */ - __cpcmd("TERM CONMODE 3215", NULL, 0, NULL); + cpcmd("TERM CONMODE 3215", NULL, 0, NULL); if (ptr == NULL) { -#if defined(CONFIG_SCLP_CONSOLE) +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; #endif return; @@ -224,7 +192,7 @@ static void __init conmode_default(void) SET_CONSOLE_3270; #elif defined(CONFIG_TN3215_CONSOLE) SET_CONSOLE_3215; -#elif defined(CONFIG_SCLP_CONSOLE) +#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; #endif } else if (strncmp(ptr + 8, "3215", 4) == 0) { @@ -232,65 +200,30 @@ static void __init conmode_default(void) SET_CONSOLE_3215; #elif defined(CONFIG_TN3270_CONSOLE) SET_CONSOLE_3270; -#elif defined(CONFIG_SCLP_CONSOLE) +#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_P390) { -#if defined(CONFIG_TN3215_CONSOLE) - SET_CONSOLE_3215; -#elif defined(CONFIG_TN3270_CONSOLE) - SET_CONSOLE_3270; -#endif } else { -#if defined(CONFIG_SCLP_CONSOLE) +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; #endif } } -#ifdef CONFIG_SMP -extern void machine_restart_smp(char *); -extern void machine_halt_smp(void); -extern void machine_power_off_smp(void); - -void (*_machine_restart)(char *command) = machine_restart_smp; -void (*_machine_halt)(void) = machine_halt_smp; -void (*_machine_power_off)(void) = machine_power_off_smp; -#else -/* - * Reboot, halt and power_off routines for non SMP. - */ -extern void reipl(unsigned long devno); -extern void reipl_diag(void); -static void do_machine_restart_nonsmp(char * __unused) +#ifdef CONFIG_CRASH_DUMP +static void __init setup_zfcpdump(void) { - reipl_diag(); - - if (MACHINE_IS_VM) - cpcmd ("IPL", NULL, 0); - else - reipl (0x10000 | S390_lowcore.ipl_device); -} - -static void do_machine_halt_nonsmp(void) -{ - if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0); - signal_processor(smp_processor_id(), sigp_stop_and_store_status); + if (ipl_info.type != IPL_TYPE_FCP_DUMP) + return; + if (OLDMEM_BASE) + return; + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); + console_loglevel = 2; } - -static void do_machine_power_off_nonsmp(void) -{ - if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0); - signal_processor(smp_processor_id(), sigp_stop_and_store_status); -} - -void (*_machine_restart)(char *command) = do_machine_restart_nonsmp; -void (*_machine_halt)(void) = do_machine_halt_nonsmp; -void (*_machine_power_off)(void) = do_machine_power_off_nonsmp; -#endif +#else +static inline void setup_zfcpdump(void) {} +#endif /* CONFIG_CRASH_DUMP */ /* * Reboot, halt and power_off stubs. They just call _machine_restart, @@ -299,575 +232,703 @@ void (*_machine_power_off)(void) = do_machine_power_off_nonsmp; void machine_restart(char *command) { - console_unblank(); + if ((!in_interrupt() && !in_atomic()) || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); _machine_restart(command); } void machine_halt(void) { - console_unblank(); + if (!in_interrupt() || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); _machine_halt(); } void machine_power_off(void) { - console_unblank(); + if (!in_interrupt() || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); _machine_power_off(); } -static void __init -add_memory_hole(unsigned long start, unsigned long end) +/* + * Dummy power off function. + */ +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL_GPL(pm_power_off); + +static int __init early_parse_mem(char *p) { - unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; - - if (end <= dma_pfn) - zholes_size[ZONE_DMA] += end - start + 1; - else if (start > dma_pfn) - zholes_size[ZONE_NORMAL] += end - start + 1; - else { - zholes_size[ZONE_DMA] += dma_pfn - start + 1; - zholes_size[ZONE_NORMAL] += end - dma_pfn; - } + memory_end = memparse(p, &p); + memory_end &= PAGE_MASK; + memory_end_set = 1; + return 0; } +early_param("mem", early_parse_mem); -static void __init -parse_cmdline_early(char **cmdline_p) +static int __init parse_vmalloc(char *arg) { - char c = ' ', cn, *to = command_line, *from = COMMAND_LINE; - unsigned long delay = 0; - - /* Save unparsed command line copy for /proc/cmdline */ - memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - - for (;;) { - /* - * "mem=XXX[kKmM]" sets memsize - */ - if (c == ' ' && strncmp(from, "mem=", 4) == 0) { - memory_end = simple_strtoul(from+4, &from, 0); - if ( *from == 'K' || *from == 'k' ) { - memory_end = memory_end << 10; - from++; - } else if ( *from == 'M' || *from == 'm' ) { - memory_end = memory_end << 20; - from++; - } - } - /* - * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes - */ - if (c == ' ' && strncmp(from, "ipldelay=", 9) == 0) { - delay = simple_strtoul(from+9, &from, 0); - if (*from == 's' || *from == 'S') { - delay = delay*1000000; - from++; - } else if (*from == 'm' || *from == 'M') { - delay = delay*60*1000000; - from++; - } - /* now wait for the requested amount of time */ - udelay(delay); - } - cn = *(from++); - if (!cn) - break; - if (cn == '\n') - cn = ' '; /* replace newlines with space */ - if (cn == 0x0d) - cn = ' '; /* replace 0x0d with space */ - if (cn == ' ' && c == ' ') - continue; /* remove additional spaces */ - c = cn; - if (to - command_line >= COMMAND_LINE_SIZE) - break; - *(to++) = c; - } - if (c == ' ' && to > command_line) to--; - *to = '\0'; - *cmdline_p = command_line; + if (!arg) + return -EINVAL; + VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; + return 0; } +early_param("vmalloc", parse_vmalloc); + +void *restart_stack __attribute__((__section__(".data"))); -static void __init -setup_lowcore(void) +static void __init setup_lowcore(void) { struct _lowcore *lc; - int lc_pages; /* * Setup lowcore for boot cpu */ - lc_pages = sizeof(void *) == 8 ? 2 : 1; - lc = (struct _lowcore *) - __alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0); - memset(lc, 0, lc_pages * PAGE_SIZE); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); + lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); + lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS; + lc->external_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS; + lc->program_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - PSW_KERNEL_BITS & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS; + lc->io_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; - lc->ipl_device = S390_lowcore.ipl_device; - lc->jiffy_timer = -1LL; - lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; + lc->clock_comparator = -1ULL; + lc->kernel_stack = ((unsigned long) &init_thread_union) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->machine_flags = S390_lowcore.machine_flags; + lc->stfl_fac_list = S390_lowcore.stfl_fac_list; + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = (__u32) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); + __alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0); /* enable extended save area */ - ctl_set_bit(14, 29); + __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif + lc->sync_enter_timer = S390_lowcore.sync_enter_timer; + lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->exit_timer = S390_lowcore.exit_timer; + lc->user_timer = S390_lowcore.user_timer; + lc->system_timer = S390_lowcore.system_timer; + lc->steal_timer = S390_lowcore.steal_timer; + lc->last_update_timer = S390_lowcore.last_update_timer; + lc->last_update_clock = S390_lowcore.last_update_clock; + lc->ftrace_func = S390_lowcore.ftrace_func; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necessary if + * PSW restart is done on an offline CPU that has lowcore zero. + */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1UL; + + /* Setup absolute zero lowcore */ + mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); + mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); + mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data); + mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); + mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); + +#ifdef CONFIG_SMP + lc->spinlock_lockval = arch_spin_lockval(0); +#endif + set_prefix((u32)(unsigned long) lc); + lowcore_ptr[0] = lc; } -static void __init -setup_resources(void) -{ - struct resource *res; - int i; +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - res = alloc_bootmem_low(sizeof(struct resource)); +static struct resource __initdata *standard_resources[] = { + &code_resource, + &data_resource, + &bss_resource, +}; + +static void __init setup_resources(void) +{ + struct resource *res, *std_res, *sub_res; + struct memblock_region *reg; + int j; + + code_resource.start = (unsigned long) &_text; + code_resource.end = (unsigned long) &_etext - 1; + data_resource.start = (unsigned long) &_etext; + data_resource.end = (unsigned long) &_edata - 1; + bss_resource.start = (unsigned long) &__bss_start; + bss_resource.end = (unsigned long) &__bss_stop - 1; + + for_each_memblock(memory, reg) { + res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; - switch (memory_chunk[i].type) { - case CHUNK_READ_WRITE: - res->name = "System RAM"; - break; - case CHUNK_READ_ONLY: - res->name = "System ROM"; - res->flags |= IORESOURCE_READONLY; - break; - default: - res->name = "reserved"; - } - res->start = memory_chunk[i].addr; - res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; + + res->name = "System RAM"; + res->start = reg->base; + res->end = reg->base + reg->size - 1; request_resource(&iomem_resource, res); - request_resource(res, &code_resource); - request_resource(res, &data_resource); + + for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { + std_res = standard_resources[j]; + if (std_res->start < res->start || + std_res->start > res->end) + continue; + if (std_res->end > res->end) { + sub_res = alloc_bootmem_low(sizeof(*sub_res)); + *sub_res = *std_res; + sub_res->end = res->end; + std_res->start = res->end + 1; + request_resource(res, sub_res); + } else { + request_resource(res, std_res); + } + } } } -static void __init -setup_memory(void) +static void __init setup_memory_end(void) { - unsigned long bootmap_size; - unsigned long start_pfn, end_pfn, init_pfn; - unsigned long last_rw_end; - int i; - - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT; - end_pfn = max_pfn = memory_end >> PAGE_SHIFT; - - /* Initialize storage key for kernel pages */ - for (init_pfn = 0 ; init_pfn < start_pfn; init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); + unsigned long vmax, vmalloc_size, tmp; + + /* Choose kernel address space layout: 2, 3, or 4 levels. */ +#ifdef CONFIG_64BIT + vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; + if (tmp <= (1UL << 42)) + vmax = 1UL << 42; /* 3-level kernel page table */ + else + vmax = 1UL << 53; /* 4-level kernel page table */ + /* module area is at the end of the kernel address space. */ + MODULES_END = vmax; + MODULES_VADDR = MODULES_END - MODULES_LEN; + VMALLOC_END = MODULES_VADDR; +#else + vmalloc_size = VMALLOC_END ?: 96UL << 20; + vmax = 1UL << 31; /* 2-level kernel page table */ + /* vmalloc area is at the end of the kernel address space. */ + VMALLOC_END = vmax; +#endif + VMALLOC_START = vmax - vmalloc_size; + + /* Split remaining virtual space between 1:1 mapping & vmemmap array */ + tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + tmp = SECTION_ALIGN_UP(tmp); + tmp = VMALLOC_START - tmp * sizeof(struct page); + tmp &= ~((vmax >> 11) - 1); /* align to page table level */ + tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); + vmemmap = (struct page *) tmp; + + /* Take care that memory_end is set and <= vmemmap */ + memory_end = min(memory_end ?: max_physmem_end, tmp); + max_pfn = max_low_pfn = PFN_DOWN(memory_end); + memblock_remove(memory_end, ULONG_MAX); + + pr_notice("Max memory size: %luMB\n", memory_end >> 20); +} - /* - * Initialize the boot-time allocator (with low memory only): - */ - bootmap_size = init_bootmem(start_pfn, end_pfn); +static void __init setup_vmcoreinfo(void) +{ + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); +} - /* - * Register RAM areas with the bootmem allocator. - */ - last_rw_end = start_pfn; - - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - unsigned long start_chunk, end_chunk; - - if (memory_chunk[i].type != CHUNK_READ_WRITE) - continue; - start_chunk = (memory_chunk[i].addr + PAGE_SIZE - 1); - start_chunk >>= PAGE_SHIFT; - end_chunk = (memory_chunk[i].addr + memory_chunk[i].size); - end_chunk >>= PAGE_SHIFT; - if (start_chunk < start_pfn) - start_chunk = start_pfn; - if (end_chunk > end_pfn) - end_chunk = end_pfn; - if (start_chunk < end_chunk) { - /* Initialize storage key for RAM pages */ - for (init_pfn = start_chunk ; init_pfn < end_chunk; - init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, - PAGE_DEFAULT_KEY); - free_bootmem(start_chunk << PAGE_SHIFT, - (end_chunk - start_chunk) << PAGE_SHIFT); - if (last_rw_end < start_chunk) - add_memory_hole(last_rw_end, start_chunk - 1); - last_rw_end = end_chunk; - } - } +#ifdef CONFIG_CRASH_DUMP - psw_set_key(PAGE_DEFAULT_KEY); +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} - if (last_rw_end < end_pfn - 1) - add_memory_hole(last_rw_end, end_pfn - 1); +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size); +#endif -#ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START) { - if (INITRD_START + INITRD_SIZE <= memory_end) { - reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - } else { - printk("initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - initrd_start + INITRD_SIZE, memory_end); - initrd_start = initrd_end = 0; - } +/* + * Make sure that the area behind memory_end is protected + */ +static void reserve_memory_end(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (ipl_info.type == IPL_TYPE_FCP_DUMP && + !OLDMEM_BASE && sclp_get_hsa_size()) { + memory_end = sclp_get_hsa_size(); + memory_end &= PAGE_MASK; + memory_end_set = 1; } #endif + if (!memory_end_set) + return; + memblock_reserve(memory_end, ULONG_MAX); } /* - * Setup function called from init/main.c just after the banner - * was printed. + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} + +/* + * Make sure that oldmem, where the dump is stored, is protected */ +static void remove_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} -void __init -setup_arch(char **cmdline_p) +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) { - /* - * print what head.S has found out about the machine - */ -#ifndef CONFIG_64BIT - printk((MACHINE_IS_VM) ? - "We are running under VM (31 bit mode)\n" : - "We are running native (31 bit mode)\n"); - printk((MACHINE_HAS_IEEE) ? - "This machine has an IEEE fpu\n" : - "This machine has no IEEE fpu\n"); -#else /* CONFIG_64BIT */ - printk((MACHINE_IS_VM) ? - "We are running under VM (64 bit mode)\n" : - "We are running native (64 bit mode)\n"); -#endif /* CONFIG_64BIT */ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + phys_addr_t low, high; + int rc; - ROOT_DEV = Root_RAM0; -#ifndef CONFIG_64BIT - memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */ - /* - * We need some free virtual space to be able to do vmalloc. - * On a machine with 2GB memory we make sure that we have at - * least 128 MB free space for vmalloc. - */ - if (memory_end > 1920*1024*1024) - memory_end = 1920*1024*1024; -#else /* CONFIG_64BIT */ - memory_end = memory_size & ~0x200000UL; /* detected in head.s */ -#endif /* CONFIG_64BIT */ + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); - init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (rc || crash_size == 0) + return; - parse_cmdline_early(cmdline_p); + if (memblock.memory.regions[0].size < crash_size) { + pr_info("crashkernel reservation failed: %s\n", + "first memory chunk must be at least crashkernel size"); + return; + } - setup_memory(); - setup_resources(); - setup_lowcore(); + low = crash_base ?: OLDMEM_BASE; + high = low + crash_size; + if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + /* The crashkernel fits into OLDMEM, reuse OLDMEM */ + crash_base = low; + } else { + /* Find suitable area in free memory */ + low = max_t(unsigned long, crash_size, sclp_get_hsa_size()); + high = crash_base ? crash_base + crash_size : ULONG_MAX; - cpu_init(); - __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; + if (crash_base && crash_base < low) { + pr_info("crashkernel reservation failed: %s\n", + "crash_base too low"); + return; + } + low = crash_base ?: low; + crash_base = memblock_find_in_range(low, high, crash_size, + KEXEC_CRASH_MEM_ALIGN); + } - /* - * Create kernel page tables and switch to virtual addressing. - */ - paging_init(); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", + "no suitable area found"); + return; + } - /* Setup default console */ - conmode_default(); + if (register_memory_notifier(&kdump_mem_nb)) + return; + + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + memblock_remove(crash_base, crash_size); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, + (unsigned long)memblock.memory.total_size >> 20); + os_info_crashkernel_add(crash_base, crash_size); +#endif } -void print_cpu_info(struct cpuinfo_S390 *cpuinfo) +/* + * Reserve the initrd from being used by memblock + */ +static void __init reserve_initrd(void) { - printk("cpu %d " -#ifdef CONFIG_SMP - "phys_idx=%d " -#endif - "vers=%02X ident=%06X machine=%04X unused=%04X\n", - cpuinfo->cpu_nr, -#ifdef CONFIG_SMP - cpuinfo->cpu_addr, +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + initrd_end = initrd_start + INITRD_SIZE; + memblock_reserve(INITRD_START, INITRD_SIZE); #endif - cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine, - cpuinfo->cpu_id.unused); } /* - * show_cpuinfo - Get information on one CPU for use by procfs. + * Check for initrd being in usable memory */ - -static int show_cpuinfo(struct seq_file *m, void *v) +static void __init check_initrd(void) { - struct cpuinfo_S390 *cpuinfo; - unsigned long n = (unsigned long) v - 1; - - preempt_disable(); - if (!n) { - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE && + !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + pr_err("initrd does not fit memory.\n"); + memblock_free(INITRD_START, INITRD_SIZE); + initrd_start = initrd_end = 0; } - if (cpu_online(n)) { -#ifdef CONFIG_SMP - if (smp_processor_id() == n) - cpuinfo = &S390_lowcore.cpu_data; - else - cpuinfo = &lowcore_ptr[n]->cpu_data; -#else - cpuinfo = &S390_lowcore.cpu_data; #endif - seq_printf(m, "processor %li: " - "version = %02X, " - "identification = %06X, " - "machine = %04X\n", - n, cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine); - } - preempt_enable(); - return 0; } -static void *c_start(struct seq_file *m, loff_t *pos) +/* + * Reserve all kernel text + */ +static void __init reserve_kernel(void) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + unsigned long start_pfn; + start_pfn = PFN_UP(__pa(&_end)); + + /* + * Reserve memory used for lowcore/command line/kernel image. + */ + memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) + - (unsigned long)_stext); } -static void *c_next(struct seq_file *m, void *v, loff_t *pos) + +static void __init reserve_elfcorehdr(void) { - ++*pos; - return c_start(m, pos); +#ifdef CONFIG_CRASH_DUMP + if (is_kdump_kernel()) + memblock_reserve(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size)); +#endif } -static void c_stop(struct seq_file *m, void *v) + +static void __init setup_memory(void) { -} -struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; + struct memblock_region *reg; -#define DEFINE_IPL_ATTR(_name, _format, _value) \ -static ssize_t ipl_##_name##_show(struct subsystem *subsys, \ - char *page) \ -{ \ - return sprintf(page, _format, _value); \ -} \ -static struct subsys_attribute ipl_##_name##_attr = \ - __ATTR(_name, S_IRUGO, ipl_##_name##_show, NULL); - -DEFINE_IPL_ATTR(wwpn, "0x%016llx\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.wwpn); -DEFINE_IPL_ATTR(lun, "0x%016llx\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.lun); -DEFINE_IPL_ATTR(bootprog, "%lld\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.bootprog); -DEFINE_IPL_ATTR(br_lba, "%lld\n", (unsigned long long) - IPL_PARMBLOCK_START->fcp.br_lba); - -enum ipl_type_type { - ipl_type_unknown, - ipl_type_ccw, - ipl_type_fcp, -}; + /* + * Init storage key for present memory + */ + for_each_memblock(memory, reg) { + storage_key_init_range(reg->base, reg->base + reg->size); + } + psw_set_key(PAGE_DEFAULT_KEY); -static enum ipl_type_type -get_ipl_type(void) -{ - struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - - if (!IPL_DEVNO_VALID) - return ipl_type_unknown; - if (!IPL_PARMBLOCK_VALID) - return ipl_type_ccw; - if (ipl->hdr.header.version > IPL_MAX_SUPPORTED_VERSION) - return ipl_type_unknown; - if (ipl->fcp.pbt != IPL_TYPE_FCP) - return ipl_type_unknown; - return ipl_type_fcp; + /* Only cosmetics */ + memblock_enforce_memory_limit(memblock_end_of_DRAM()); } -static ssize_t -ipl_type_show(struct subsystem *subsys, char *page) +/* + * Setup hardware capabilities. + */ +static void __init setup_hwcaps(void) { - switch (get_ipl_type()) { - case ipl_type_ccw: - return sprintf(page, "ccw\n"); - case ipl_type_fcp: - return sprintf(page, "fcp\n"); - default: - return sprintf(page, "unknown\n"); - } -} + static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; + struct cpuid cpu_id; + int i; + + /* + * The store facility list bits numbers as found in the principles + * of operation are numbered with bit 1UL<<31 as number 0 to + * bit 1UL<<0 as number 31. + * Bit 0: instructions named N3, "backported" to esa-mode + * Bit 2: z/Architecture mode is active + * Bit 7: the store-facility-list-extended facility is installed + * Bit 17: the message-security assist is installed + * Bit 19: the long-displacement facility is installed + * Bit 21: the extended-immediate facility is installed + * Bit 22: extended-translation facility 3 is installed + * Bit 30: extended-translation facility 3 enhancement facility + * These get translated to: + * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, + * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, + * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and + * HWCAP_S390_ETF3EH bit 8 (22 && 30). + */ + for (i = 0; i < 6; i++) + if (test_facility(stfl_bits[i])) + elf_hwcap |= 1UL << i; -static struct subsys_attribute ipl_type_attr = __ATTR_RO(ipl_type); + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_S390_ETF3EH; -static ssize_t -ipl_device_show(struct subsystem *subsys, char *page) -{ - struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START; - - switch (get_ipl_type()) { - case ipl_type_ccw: - return sprintf(page, "0.0.%04x\n", ipl_devno); - case ipl_type_fcp: - return sprintf(page, "0.0.%04x\n", ipl->fcp.devno); - default: - return 0; + /* + * Check for additional facilities with store-facility-list-extended. + * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 + * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information + * as stored by stfl, bits 32-xxx contain additional facilities. + * How many facility words are stored depends on the number of + * doublewords passed to the instruction. The additional facilities + * are: + * Bit 42: decimal floating point facility is installed + * Bit 44: perform floating point operation facility is installed + * translated to: + * HWCAP_S390_DFP bit 6 (42 && 44). + */ + if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_S390_DFP; + + /* + * Huge page support HWCAP_S390_HPAGE is bit 7. + */ + if (MACHINE_HAS_HPAGE) + elf_hwcap |= HWCAP_S390_HPAGE; + +#if defined(CONFIG_64BIT) + /* + * 64-bit register support for 31-bit processes + * HWCAP_S390_HIGH_GPRS is bit 9. + */ + elf_hwcap |= HWCAP_S390_HIGH_GPRS; + + /* + * Transactional execution support HWCAP_S390_TE is bit 10. + */ + if (test_facility(50) && test_facility(73)) + elf_hwcap |= HWCAP_S390_TE; +#endif + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: +#if !defined(CONFIG_64BIT) + default: /* Use "g5" as default for 31 bit kernels. */ +#endif + strcpy(elf_platform, "g5"); + break; + case 0x2064: + case 0x2066: +#if defined(CONFIG_64BIT) + default: /* Use "z900" as default for 64 bit kernels. */ +#endif + strcpy(elf_platform, "z900"); + break; + case 0x2084: + case 0x2086: + strcpy(elf_platform, "z990"); + break; + case 0x2094: + case 0x2096: + strcpy(elf_platform, "z9-109"); + break; + case 0x2097: + case 0x2098: + strcpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strcpy(elf_platform, "z196"); + break; + case 0x2827: + case 0x2828: + strcpy(elf_platform, "zEC12"); + break; } } -static struct subsys_attribute ipl_device_attr = - __ATTR(device, S_IRUGO, ipl_device_show, NULL); - -static struct attribute *ipl_fcp_attrs[] = { - &ipl_type_attr.attr, - &ipl_device_attr.attr, - &ipl_wwpn_attr.attr, - &ipl_lun_attr.attr, - &ipl_bootprog_attr.attr, - &ipl_br_lba_attr.attr, - NULL, -}; +/* + * Setup function called from init/main.c just after the banner + * was printed. + */ -static struct attribute_group ipl_fcp_attr_group = { - .attrs = ipl_fcp_attrs, -}; +void __init setup_arch(char **cmdline_p) +{ + /* + * print what head.S has found out about the machine + */ +#ifndef CONFIG_64BIT + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 31-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 31-bit mode\n"); + if (MACHINE_HAS_IEEE) + pr_info("The hardware system has IEEE compatible " + "floating point units\n"); + else + pr_info("The hardware system has no IEEE compatible " + "floating point units\n"); +#else /* CONFIG_64BIT */ + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 64-bit mode\n"); + else if (MACHINE_IS_KVM) + pr_info("Linux is running under KVM in 64-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 64-bit mode\n"); +#endif /* CONFIG_64BIT */ -static struct attribute *ipl_ccw_attrs[] = { - &ipl_type_attr.attr, - &ipl_device_attr.attr, - NULL, -}; + /* Have one command line that is parsed and saved in /proc/cmdline */ + /* boot_command_line has been already set up in early.c */ + *cmdline_p = boot_command_line; -static struct attribute_group ipl_ccw_attr_group = { - .attrs = ipl_ccw_attrs, -}; + ROOT_DEV = Root_RAM0; -static struct attribute *ipl_unknown_attrs[] = { - &ipl_type_attr.attr, - NULL, -}; + /* Is init_mm really needed? */ + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; -static struct attribute_group ipl_unknown_attr_group = { - .attrs = ipl_unknown_attrs, -}; + parse_early_param(); + os_info_init(); + setup_ipl(); -static ssize_t -ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off, size_t count) -{ - unsigned int size = IPL_PARMBLOCK_SIZE; + /* Do some memory reservations *before* memory is added to memblock */ + reserve_memory_end(); + reserve_oldmem(); + reserve_kernel(); + reserve_initrd(); + reserve_elfcorehdr(); + memblock_allow_resize(); - if (off > size) - return 0; - if (off + count > size) - count = size - off; + /* Get information about *all* installed memory */ + detect_memory_memblock(); - memcpy(buf, (void *) IPL_PARMBLOCK_START + off, count); - return count; -} + remove_oldmem(); -static struct bin_attribute ipl_parameter_attr = { - .attr = { - .name = "binary_parameter", - .mode = S_IRUGO, - .owner = THIS_MODULE, - }, - .size = PAGE_SIZE, - .read = &ipl_parameter_read, -}; + /* + * Make sure all chunks are MAX_ORDER aligned so we don't need the + * extra checks that HOLES_IN_ZONE would require. + * + * Is this still required? + */ + memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); -static ssize_t -ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off, size_t count) -{ - unsigned int size = IPL_PARMBLOCK_START->fcp.scp_data_len; - void *scp_data = &IPL_PARMBLOCK_START->fcp.scp_data; + setup_memory_end(); + setup_memory(); - if (off > size) - return 0; - if (off + count > size) - count = size - off; + check_initrd(); + reserve_crashkernel(); - memcpy(buf, scp_data + off, count); - return count; -} + setup_resources(); + setup_vmcoreinfo(); + setup_lowcore(); + smp_fill_possible_mask(); + cpu_init(); + s390_init_cpu_topology(); -static struct bin_attribute ipl_scp_data_attr = { - .attr = { - .name = "scp_data", - .mode = S_IRUGO, - .owner = THIS_MODULE, - }, - .size = PAGE_SIZE, - .read = &ipl_scp_data_read, -}; + /* + * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). + */ + setup_hwcaps(); -static decl_subsys(ipl, NULL, NULL); + /* + * Create kernel page tables and switch to virtual addressing. + */ + paging_init(); -static int __init -ipl_device_sysfs_register(void) { - int rc; + /* Setup default console */ + conmode_default(); + set_preferred_console(); - rc = firmware_register(&ipl_subsys); - if (rc) - return rc; + /* Setup zfcpdump support */ + setup_zfcpdump(); +} - switch (get_ipl_type()) { - case ipl_type_ccw: - sysfs_create_group(&ipl_subsys.kset.kobj, &ipl_ccw_attr_group); - break; - case ipl_type_fcp: - sysfs_create_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group); - sysfs_create_bin_file(&ipl_subsys.kset.kobj, - &ipl_parameter_attr); - sysfs_create_bin_file(&ipl_subsys.kset.kobj, - &ipl_scp_data_attr); - break; - default: - sysfs_create_group(&ipl_subsys.kset.kobj, - &ipl_unknown_attr_group); - break; - } +#ifdef CONFIG_32BIT +static int no_removal_warning __initdata; + +static int __init parse_no_removal_warning(char *str) +{ + no_removal_warning = 1; return 0; } +__setup("no_removal_warning", parse_no_removal_warning); -__initcall(ipl_device_sysfs_register); +static int __init removal_warning(void) +{ + if (no_removal_warning) + return 0; + printk(KERN_ALERT "\n\n"); + printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n"); + printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n"); + printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n"); + printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n"); + printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n"); + printk(KERN_CONT "please let us know. Please write to:\n"); + printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n"); + printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n"); + printk(KERN_CONT "Thank you!\n\n"); + printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n"); + printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n"); + schedule_timeout_uninterruptible(300 * HZ); + return 0; +} +early_initcall(removal_warning); +#endif diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 6ae4a77270b..42b49f9e19b 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -1,8 +1,5 @@ /* - * arch/s390/kernel/signal.c - * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * * Based on Intel version @@ -12,11 +9,9 @@ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/errno.h> @@ -27,12 +22,14 @@ #include <linux/tty.h> #include <linux/personality.h> #include <linux/binfmts.h> +#include <linux/tracehook.h> +#include <linux/syscalls.h> +#include <linux/compat.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - +#include <asm/switch_to.h> +#include "entry.h" typedef struct { @@ -51,214 +48,106 @@ typedef struct struct ucontext uc; } rt_sigframe; -int do_signal(struct pt_regs *regs, sigset_t *oldset); - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(struct pt_regs * regs, int history0, int history1, - old_sigset_t mask) -{ - sigset_t saveset; - - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - regs->gprs[2] = -EINTR; - - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage long -sys_rt_sigsuspend(struct pt_regs *regs, sigset_t __user *unewset, - size_t sigsetsize) -{ - sigset_t saveset, newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - regs->gprs[2] = -EINTR; - - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - if (do_signal(regs, &saveset)) - return -EINTR; - } -} - -asmlinkage long -sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - -asmlinkage long -sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, - struct pt_regs *regs) -{ - return do_sigaltstack(uss, uoss, regs->gprs[15]); -} - - - /* Returns non-zero on fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { - unsigned long old_mask = regs->psw.mask; - int err; - + _sigregs user_sregs; + save_access_regs(current->thread.acrs); /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - regs->psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask); - err = __copy_to_user(&sregs->regs.psw, ®s->psw, - sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs)); - regs->psw.mask = old_mask; - if (err != 0) - return err; - err = __copy_to_user(&sregs->regs.acrs, current->thread.acrs, - sizeof(sregs->regs.acrs)); - if (err != 0) - return err; + user_sregs.regs.psw.mask = PSW_USER_BITS | + (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); + user_sregs.regs.psw.addr = regs->psw.addr; + memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(user_sregs.regs.acrs)); /* * We have to store the fp registers to current->thread.fp_regs * to merge them with the emulated registers. */ - save_fp_regs(¤t->thread.fp_regs); - return __copy_to_user(&sregs->fpregs, ¤t->thread.fp_regs, - sizeof(s390_fp_regs)); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); + memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, + sizeof(user_sregs.fpregs)); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) + return -EFAULT; + return 0; } -/* Returns positive number on error */ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { - unsigned long old_mask = regs->psw.mask; - int err; + _sigregs user_sregs; /* Alwys make any pending restarted system call return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - err = __copy_from_user(®s->psw, &sregs->regs.psw, - sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs)); - regs->psw.mask = PSW_MASK_MERGE(old_mask, regs->psw.mask); - regs->psw.addr |= PSW_ADDR_AMODE; - if (err) - return err; - err = __copy_from_user(¤t->thread.acrs, &sregs->regs.acrs, - sizeof(sregs->regs.acrs)); - if (err) - return err; + if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI)) + return -EINVAL; + + /* Loading the floating-point-control word can fail. Do that first. */ + if (restore_fp_ctl(&user_sregs.fpregs.fpc)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; + memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(current->thread.acrs)); restore_access_regs(current->thread.acrs); - err = __copy_from_user(¤t->thread.fp_regs, &sregs->fpregs, - sizeof(s390_fp_regs)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; - if (err) - return err; + memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, + sizeof(current->thread.fp_regs)); - restore_fp_regs(¤t->thread.fp_regs); - regs->trap = -1; /* disable syscall checks */ + restore_fp_regs(current->thread.fp_regs.fprs); + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } -asmlinkage long sys_sigreturn(struct pt_regs *regs) +SYSCALL_DEFINE0(sigreturn) { + struct pt_regs *regs = task_pt_regs(current); sigframe __user *frame = (sigframe __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; } -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = task_pt_regs(current); rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - - if (do_sigaltstack(&frame->uc.uc_stack, NULL, - regs->gprs[15]) == -EFAULT) + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -280,19 +169,16 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) /* Default to using normal stack */ sp = regs->gprs[15]; + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (! sas_ss_flags(sp)) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -306,13 +192,14 @@ static inline int map_signal(int sig) return sig; } -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) { sigframe __user *frame; frame = get_sigframe(ka, regs, sizeof(sigframe)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) @@ -342,6 +229,10 @@ static void setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -349,26 +240,33 @@ static void setup_frame(int sig, struct k_sigaction *ka, /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) goto give_sigsegv; - return; + return 0; give_sigsegv: force_sigsegv(sig, current); + return -EFAULT; } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { int err = 0; rt_sigframe __user *frame; frame = get_sigframe(ka, regs, sizeof(rt_sigframe)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (copy_siginfo_to_user(&frame->info, info)) @@ -377,10 +275,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); - err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gprs[15]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); err |= save_sigregs(regs, &frame->uc.uc_mcontext); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) @@ -394,8 +289,9 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } else { regs->gprs[14] = (unsigned long) frame->retcode | PSW_ADDR_AMODE; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __user *)(frame->retcode))) + goto give_sigsegv; } /* Set up backchain. */ @@ -404,37 +300,38 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (unsigned long) &frame->info; regs->gprs[4] = (unsigned long) &frame->uc; - return; + regs->gprs[5] = task_thread_info(current)->last_break; + return 0; give_sigsegv: force_sigsegv(sig, current); + return -EFAULT; } -/* - * OK, we're invoking a handler - */ - -static void -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +static void handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) { + int ret; + /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); - - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + ret = setup_frame(sig, ka, oldset, regs); + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } /* @@ -446,81 +343,85 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -int do_signal(struct pt_regs *regs, sigset_t *oldset) +void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; + sigset_t *oldset = sigmask_to_save(); /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. */ - if (!user_mode(regs)) - return 1; - - if (!oldset) - oldset = ¤t->blocked; - - /* Are we from a system call? */ - if (regs->trap == __LC_SVC_OLD_PSW) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - if (retval == -ERESTARTNOHAND || - retval == -ERESTARTSYS || - retval == -ERESTARTNOINTR) { - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - } else if (retval == -ERESTART_RESTARTBLOCK) { - regs->gprs[2] = -EINTR; - } - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + current_thread_info()->system_call = + test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } - if (signr > 0) { /* Whee! Actually deliver the signal. */ -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) { - extern void handle_signal32(unsigned long sig, - struct k_sigaction *ka, - siginfo_t *info, - sigset_t *oldset, - struct pt_regs *regs); + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->int_code >> 16); + break; + } + } + /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + + if (is_compat_task()) handle_signal32(signr, &ka, &info, oldset, regs); - return 1; - } -#endif - handle_signal(signr, &ka, &info, oldset, regs); - return 1; + else + handle_signal(signr, &ka, &info, oldset, regs); + return; } - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->int_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_pt_regs_flag(regs, PIF_SYSCALL); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_pt_regs_flag(regs, PIF_PER_TRAP); + break; + } } - return 0; + + /* + * If there's no signal to deliver, we just put the saved sigmask back. + */ + restore_saved_sigmask(); +} + +void do_notify_resume(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cbfcfd02a43..243c7e51260 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,419 +1,459 @@ /* - * arch/s390/kernel/smp.c + * SMP related functions * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - * Heiko Carstens (heiko.carstens@de.ibm.com) + * Copyright IBM Corp. 1999, 2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, * - * based on other smp stuff by + * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> * (c) 1998 Ingo Molnar * - * We work with logical cpu numbering everywhere we can. The only - * functions using the real cpu address (got from STAP) are the sigp - * functions. For all other functions we use the identity mapping. - * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is - * used e.g. to find the idle task belonging to a logical cpu. Every array - * in the kernel is sorted by the logical cpu number and not by the physical - * one which is causing all the confusion with __cpu_logical_map and - * cpu_number_map in other architectures. + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. */ +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> #include <linux/module.h> #include <linux/init.h> - #include <linux/mm.h> +#include <linux/err.h> #include <linux/spinlock.h> #include <linux/kernel_stat.h> -#include <linux/smp_lock.h> - #include <linux/delay.h> -#include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/irqflags.h> #include <linux/cpu.h> - -#include <asm/sigp.h> -#include <asm/pgalloc.h> +#include <linux/slab.h> +#include <linux/crash_dump.h> +#include <asm/asm-offsets.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/ipl.h> +#include <asm/setup.h> #include <asm/irq.h> -#include <asm/s390_ext.h> -#include <asm/cpcmd.h> #include <asm/tlbflush.h> +#include <asm/vtimer.h> +#include <asm/lowcore.h> +#include <asm/sclp.h> +#include <asm/vdso.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include <asm/sigp.h> +#include "entry.h" -/* prototypes */ - -extern volatile int __cpu_logical_map[]; - -/* - * An array with a pointer the lowcore of every CPU. - */ - -struct _lowcore *lowcore_ptr[NR_CPUS]; +enum { + ec_schedule = 0, + ec_call_function_single, + ec_stop_cpu, +}; -cpumask_t cpu_online_map; -cpumask_t cpu_possible_map; +enum { + CPU_STATE_STANDBY, + CPU_STATE_CONFIGURED, +}; -static struct task_struct *current_set[NR_CPUS]; +struct pcpu { + struct cpu *cpu; + struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + unsigned long async_stack; /* async stack for the cpu */ + unsigned long panic_stack; /* panic stack for the cpu */ + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + int state; /* physical cpu state */ + int polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; -EXPORT_SYMBOL(cpu_online_map); +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; /* - * Reboot, halt and power_off routines for SMP. + * The smp_cpu_state_mutex must be held when changing the state or polarization + * member of a pcpu data structure within the pcpu_devices arreay. */ -extern char vmhalt_cmd[]; -extern char vmpoff_cmd[]; - -extern void reipl(unsigned long devno); -extern void reipl_diag(void); - -static void smp_ext_bitcall(int, ec_bit_sig); -static void smp_ext_bitcall_others(ec_bit_sig); +DEFINE_MUTEX(smp_cpu_state_mutex); /* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. + * Signal processor helper functions. */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +{ + int cc; -static struct call_data_struct * call_data; + while (1) { + cc = __pcpu_sigp(addr, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + return cc; + cpu_relax(); + } +} -/* - * 'Call function' interrupt callback - */ -static void do_call_function(void) +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) { - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; + int cc, retry; - atomic_inc(&call_data->started); - (*func)(info); - if (wait) - atomic_inc(&call_data->finished); + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + break; + if (retry >= 3) + udelay(10); + } + return cc; } -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ - -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -/* - * [SUMMARY] Run a function on all other CPUs. - * <func> The function to run. This must be fast and non-blocking. - * <info> An arbitrary pointer to pass to the function. - * <nonatomic> currently unused. - * <wait> If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute <<func>> or are or have executed. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ +static inline int pcpu_stopped(struct pcpu *pcpu) { - struct call_data_struct data; - int cpus = num_online_cpus()-1; + u32 uninitialized_var(status); - if (cpus <= 0) + if (__pcpu_sigp(pcpu->address, SIGP_SENSE, + 0, &status) != SIGP_CC_STATUS_STORED) return 0; + return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); +} - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - spin_lock(&call_lock); - call_data = &data; - /* Send a message to all other CPUs and wait for them to respond */ - smp_ext_bitcall_others(ec_call_function); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); - spin_unlock(&call_lock); - +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING, + 0, NULL) != SIGP_CC_STATUS_STORED) + return 1; + /* Status stored condition code is equivalent to cpu not running. */ return 0; } /* - * Call a function on one CPU - * cpu : the CPU the function should be executed on - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler. You may call it from a bottom half. - * - * It is guaranteed that the called function runs on the specified CPU, - * preemption is disabled. + * Find struct pcpu by cpu address. */ -int smp_call_function_on(void (*func) (void *info), void *info, - int nonatomic, int wait, int cpu) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) { - struct call_data_struct data; - int curr_cpu; + int cpu; - if (!cpu_online(cpu)) - return -EINVAL; + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; +} - /* disable preemption for local function call */ - curr_cpu = get_cpu(); +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ + int order; - if (curr_cpu == cpu) { - /* direct call to function */ - func(info); - put_cpu(); - return 0; + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + pcpu_sigp_retry(pcpu, order, 0); +} + +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc; + + if (pcpu != &pcpu_devices[0]) { + pcpu->lowcore = (struct _lowcore *) + __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + pcpu->panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + goto out; } + lc = pcpu->lowcore; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = pcpu->async_stack + ASYNC_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); + if (!lc->extended_save_area_addr) + goto out; + } +#else + if (vdso_alloc_per_cpu(lc)) + goto out; +#endif + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); + return 0; +out: + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } + return -ENOMEM; +} - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); +#ifdef CONFIG_HOTPLUG_CPU - spin_lock_bh(&call_lock); - call_data = &data; - smp_ext_bitcall(cpu, ec_call_function); +static void pcpu_free_lowcore(struct pcpu *pcpu) +{ + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); + lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + struct _lowcore *lc = pcpu->lowcore; - /* Wait for response */ - while (atomic_read(&data.started) != 1) - cpu_relax(); + free_page((unsigned long) lc->extended_save_area_addr); + lc->extended_save_area_addr = 0; + } +#else + vdso_free_per_cpu(pcpu->lowcore); +#endif + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } +} - if (wait) - while (atomic_read(&data.finished) != 1) - cpu_relax(); +#endif /* CONFIG_HOTPLUG_CPU */ - spin_unlock_bh(&call_lock); - put_cpu(); - return 0; +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc = pcpu->lowcore; + + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + atomic_inc(&init_mm.context.attach_count); + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->ftrace_func = S390_lowcore.ftrace_func; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); } -EXPORT_SYMBOL(smp_call_function_on); -static inline void do_send_stop(void) +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) { - int cpu, rc; - - /* stop all processors */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = signal_processor(cpu, sigp_stop); - } while (rc == sigp_busy); - } + struct _lowcore *lc = pcpu->lowcore; + struct thread_info *ti = task_thread_info(tsk); + + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->thread_info = (unsigned long) task_thread_info(tsk); + lc->current_task = (unsigned long) tsk; + lc->user_timer = ti->user_timer; + lc->system_timer = ti->system_timer; + lc->steal_timer = 0; } -static inline void do_store_status(void) +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) { - int cpu, rc; + struct _lowcore *lc = pcpu->lowcore; - /* store status of all processors in their lowcores (real 0) */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = signal_processor_p( - (__u32)(unsigned long) lowcore_ptr[cpu], cpu, - sigp_store_status_at_address); - } while(rc == sigp_busy); - } + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1UL; + pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); } /* - * this function sends a 'stop' sigp to all other CPUs in the system. - * it goes straight through. + * Call function via PSW restart on pcpu and stop the current cpu. */ -void smp_send_stop(void) +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), + void *data, unsigned long stack) { - /* write magic number to zero page (absolute 0) */ - lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; - - /* stop other processors. */ - do_send_stop(); - - /* store status of other processors. */ - do_store_status(); + struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; + unsigned long source_cpu = stap(); + + __load_psw_mask(PSW_KERNEL_BITS); + if (pcpu->address == source_cpu) + func(data); /* should not return */ + /* Stop target cpu (if func returns this stops the current cpu). */ + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + /* Restart func on the target cpu and stop the current cpu. */ + mem_assign_absolute(lc->restart_stack, stack); + mem_assign_absolute(lc->restart_fn, (unsigned long) func); + mem_assign_absolute(lc->restart_data, (unsigned long) data); + mem_assign_absolute(lc->restart_source, source_cpu); + asm volatile( + "0: sigp 0,%0,%2 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,%3 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (source_cpu), + "K" (SIGP_RESTART), "K" (SIGP_STOP) + : "0", "1", "cc"); + for (;;) ; } /* - * Reboot, halt and power_off routines for SMP. + * Call function on an online CPU. */ - -static void do_machine_restart(void * __unused) +void smp_call_online_cpu(void (*func)(void *), void *data) { - int cpu; - static atomic_t cpuid = ATOMIC_INIT(-1); - - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1) - signal_processor(smp_processor_id(), sigp_stop); - - /* Wait for all other cpus to enter stopped state */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - while(!smp_cpu_not_running(cpu)) - cpu_relax(); - } - - /* Store status of other cpus. */ - do_store_status(); - - /* - * Finally call reipl. Because we waited for all other - * cpus to enter this function we know that they do - * not hold any s390irq-locks (the cpus have been - * interrupted by an external interrupt and s390irq - * locks are always held disabled). - */ - reipl_diag(); - - if (MACHINE_IS_VM) - cpcmd ("IPL", NULL, 0, NULL); - else - reipl (0x10000 | S390_lowcore.ipl_device); + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); } -void machine_restart_smp(char * __unused) +/* + * Call function on the ipl CPU. + */ +void smp_call_ipl_cpu(void (*func)(void *), void *data) { - on_each_cpu(do_machine_restart, NULL, 0, 0); + pcpu_delegate(&pcpu_devices[0], func, data, + pcpu_devices->panic_stack + PAGE_SIZE); } -static void do_wait_for_stop(void) +int smp_find_processor_id(u16 address) { - unsigned long cr[16]; + int cpu; - __ctl_store(cr, 0, 15); - cr[0] &= ~0xffff; - cr[6] = 0; - __ctl_load(cr, 0, 15); - for (;;) - enabled_wait(); + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; } -static void do_machine_halt(void * __unused) +int smp_vcpu_scheduled(int cpu) { - static atomic_t cpuid = ATOMIC_INIT(-1); + return pcpu_running(pcpu_devices + cpu); +} - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) == -1) { - smp_send_stop(); - if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0, NULL); - signal_processor(smp_processor_id(), - sigp_stop_and_store_status); - } - do_wait_for_stop(); +void smp_yield(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); } -void machine_halt_smp(void) +void smp_yield_cpu(int cpu) { - on_each_cpu(do_machine_halt, NULL, 0, 0); + if (MACHINE_HAS_DIAG9C) + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); + else if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); } -static void do_machine_power_off(void * __unused) +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. + */ +static void smp_emergency_stop(cpumask_t *cpumask) { - static atomic_t cpuid = ATOMIC_INIT(-1); + u64 end; + int cpu; - if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) == -1) { - smp_send_stop(); - if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0, NULL); - signal_processor(smp_processor_id(), - sigp_stop_and_store_status); + end = get_tod_clock() + (1000000UL << 12); + for_each_cpu(cpu, cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, + 0, NULL) == SIGP_CC_BUSY && + get_tod_clock() < end) + cpu_relax(); + } + while (get_tod_clock() < end) { + for_each_cpu(cpu, cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) + break; + cpu_relax(); } - do_wait_for_stop(); } -void machine_power_off_smp(void) +/* + * Stop all cpus but the current one. + */ +void smp_send_stop(void) { - on_each_cpu(do_machine_power_off, NULL, 0, 0); + cpumask_t cpumask; + int cpu; + + /* Disable all interrupts/machine checks */ + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); + trace_hardirqs_off(); + + debug_set_critical(); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + + if (oops_in_progress) + smp_emergency_stop(&cpumask); + + /* stop all processors */ + for_each_cpu(cpu, &cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) + cpu_relax(); + } } /* * This is the main routine where commands issued by other * cpus are handled. */ - -void do_ext_call_interrupt(struct pt_regs *regs, __u16 code) +static void smp_handle_ext_call(void) { - unsigned long bits; - - /* - * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. - */ - bits = xchg(&S390_lowcore.ext_call_fast, 0); + unsigned long bits; + + /* handle bit signal external calls */ + bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + if (test_bit(ec_stop_cpu, &bits)) + smp_stop_cpu(); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); + if (test_bit(ec_call_function_single, &bits)) + generic_smp_call_function_single_interrupt(); +} - if (test_bit(ec_call_function, &bits)) - do_call_function(); +static void do_ext_call_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS); + smp_handle_ext_call(); } -/* - * Send an external call sigp to another cpu and return without waiting - * for its completion. - */ -static void smp_ext_bitcall(int cpu, ec_bit_sig sig) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while(signal_processor(cpu, sigp_emergency_signal) == sigp_busy) - udelay(10); + int cpu; + + for_each_cpu(cpu, mask) + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } -/* - * Send an external call sigp to every other cpu in the system and - * return without waiting for its completion. - */ -static void smp_ext_bitcall_others(ec_bit_sig sig) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (signal_processor(cpu, sigp_emergency_signal) == sigp_busy) - udelay(10); - } +void arch_send_call_function_single_ipi(int cpu) +{ + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } #ifndef CONFIG_64BIT /* * this function sends a 'purge tlb' signal to another CPU. */ -void smp_ptlb_callback(void *info) +static void smp_ptlb_callback(void *info) { - local_flush_tlb(); + __tlb_flush_local(); } void smp_ptlb_all(void) { - on_each_cpu(smp_ptlb_callback, NULL, 0, 1); + on_each_cpu(smp_ptlb_callback, NULL, 1); } EXPORT_SYMBOL(smp_ptlb_all); #endif /* ! CONFIG_64BIT */ @@ -425,399 +465,342 @@ EXPORT_SYMBOL(smp_ptlb_all); */ void smp_send_reschedule(int cpu) { - smp_ext_bitcall(cpu, ec_schedule); + pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } /* * parameter area for the set/clear control bit callbacks */ -typedef struct -{ - __u16 start_ctl; - __u16 end_ctl; - unsigned long orvals[16]; - unsigned long andvals[16]; -} ec_creg_mask_parms; +struct ec_creg_mask_parms { + unsigned long orval; + unsigned long andval; + int cr; +}; /* * callback for setting/clearing control bits */ -void smp_ctl_bit_callback(void *info) { - ec_creg_mask_parms *pp; +static void smp_ctl_bit_callback(void *info) +{ + struct ec_creg_mask_parms *pp = info; unsigned long cregs[16]; - int i; - - pp = (ec_creg_mask_parms *) info; - __ctl_store(cregs[pp->start_ctl], pp->start_ctl, pp->end_ctl); - for (i = pp->start_ctl; i <= pp->end_ctl; i++) - cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i]; - __ctl_load(cregs[pp->start_ctl], pp->start_ctl, pp->end_ctl); + + __ctl_store(cregs, 0, 15); + cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; + __ctl_load(cregs, 0, 15); } /* * Set a bit in a control register of all cpus */ -void smp_ctl_set_bit(int cr, int bit) { - ec_creg_mask_parms parms; +void smp_ctl_set_bit(int cr, int bit) +{ + struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; - parms.start_ctl = cr; - parms.end_ctl = cr; - parms.orvals[cr] = 1 << bit; - parms.andvals[cr] = -1L; - preempt_disable(); - smp_call_function(smp_ctl_bit_callback, &parms, 0, 1); - __ctl_set_bit(cr, bit); - preempt_enable(); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); } +EXPORT_SYMBOL(smp_ctl_set_bit); /* * Clear a bit in a control register of all cpus */ -void smp_ctl_clear_bit(int cr, int bit) { - ec_creg_mask_parms parms; +void smp_ctl_clear_bit(int cr, int bit) +{ + struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; - parms.start_ctl = cr; - parms.end_ctl = cr; - parms.orvals[cr] = 0; - parms.andvals[cr] = ~(1L << bit); - preempt_disable(); - smp_call_function(smp_ctl_bit_callback, &parms, 0, 1); - __ctl_clear_bit(cr, bit); - preempt_enable(); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); } +EXPORT_SYMBOL(smp_ctl_clear_bit); -/* - * Lets check how many CPUs we have. - */ +#ifdef CONFIG_CRASH_DUMP -void -__init smp_check_cpus(unsigned int max_cpus) +static void __init smp_get_save_area(int cpu, u16 address) { - int cpu, num_cpus; - __u16 boot_cpu_addr; - - /* - * cpu 0 is the boot cpu. See smp_prepare_boot_cpu. - */ - - boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr; - current_thread_info()->cpu = 0; - num_cpus = 1; - for (cpu = 0; cpu <= 65535 && num_cpus < max_cpus; cpu++) { - if ((__u16) cpu == boot_cpu_addr) - continue; - __cpu_logical_map[num_cpus] = (__u16) cpu; - if (signal_processor(num_cpus, sigp_sense) == - sigp_not_operational) - continue; - cpu_set(num_cpus, cpu_present_map); - num_cpus++; + void *lc = pcpu_devices[0].lowcore; + struct save_area *save_area; + + if (is_kdump_kernel()) + return; + if (!OLDMEM_BASE && (address == boot_cpu_address || + ipl_info.type != IPL_TYPE_FCP_DUMP)) + return; + save_area = dump_save_area_create(cpu); + if (!save_area) + panic("could not allocate memory for save area\n"); + if (address == boot_cpu_address) { + /* Copy the registers of the boot cpu. */ + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + SAVE_AREA_BASE - PAGE_SIZE, 0); + return; } + /* Get the registers of a non-boot cpu. */ + __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); + memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); +} - for (cpu = 1; cpu < max_cpus; cpu++) - cpu_set(cpu, cpu_possible_map); +int smp_store_status(int cpu) +{ + struct pcpu *pcpu; - printk("Detected %d CPU's\n",(int) num_cpus); - printk("Boot cpu address %2X\n", boot_cpu_addr); + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, + 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + return 0; } -/* - * Activate a secondary processor. - */ -extern void init_cpu_timer(void); -extern void init_cpu_vtimer(void); -extern int pfault_init(void); -extern void pfault_fini(void); +#else /* CONFIG_CRASH_DUMP */ + +static inline void smp_get_save_area(int cpu, u16 address) { } -int __devinit start_secondary(void *cpuvoid) +#endif /* CONFIG_CRASH_DUMP */ + +void smp_cpu_set_polarization(int cpu, int val) { - /* Setup the cpu */ - cpu_init(); - preempt_disable(); - /* init per CPU timer */ - init_cpu_timer(); -#ifdef CONFIG_VIRT_TIMER - init_cpu_vtimer(); -#endif -#ifdef CONFIG_PFAULT - /* Enable pfault pseudo page faults on this cpu. */ - if (MACHINE_IS_VM) - pfault_init(); -#endif - /* Mark this cpu as online */ - cpu_set(smp_processor_id(), cpu_online_map); - /* Switch on interrupts */ - local_irq_enable(); - /* Print info about this processor */ - print_cpu_info(&S390_lowcore.cpu_data); - /* cpu_idle will call schedule for us */ - cpu_idle(); - return 0; + pcpu_devices[cpu].polarization = val; } -static void __init smp_create_idle(unsigned int cpu) +int smp_cpu_get_polarization(int cpu) { - struct task_struct *p; - - /* - * don't care about the psw and regs settings since we'll never - * reschedule the forked task. - */ - p = fork_idle(cpu); - if (IS_ERR(p)) - panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); - current_set[cpu] = p; + return pcpu_devices[cpu].polarization; } -/* Reserving and releasing of CPUs */ - -static DEFINE_SPINLOCK(smp_reserve_lock); -static int smp_cpu_reserved[NR_CPUS]; - -int -smp_get_cpu(cpumask_t cpu_mask) +static struct sclp_cpu_info *smp_get_cpu_info(void) { - unsigned long flags; - int cpu; - - spin_lock_irqsave(&smp_reserve_lock, flags); - /* Try to find an already reserved cpu. */ - for_each_cpu_mask(cpu, cpu_mask) { - if (smp_cpu_reserved[cpu] != 0) { - smp_cpu_reserved[cpu]++; - /* Found one. */ - goto out; + static int use_sigp_detection; + struct sclp_cpu_info *info; + int address; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + use_sigp_detection = 1; + for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) + continue; + info->cpu[info->configured].address = address; + info->configured++; } + info->combined = info->configured; } - /* Reserve a new cpu from cpu_mask. */ - for_each_cpu_mask(cpu, cpu_mask) { - if (cpu_online(cpu)) { - smp_cpu_reserved[cpu]++; - goto out; - } - } - cpu = -ENODEV; -out: - spin_unlock_irqrestore(&smp_reserve_lock, flags); - return cpu; + return info; } -void -smp_put_cpu(int cpu) -{ - unsigned long flags; +static int smp_add_present_cpu(int cpu); - spin_lock_irqsave(&smp_reserve_lock, flags); - smp_cpu_reserved[cpu]--; - spin_unlock_irqrestore(&smp_reserve_lock, flags); -} - -static inline int -cpu_stopped(int cpu) +static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { - __u32 status; - - /* Check for stopped state */ - if (signal_processor_ps(&status, 0, cpu, sigp_sense) == sigp_status_stored) { - if (status & 0x40) - return 1; + struct pcpu *pcpu; + cpumask_t avail; + int cpu, nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + cpu = cpumask_first(&avail); + for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = info->cpu[i].address; + pcpu->state = (i >= info->configured) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); } - return 0; + return nr; } -/* Upping and downing of CPUs */ - -int -__cpu_up(unsigned int cpu) +static void __init smp_detect_cpus(void) { - struct task_struct *idle; - struct _lowcore *cpu_lowcore; - struct stack_frame *sf; - sigp_ccode ccode; - int curr_cpu; - - for (curr_cpu = 0; curr_cpu <= 65535; curr_cpu++) { - __cpu_logical_map[cpu] = (__u16) curr_cpu; - if (cpu_stopped(cpu)) + unsigned int cpu, c_cpus, s_cpus; + struct sclp_cpu_info *info; + + info = smp_get_cpu_info(); + if (!info) + panic("smp_detect_cpus failed to allocate memory\n"); + if (info->has_cpu_type) { + for (cpu = 0; cpu < info->combined; cpu++) { + if (info->cpu[cpu].address != boot_cpu_address) + continue; + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; break; + } + } + c_cpus = s_cpus = 0; + for (cpu = 0; cpu < info->combined; cpu++) { + if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) + continue; + if (cpu < info->configured) { + smp_get_save_area(c_cpus, info->cpu[cpu].address); + c_cpus++; + } else + s_cpus++; } + pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); + get_online_cpus(); + __smp_rescan_cpus(info, 0); + put_online_cpus(); + kfree(info); +} - if (!cpu_stopped(cpu)) - return -ENODEV; +/* + * Activate a secondary processor. + */ +static void smp_start_secondary(void *cpuvoid) +{ + S390_lowcore.last_update_clock = get_tod_clock(); + S390_lowcore.restart_stack = (unsigned long) restart_stack; + S390_lowcore.restart_fn = (unsigned long) do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1UL; + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); + cpu_init(); + preempt_disable(); + init_cpu_timer(); + init_cpu_vtimer(); + pfault_init(); + notify_cpu_starting(smp_processor_id()); + set_cpu_online(smp_processor_id(), true); + inc_irq_stat(CPU_RST); + local_irq_enable(); + cpu_startup_entry(CPUHP_ONLINE); +} - ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]), - cpu, sigp_set_prefix); - if (ccode){ - printk("sigp_set_prefix failed for cpu %d " - "with condition code %d\n", - (int) cpu, (int) ccode); - return -EIO; - } +/* Upping and downing of CPUs */ +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + struct pcpu *pcpu; + int rc; - idle = current_set[cpu]; - cpu_lowcore = lowcore_ptr[cpu]; - cpu_lowcore->kernel_stack = (unsigned long) - task_stack_page(idle) + (THREAD_SIZE); - sf = (struct stack_frame *) (cpu_lowcore->kernel_stack - - sizeof(struct pt_regs) - - sizeof(struct stack_frame)); - memset(sf, 0, sizeof(struct stack_frame)); - sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->save_area[15] = (unsigned long) sf; - __ctl_store(cpu_lowcore->cregs_save_area[0], 0, 15); - __asm__ __volatile__("stam 0,15,0(%0)" - : : "a" (&cpu_lowcore->access_regs_save_area) - : "memory"); - cpu_lowcore->percpu_offset = __per_cpu_offset[cpu]; - cpu_lowcore->current_task = (unsigned long) idle; - cpu_lowcore->cpu_data.cpu_nr = cpu; - eieio(); - signal_processor(cpu,sigp_restart); + pcpu = pcpu_devices + cpu; + if (pcpu->state != CPU_STATE_CONFIGURED) + return -EIO; + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, tidle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); while (!cpu_online(cpu)) cpu_relax(); return 0; } -int -__cpu_disable(void) -{ - unsigned long flags; - ec_creg_mask_parms cr_parms; - int cpu = smp_processor_id(); - - spin_lock_irqsave(&smp_reserve_lock, flags); - if (smp_cpu_reserved[cpu] != 0) { - spin_unlock_irqrestore(&smp_reserve_lock, flags); - return -EBUSY; - } - cpu_clear(cpu, cpu_online_map); - -#ifdef CONFIG_PFAULT - /* Disable pfault pseudo page faults on this cpu. */ - if (MACHINE_IS_VM) - pfault_fini(); -#endif - - /* disable all external interrupts */ - - cr_parms.start_ctl = 0; - cr_parms.end_ctl = 0; - cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1<<15 | 1<<14 | 1<<13 | 1<<12 | - 1<<11 | 1<<10 | 1<< 6 | 1<< 4); - smp_ctl_bit_callback(&cr_parms); +static unsigned int setup_possible_cpus __initdata; - /* disable all I/O interrupts */ - - cr_parms.start_ctl = 6; - cr_parms.end_ctl = 6; - cr_parms.orvals[6] = 0; - cr_parms.andvals[6] = ~(1<<31 | 1<<30 | 1<<29 | 1<<28 | - 1<<27 | 1<<26 | 1<<25 | 1<<24); - smp_ctl_bit_callback(&cr_parms); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); - /* disable most machine checks */ +#ifdef CONFIG_HOTPLUG_CPU - cr_parms.start_ctl = 14; - cr_parms.end_ctl = 14; - cr_parms.orvals[14] = 0; - cr_parms.andvals[14] = ~(1<<28 | 1<<27 | 1<<26 | 1<<25 | 1<<24); - smp_ctl_bit_callback(&cr_parms); +int __cpu_disable(void) +{ + unsigned long cregs[16]; - spin_unlock_irqrestore(&smp_reserve_lock, flags); + /* Handle possible pending IPIs */ + smp_handle_ext_call(); + set_cpu_online(smp_processor_id(), false); + /* Disable pseudo page faults on this cpu. */ + pfault_fini(); + /* Disable interrupt sources via control register. */ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); return 0; } -void -__cpu_die(unsigned int cpu) +void __cpu_die(unsigned int cpu) { + struct pcpu *pcpu; + /* Wait until target cpu is down */ - while (!smp_cpu_not_running(cpu)) + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) cpu_relax(); - printk("Processor %d spun down\n", cpu); + pcpu_free_lowcore(pcpu); + atomic_dec(&init_mm.context.attach_count); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } -void -cpu_die(void) +void __noreturn cpu_die(void) { idle_task_exit(); - signal_processor(smp_processor_id(), sigp_stop); - BUG(); - for(;;); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + for (;;) ; } -/* - * Cycle through the processors and setup structures. - */ +#endif /* CONFIG_HOTPLUG_CPU */ -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init smp_fill_possible_mask(void) { - unsigned long stack; - unsigned int cpu; - int i; - - /* request the 0x1201 emergency signal external interrupt */ - if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) - panic("Couldn't request external interrupt 0x1201"); - smp_check_cpus(max_cpus); - memset(lowcore_ptr,0,sizeof(lowcore_ptr)); - /* - * Initialize prefix pages and stacks for all possible cpus - */ - print_cpu_info(&S390_lowcore.cpu_data); - - for(i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; - lowcore_ptr[i] = (struct _lowcore *) - __get_free_pages(GFP_KERNEL|GFP_DMA, - sizeof(void*) == 8 ? 1 : 0); - stack = __get_free_pages(GFP_KERNEL,ASYNC_ORDER); - if (lowcore_ptr[i] == NULL || stack == 0ULL) - panic("smp_boot_cpus failed to allocate memory\n"); - - *(lowcore_ptr[i]) = S390_lowcore; - lowcore_ptr[i]->async_stack = stack + (ASYNC_SIZE); - stack = __get_free_pages(GFP_KERNEL,0); - if (stack == 0ULL) - panic("smp_boot_cpus failed to allocate memory\n"); - lowcore_ptr[i]->panic_stack = stack + (PAGE_SIZE); -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - lowcore_ptr[i]->extended_save_area_addr = - (__u32) __get_free_pages(GFP_KERNEL,0); - if (lowcore_ptr[i]->extended_save_area_addr == 0) - panic("smp_boot_cpus failed to " - "allocate memory\n"); - } -#endif - } -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - ctl_set_bit(14, 29); /* enable extended save area */ -#endif - set_prefix((u32)(unsigned long) lowcore_ptr[smp_processor_id()]); + unsigned int possible, sclp, cpu; - for_each_cpu(cpu) - if (cpu != smp_processor_id()) - smp_create_idle(cpu); + sclp = sclp_get_max_cpu() ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp); + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); } -void __devinit smp_prepare_boot_cpu(void) +void __init smp_prepare_cpus(unsigned int max_cpus) { - BUG_ON(smp_processor_id() != 0); + /* request the 0x1201 emergency signal external interrupt */ + if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) + panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) + panic("Couldn't request external interrupt 0x1202"); + smp_detect_cpus(); +} - cpu_set(0, cpu_online_map); - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_possible_map); +void __init smp_prepare_boot_cpu(void) +{ + struct pcpu *pcpu = pcpu_devices; + + boot_cpu_address = stap(); + pcpu->state = CPU_STATE_CONFIGURED; + pcpu->address = boot_cpu_address; + pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; - current_set[0] = current; + smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); + set_cpu_present(0, true); + set_cpu_online(0, true); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ } -void smp_cpus_done(unsigned int max_cpus) +void __init smp_setup_processor_id(void) { - cpu_present_map = cpu_possible_map; + S390_lowcore.cpu_nr = 0; + S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } /* @@ -828,32 +811,253 @@ void smp_cpus_done(unsigned int max_cpus) */ int setup_profiling_timer(unsigned int multiplier) { - return 0; + return 0; } -static DEFINE_PER_CPU(struct cpu, cpu_devices); +#ifdef CONFIG_HOTPLUG_CPU +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} -static int __init topology_init(void) +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - int cpu; - int ret; + struct pcpu *pcpu; + int cpu, val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + rc = -EBUSY; + /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; + if (cpu_online(cpu) || cpu == 0) + goto out; + pcpu = pcpu_devices + cpu; + rc = 0; + switch (val) { + case 0: + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_cpu_deconfigure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); + break; + case 1: + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = sclp_cpu_configure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); + break; + default: + break; + } +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +#endif /* CONFIG_HOTPLUG_CPU */ + +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); +} +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); + +static struct attribute *cpu_common_attrs[] = { +#ifdef CONFIG_HOTPLUG_CPU + &dev_attr_configure.attr, +#endif + &dev_attr_address.attr, + NULL, +}; + +static struct attribute_group cpu_common_attr_group = { + .attrs = cpu_common_attrs, +}; + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long idle_count; + unsigned int sequence; + + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->clock_idle_enter)) + idle_count++; + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return sprintf(buf, "%llu\n", idle_count); +} +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); - for_each_cpu(cpu) { - ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu, NULL); - if (ret) - printk(KERN_WARNING "topology_init: register_cpu %d " - "failed (%d)\n", cpu, ret); +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + return sprintf(buf, "%llu\n", idle_time >> 12); +} +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +static struct attribute *cpu_online_attrs[] = { + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, + NULL, +}; + +static struct attribute_group cpu_online_attr_group = { + .attrs = cpu_online_attrs, +}; + +static int smp_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + struct cpu *c = pcpu_devices[cpu].cpu; + struct device *s = &c->dev; + int err = 0; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + break; + case CPU_DEAD: + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); + break; } + return notifier_from_errno(err); +} + +static int smp_add_present_cpu(int cpu) +{ + struct device *s; + struct cpu *c; + int rc; + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + pcpu_devices[cpu].cpu = c; + s = &c->dev; + c->hotpluggable = 1; + rc = register_cpu(c, cpu); + if (rc) + goto out; + rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); + if (rc) + goto out_cpu; + if (cpu_online(cpu)) { + rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + if (rc) + goto out_online; + } + rc = topology_cpu_init(c); + if (rc) + goto out_topology; return 0; + +out_topology: + if (cpu_online(cpu)) + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); +out_online: + sysfs_remove_group(&s->kobj, &cpu_common_attr_group); +out_cpu: +#ifdef CONFIG_HOTPLUG_CPU + unregister_cpu(c); +#endif +out: + return rc; } -subsys_initcall(topology_init); +#ifdef CONFIG_HOTPLUG_CPU -EXPORT_SYMBOL(cpu_possible_map); -EXPORT_SYMBOL(lowcore_ptr); -EXPORT_SYMBOL(smp_ctl_set_bit); -EXPORT_SYMBOL(smp_ctl_clear_bit); -EXPORT_SYMBOL(smp_call_function); -EXPORT_SYMBOL(smp_get_cpu); -EXPORT_SYMBOL(smp_put_cpu); +int __ref smp_rescan_cpus(void) +{ + struct sclp_cpu_info *info; + int nr; + + info = smp_get_cpu_info(); + if (!info) + return -ENOMEM; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + nr = __smp_rescan_cpus(info, 1); + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + kfree(info); + if (nr) + topology_schedule_update(); + return 0; +} +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int rc; + + rc = smp_rescan_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __init s390_smp_init(void) +{ + int cpu, rc = 0; + +#ifdef CONFIG_HOTPLUG_CPU + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); + if (rc) + return rc; +#endif + cpu_notifier_register_begin(); + for_each_present_cpu(cpu) { + rc = smp_add_present_cpu(cpu); + if (rc) + goto out; + } + + __hotcpu_notifier(smp_cpu_notify, 0); + +out: + cpu_notifier_register_done(); + return rc; +} +subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c new file mode 100644 index 00000000000..1785cd82253 --- /dev/null +++ b/arch/s390/kernel/stacktrace.c @@ -0,0 +1,96 @@ +/* + * Stack trace management functions + * + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/kallsyms.h> +#include <linux/module.h> + +static unsigned long save_context_stack(struct stack_trace *trace, + unsigned long sp, + unsigned long low, + unsigned long high, + int savesched) +{ + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long addr; + + while(1) { + sp &= PSW_ADDR_INSN; + if (sp < low || sp > high) + return sp; + sf = (struct stack_frame *)sp; + while(1) { + addr = sf->gprs[8] & PSW_ADDR_INSN; + if (!trace->skip) + trace->entries[trace->nr_entries++] = addr; + else + trace->skip--; + if (trace->nr_entries >= trace->max_entries) + return sp; + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *)sp; + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long)(sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *)sp; + addr = regs->psw.addr & PSW_ADDR_INSN; + if (savesched || !in_sched_functions(addr)) { + if (!trace->skip) + trace->entries[trace->nr_entries++] = addr; + else + trace->skip--; + } + if (trace->nr_entries >= trace->max_entries) + return sp; + low = sp; + sp = regs->gprs[15]; + } +} + +void save_stack_trace(struct stack_trace *trace) +{ + register unsigned long sp asm ("15"); + unsigned long orig_sp, new_sp; + + orig_sp = sp & PSW_ADDR_INSN; + new_sp = save_context_stack(trace, orig_sp, + S390_lowcore.panic_stack - PAGE_SIZE, + S390_lowcore.panic_stack, 1); + if (new_sp != orig_sp) + return; + new_sp = save_context_stack(trace, new_sp, + S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack, 1); + if (new_sp != orig_sp) + return; + save_context_stack(trace, new_sp, + S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE, 1); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + unsigned long sp, low, high; + + sp = tsk->thread.ksp & PSW_ADDR_INSN; + low = (unsigned long) task_stack_page(tsk); + high = (unsigned long) task_pt_regs(tsk); + save_context_stack(trace, sp, low, high, 0); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c new file mode 100644 index 00000000000..a7a7537ce1e --- /dev/null +++ b/arch/s390/kernel/suspend.c @@ -0,0 +1,225 @@ +/* + * Suspend support specific for s390. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + */ + +#include <linux/pfn.h> +#include <linux/suspend.h> +#include <linux/mm.h> +#include <asm/ctl_reg.h> +#include <asm/ipl.h> +#include <asm/cio.h> +#include <asm/pci.h> +#include "entry.h" + +/* + * References to section boundaries + */ +extern const void __nosave_begin, __nosave_end; + +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; +unsigned long suspend_zero_pages; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); + + /* Always save lowcore pages (LC protection might be enabled). */ + if (pfn <= LC_PAGES) + return 0; + if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) + return 1; + /* Skip memory holes and read-only pages (NSS, DCSS, ...). */ + if (tprot(PFN_PHYS(pfn))) + return 1; + return 0; +} + +/* + * PM notifier callback for suspend + */ +static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); + if (!suspend_zero_pages) + return NOTIFY_BAD; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + free_pages(suspend_zero_pages, LC_ORDER); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init suspend_pm_init(void) +{ + pm_notifier(suspend_pm_cb, 0); + return 0; +} +arch_initcall(suspend_pm_init); + +void save_processor_state(void) +{ + /* swsusp_arch_suspend() actually saves all cpu register contents. + * Machine checks must be disabled since swsusp_arch_suspend() stores + * register contents to their lowcore save areas. That's the same + * place where register contents on machine checks would be saved. + * To avoid register corruption disable machine checks. + * We must also disable machine checks in the new psw mask for + * program checks, since swsusp_arch_suspend() may generate program + * checks. Disabling machine checks for all other new psw masks is + * just paranoia. + */ + local_mcck_disable(); + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; +} + +void restore_processor_state(void) +{ + S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; + /* Enable lowcore protection */ + __ctl_set_bit(0,28); + local_mcck_enable(); +} + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); + zpci_rescan(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S new file mode 100644 index 00000000000..6b09fdffbd2 --- /dev/null +++ b/arch/s390/kernel/swsusp_asm64.S @@ -0,0 +1,306 @@ +/* + * S390 64-bit swsusp implementation + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/sigp.h> + +/* + * Save register context in absolute 0 lowcore and call swsusp_save() to + * create in-memory kernel image. The context is saved in the designated + * "store status" memory locations (see POP). + * We return from this function twice. The first time during the suspend to + * disk process. The second time via the swsusp_arch_resume() function + * (see below) in the resume process. + * This function runs with disabled interrupts. + */ + .section .text +ENTRY(swsusp_arch_suspend) + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Store prefix register on stack */ + stpx __SF_EMPTY(%r15) + + /* Save prefix register contents for lowcore copy */ + llgf %r10,__SF_EMPTY(%r15) + + /* Get pointer to save area */ + lghi %r1,0x1000 + + /* Save CPU address */ + stap __LC_EXT_CPU_ADDR(%r0) + + /* Store registers */ + mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ + stfpc 0x31c(%r1) /* store fpu control */ + std 0,0x200(%r1) /* store f0 */ + std 1,0x208(%r1) /* store f1 */ + std 2,0x210(%r1) /* store f2 */ + std 3,0x218(%r1) /* store f3 */ + std 4,0x220(%r1) /* store f4 */ + std 5,0x228(%r1) /* store f5 */ + std 6,0x230(%r1) /* store f6 */ + std 7,0x238(%r1) /* store f7 */ + std 8,0x240(%r1) /* store f8 */ + std 9,0x248(%r1) /* store f9 */ + std 10,0x250(%r1) /* store f10 */ + std 11,0x258(%r1) /* store f11 */ + std 12,0x260(%r1) /* store f12 */ + std 13,0x268(%r1) /* store f13 */ + std 14,0x270(%r1) /* store f14 */ + std 15,0x278(%r1) /* store f15 */ + stam %a0,%a15,0x340(%r1) /* store access registers */ + stctg %c0,%c15,0x380(%r1) /* store control registers */ + stmg %r0,%r15,0x280(%r1) /* store general registers */ + + stpt 0x328(%r1) /* store timer */ + stck __SF_EMPTY(%r15) /* store clock */ + stckc 0x330(%r1) /* store clock comparator */ + + /* Update cputime accounting before going to sleep */ + lg %r0,__LC_LAST_UPDATE_TIMER + slg %r0,0x328(%r1) + alg %r0,__LC_SYSTEM_TIMER + stg %r0,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1) + lg %r0,__LC_LAST_UPDATE_CLOCK + slg %r0,__SF_EMPTY(%r15) + alg %r0,__LC_STEAL_TIMER + stg %r0,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Save absolute zero pages */ + larl %r2,suspend_zero_pages + lg %r2,0(%r2) + lghi %r4,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Copy lowcore to absolute zero lowcore */ + lghi %r2,0 + lgr %r4,%r10 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Save image */ + brasl %r14,swsusp_save + + /* Restore prefix register and return */ + lghi %r1,0x1000 + spx 0x318(%r1) + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 + +/* + * Restore saved memory image to correct place and restore register context. + * Then we return to the function that called swsusp_arch_suspend(). + * swsusp_arch_resume() runs with disabled interrupts. + */ +ENTRY(swsusp_arch_resume) + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Make all free pages stable */ + lghi %r2,1 + brasl %r14,arch_set_page_states + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Restore saved image */ + larl %r1,restore_pblist + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 2f +0: + lg %r2,8(%r1) + lg %r4,0(%r1) + iske %r0,%r4 + lghi %r3,PAGE_SIZE + lghi %r5,PAGE_SIZE +1: + mvcle %r2,%r4,0 + jo 1b + lg %r2,8(%r1) + sske %r0,%r2 + lg %r1,16(%r1) + ltgr %r1,%r1 + jnz 0b +2: + ptlb /* flush tlb */ + + /* Reset System */ + larl %r1,restart_entry + larl %r2,.Lrestart_diag308_psw + og %r1,0(%r2) + stg %r1,0(%r0) + larl %r1,.Lnew_pgm_check_psw + epsw %r2,%r3 + stm %r2,%r3,0(%r1) + mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1) + lghi %r0,0 + diag %r0,%r0,0x308 +restart_entry: + lhi %r1,1 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + sam64 + larl %r1,.Lnew_pgm_check_psw + lpswe 0(%r1) +pgm_check_entry: + + /* Switch to original suspend CPU */ + larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ + stap 0(%r1) + llgh %r2,0(%r1) + llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ + cgr %r1,%r2 + je restore_registers /* r1 = r2 -> nothing to do */ + larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ + mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) +3: + sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */ + brc 8,4f /* accepted */ + brc 2,3b /* busy, try again */ + + /* Suspend CPU not available -> panic */ + larl %r15,init_thread_union + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) + larl %r2,.Lpanic_string + larl %r3,_sclp_print_early + lghi %r1,0 + sam31 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + basr %r14,%r3 + larl %r3,.Ldisabled_wait_31 + lpsw 0(%r3) +4: + /* Switch to suspend CPU */ + sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */ + brc 2,4b /* busy, try again */ +5: + sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */ + brc 2,5b /* busy, try again */ +6: j 6b + +restart_suspend: + larl %r1,.Lresume_cpu + llgh %r2,0(%r1) +7: + sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */ + brc 8,7b /* accepted, status 0, still running */ + brc 2,7b /* busy, try again */ + tmll %r9,0x40 /* Test if resume CPU is stopped */ + jz 7b + +restore_registers: + /* Restore registers */ + lghi %r13,0x1000 /* %r1 = pointer to save area */ + + /* Ignore time spent in suspended state. */ + llgf %r1,0x318(%r13) + stck __LC_LAST_UPDATE_CLOCK(%r1) + spt 0x328(%r13) /* reprogram timer */ + //sckc 0x330(%r13) /* set clock comparator */ + + lctlg %c0,%c15,0x380(%r13) /* load control registers */ + lam %a0,%a15,0x340(%r13) /* load access registers */ + + lfpc 0x31c(%r13) /* load fpu control */ + ld 0,0x200(%r13) /* load f0 */ + ld 1,0x208(%r13) /* load f1 */ + ld 2,0x210(%r13) /* load f2 */ + ld 3,0x218(%r13) /* load f3 */ + ld 4,0x220(%r13) /* load f4 */ + ld 5,0x228(%r13) /* load f5 */ + ld 6,0x230(%r13) /* load f6 */ + ld 7,0x238(%r13) /* load f7 */ + ld 8,0x240(%r13) /* load f8 */ + ld 9,0x248(%r13) /* load f9 */ + ld 10,0x250(%r13) /* load f10 */ + ld 11,0x258(%r13) /* load f11 */ + ld 12,0x260(%r13) /* load f12 */ + ld 13,0x268(%r13) /* load f13 */ + ld 14,0x270(%r13) /* load f14 */ + ld 15,0x278(%r13) /* load f15 */ + + /* Load old stack */ + lg %r15,0x2f8(%r13) + + /* Save prefix register */ + mvc __SF_EMPTY(4,%r15),0x318(%r13) + + /* Restore absolute zero pages */ + lghi %r2,0 + larl %r4,suspend_zero_pages + lg %r4,0(%r4) + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Restore prefix register */ + spx __SF_EMPTY(%r15) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Make all free pages unstable */ + lghi %r2,0 + brasl %r14,arch_set_page_states + + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume + + /* Return 0 */ + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 + + .section .data..nosave,"aw",@progbits + .align 8 +.Ldisabled_wait_31: + .long 0x000a0000,0x00000000 +.Lpanic_string: + .asciz "Resume not possible because suspend CPU is no longer available" + .align 8 +.Lrestart_diag308_psw: + .long 0x00080000,0x80000000 +.Lrestart_suspend_psw: + .quad 0x0000000180000000,restart_suspend +.Lnew_pgm_check_psw: + .quad 0,pgm_check_entry +.Lresume_cpu: + .byte 0,0 diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 6a63553493c..23eb222c165 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/sys_s390.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Thomas Spatzier (tspat@de.ibm.com) * @@ -16,8 +14,8 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/fs.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/sem.h> #include <linux/msg.h> #include <linux/shm.h> @@ -27,61 +25,18 @@ #include <linux/file.h> #include <linux/utsname.h> #include <linux/personality.h> - +#include <linux/unistd.h> +#include <linux/ipc.h> #include <asm/uaccess.h> -#include <asm/ipc.h> - -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - long error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} +#include "entry.h" /* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. Linux for S/390 isn't able to handle more than 5 - * system call parameters, so these system calls used a memory - * block for parameter passing.. + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. */ -struct mmap_arg_struct { +struct s390_mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; @@ -90,146 +45,48 @@ struct mmap_arg_struct { unsigned long offset; }; -asmlinkage long sys_mmap2(struct mmap_arg_struct __user *arg) +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) { - struct mmap_arg_struct a; + struct s390_mmap_arg_struct a; int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - -asmlinkage long old_mmap(struct mmap_arg_struct __user *arg) -{ - struct mmap_arg_struct a; - long error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); out: return error; } -#ifndef CONFIG_64BIT -struct sel_arg_struct { - unsigned long n; - fd_set *inp, *outp, *exp; - struct timeval *tvp; -}; - -asmlinkage long old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); - -} -#endif /* CONFIG_64BIT */ - /* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. + * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ -asmlinkage long sys_ipc(uint call, int first, unsigned long second, - unsigned long third, void __user *ptr) +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) { - struct ipc_kludge tmp; - int ret; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) third); - case SEMGET: - return sys_semget(first, (int)second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, (int)second, third, fourth); - } - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - (size_t)second, third); - break; - case MSGRCV: - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, (struct ipc_kludge __user *) ptr, - sizeof (struct ipc_kludge))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, - (size_t)second, tmp.msgtyp, third); - case MSGGET: - return sys_msgget((key_t)first, (int)second); - case MSGCTL: - return sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, - (int)second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - break; - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget(first, (size_t)second, third); - case SHMCTL: - return sys_shmctl(first, (int)second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - - } - - return -EINVAL; + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return sys_ipc(call, first, second, third, ptr, third); } #ifdef CONFIG_64BIT -asmlinkage long s390x_newuname(struct new_utsname __user *name) -{ - int ret = sys_newuname(name); - - if (current->personality == PER_LINUX32 && !ret) { - ret = copy_to_user(name->machine, "s390\0\0\0\0", 8); - if (ret) ret = -EFAULT; - } - return ret; -} - -asmlinkage long s390x_personality(unsigned long personality) +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) { - int ret; + unsigned int ret; - if (current->personality == PER_LINUX32 && personality == PER_LINUX) - personality = PER_LINUX32; + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; return ret; } @@ -240,15 +97,13 @@ asmlinkage long s390x_personality(unsigned long personality) */ #ifndef CONFIG_64BIT -asmlinkage long -s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice) +SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low, + size_t, len, int, advice) { return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low, len, advice); } -#endif - struct fadvise64_64_args { int fd; long long offset; @@ -256,8 +111,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -s390_fadvise64_64(struct fadvise64_64_args __user *args) +SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; @@ -266,3 +120,21 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args) return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } +/* + * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last + * 64 bit argument "len" is split into the upper and lower 32 bits. The + * system call wrapper in the user space loads the value to %r6/%r7. + * The code in entry.S keeps the values in %r2 - %r6 where they are and + * stores %r7 to 96(%r15). But the standard C linkage requires that + * the whole 64 bit value for len is stored on the stack and doesn't + * use %r6 at all. So s390_fallocate has to convert the arguments from + * %r2: fd, %r3: mode, %r4/%r5: offset, %r6/96(%r15)-99(%r15): len + * to + * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len + */ +SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset, + u32, len_high, u32, len_low) +{ + return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); +} +#endif diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 426d7cafdab..fe5cdf29a00 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -9,289 +9,350 @@ #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall) NI_SYSCALL /* 0 */ -SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper) -SYSCALL(sys_fork_glue,sys_fork_glue,sys_fork_glue) -SYSCALL(sys_read,sys_read,sys32_read_wrapper) -SYSCALL(sys_write,sys_write,sys32_write_wrapper) -SYSCALL(sys_open,sys_open,sys32_open_wrapper) /* 5 */ -SYSCALL(sys_close,sys_close,sys32_close_wrapper) +SYSCALL(sys_exit,sys_exit,compat_sys_exit) +SYSCALL(sys_fork,sys_fork,sys_fork) +SYSCALL(sys_read,sys_read,compat_sys_s390_read) +SYSCALL(sys_write,sys_write,compat_sys_s390_write) +SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */ +SYSCALL(sys_close,sys_close,compat_sys_close) SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) -SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper) -SYSCALL(sys_link,sys_link,sys32_link_wrapper) -SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */ -SYSCALL(sys_execve_glue,sys_execve_glue,sys32_execve_glue) -SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper) -SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ -SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper) -SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ -SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ +SYSCALL(sys_creat,sys_creat,compat_sys_creat) +SYSCALL(sys_link,sys_link,compat_sys_link) +SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */ +SYSCALL(sys_execve,sys_execve,compat_sys_execve) +SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir) +SYSCALL(sys_time,sys_ni_syscall,compat_sys_time) /* old time syscall */ +SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod) +SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */ +SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ -SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper) +SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ -SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) -SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) -SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/ -SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ -SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ -SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) -SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) +SYSCALL(sys_mount,sys_mount,compat_sys_mount) +SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount) +SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ +SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ +SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */ +SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace) +SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm) NI_SYSCALL /* old fstat syscall */ -SYSCALL(sys_pause,sys_pause,sys32_pause) -SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ +SYSCALL(sys_pause,sys_pause,sys_pause) +SYSCALL(sys_utime,sys_utime,compat_sys_utime) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ -SYSCALL(sys_access,sys_access,sys32_access_wrapper) -SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper) +SYSCALL(sys_access,sys_access,compat_sys_access) +SYSCALL(sys_nice,sys_nice,compat_sys_nice) NI_SYSCALL /* 35 old ftime syscall */ SYSCALL(sys_sync,sys_sync,sys_sync) -SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) -SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper) -SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper) -SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */ -SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper) -SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) -SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) +SYSCALL(sys_kill,sys_kill,compat_sys_kill) +SYSCALL(sys_rename,sys_rename,compat_sys_rename) +SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir) +SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */ +SYSCALL(sys_dup,sys_dup,compat_sys_dup) +SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe) +SYSCALL(sys_times,sys_times,compat_sys_times) NI_SYSCALL /* old prof syscall */ -SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ -SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/ -SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ -SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) -SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ -SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */ -SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) -SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) +SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */ +SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ +SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/ +SYSCALL(sys_signal,sys_signal,compat_sys_signal) +SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */ +SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */ +SYSCALL(sys_acct,sys_acct,compat_sys_acct) +SYSCALL(sys_umount,sys_umount,compat_sys_umount) NI_SYSCALL /* old lock syscall */ -SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) -SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ +SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl) +SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl) /* 55 */ NI_SYSCALL /* intel mpx syscall */ -SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper) +SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid) NI_SYSCALL /* old ulimit syscall */ NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */ -SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper) -SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) -SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper) +SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */ +SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot) +SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat) +SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2) SYSCALL(sys_getppid,sys_getppid,sys_getppid) SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ SYSCALL(sys_setsid,sys_setsid,sys_setsid) -SYSCALL(sys_sigaction,sys_sigaction,sys32_sigaction_wrapper) +SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction) NI_SYSCALL /* old sgetmask syscall*/ NI_SYSCALL /* old ssetmask syscall*/ -SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ -SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */ -SYSCALL(sys_sigsuspend_glue,sys_sigsuspend_glue,sys32_sigsuspend_glue) -SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) -SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) -SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ -SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) -SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) -SYSCALL(sys_gettimeofday,sys_gettimeofday,sys32_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,sys32_settimeofday_wrapper) -SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ -SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ +SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */ +SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ +SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend) +SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending) +SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname) +SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit) /* 75 */ +SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit) +SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday) +SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */ +SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ -SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) +SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink) NI_SYSCALL /* old lstat syscall */ -SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */ -SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) -SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) -SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) -SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(old_mmap,old_mmap,old32_mmap_wrapper) /* 90 */ -SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) -SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) -SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) -SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) -SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ -SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) -SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) +SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */ +SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib) +SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon) +SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot) +SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */ +SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ +SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap) +SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) +SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) +SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod) +SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/ +SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority) +SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority) NI_SYSCALL /* old profil syscall */ -SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) -SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ +SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs) +SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs) /* 100 */ NI_SYSCALL /* ioperm for i386 */ -SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) -SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper) -SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer_wrapper) -SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer_wrapper) /* 105 */ -SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) -SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) -SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) +SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall) +SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog) +SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer) +SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */ +SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat) +SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat) +SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat) NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */ +SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ -SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper) -SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ -SYSCALL(sys_sysinfo,sys_sysinfo,sys32_sysinfo_wrapper) -SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper) -SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) -SYSCALL(sys_sigreturn_glue,sys_sigreturn_glue,sys32_sigreturn_glue) -SYSCALL(sys_clone_glue,sys_clone_glue,sys32_clone_glue) /* 120 */ -SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) -SYSCALL(sys_newuname,s390x_newuname,sys32_newuname_wrapper) +SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) +SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */ +SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo) +SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) +SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync) +SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn) +SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */ +SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) +SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ -SYSCALL(sys_adjtimex,sys_adjtimex,sys32_adjtimex_wrapper) -SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ -SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper) +SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex) +SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */ +SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask) NI_SYSCALL /* old "create module" */ -SYSCALL(sys_init_module,sys_init_module,sys32_init_module_wrapper) -SYSCALL(sys_delete_module,sys_delete_module,sys32_delete_module_wrapper) +SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module) +SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module) NI_SYSCALL /* 130: old get_kernel_syms */ -SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper) -SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) -SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) -SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) -SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ -SYSCALL(sys_personality,s390x_personality,sys32_personality_wrapper) +SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl) +SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid) +SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir) +SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush) +SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs) /* 135 */ +SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality) NI_SYSCALL /* for afs_syscall */ -SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ -SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ -SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ -SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) -SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) -SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) -SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) -SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ -SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) -SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) -SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) -SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper) -SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */ -SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper) -SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper) +SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ +SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ +SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */ +SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents) +SYSCALL(sys_select,sys_select,compat_sys_select) +SYSCALL(sys_flock,sys_flock,compat_sys_flock) +SYSCALL(sys_msync,sys_msync,compat_sys_msync) +SYSCALL(sys_readv,sys_readv,compat_sys_readv) /* 145 */ +SYSCALL(sys_writev,sys_writev,compat_sys_writev) +SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid) +SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync) +SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl) +SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock) /* 150 */ +SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock) +SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall) SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) -SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper) -SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */ -SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper) -SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) +SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam) +SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */ +SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler) +SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler) SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) -SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) -SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */ -SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,sys32_sched_rr_get_interval_wrapper) -SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) -SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) -SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */ -SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */ +SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max) +SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */ +SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) +SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep) +SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap) +SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */ +SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */ NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ -SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) -SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper) -SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ -SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ -SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) -SYSCALL(sys_rt_sigreturn_glue,sys_rt_sigreturn_glue,sys32_rt_sigreturn_glue) -SYSCALL(sys_rt_sigaction,sys_rt_sigaction,sys32_rt_sigaction_wrapper) -SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,sys32_rt_sigprocmask_wrapper) /* 175 */ -SYSCALL(sys_rt_sigpending,sys_rt_sigpending,sys32_rt_sigpending_wrapper) -SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait_wrapper) -SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,sys32_rt_sigqueueinfo_wrapper) -SYSCALL(sys_rt_sigsuspend_glue,sys_rt_sigsuspend_glue,sys32_rt_sigsuspend_glue) -SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ -SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) -SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */ -SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) -SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) -SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ -SYSCALL(sys_sigaltstack_glue,sys_sigaltstack_glue,sys32_sigaltstack_glue) -SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) +SYSCALL(sys_poll,sys_poll,compat_sys_poll) +NI_SYSCALL /* old nfsservctl */ +SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ +SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */ +SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl) +SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn) +SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) +SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */ +SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending) +SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait) +SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo) +SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) +SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */ +SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64) +SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */ +SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd) +SYSCALL(sys_capget,sys_capget,compat_sys_capget) +SYSCALL(sys_capset,sys_capset,compat_sys_capset) /* 185 */ +SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) +SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ -SYSCALL(sys_vfork_glue,sys_vfork_glue,sys_vfork_glue) /* 190 */ -SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) -SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) -SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper) -SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) -SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ -SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) -SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) -SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) +SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ +SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit) +SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2) +SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) +SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) +SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */ +SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64) +SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64) +SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown) SYSCALL(sys_getuid,sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) SYSCALL(sys_getegid,sys_getegid,sys_getegid) -SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper) -SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper) -SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */ -SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper) -SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper) -SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper) -SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper) -SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */ -SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper) -SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper) -SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) -SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) -SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ -SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper) -SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper) -SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) -SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) -SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ -SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) -SYSCALL(sys_readahead,sys_readahead,sys32_readahead) -SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64) -SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) -SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) -SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) -SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) -SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) -SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper) -SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper) -SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper) -SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper) -SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */ +SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid) +SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid) +SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups) /* 205 */ +SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups) +SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown) +SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid) +SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid) +SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */ +SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid) +SYSCALL(sys_chown,sys_chown,compat_sys_chown) +SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid) +SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid) +SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid) /* 215 */ +SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid) +SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root) +SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore) +SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise) +SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */ +SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64) +SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) +SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) +SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr) +SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */ +SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr) +SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr) +SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr) +SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr) +SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr) /* 230 */ +SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr) +SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr) +SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr) +SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr) +SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid,sys_gettid) -SYSCALL(sys_tkill,sys_tkill,sys_tkill) -SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper) -SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) -SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ -SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill) +SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill) +SYSCALL(sys_futex,sys_futex,compat_sys_futex) +SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity) +SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */ +SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill) NI_SYSCALL /* reserved for TUX */ -SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) -SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper) -SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ -SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) -SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper) -SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper) -SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) -SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ -SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) -SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) -SYSCALL(s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) -SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) -SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ -SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) -SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper) -SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper) -SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) -SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ -SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) -SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup) +SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy) +SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents) /* 245 */ +SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit) +SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel) +SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group) +SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create) +SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */ +SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait) +SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address) +SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64) +SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create) +SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime) /* 255 */ +SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime) +SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun) +SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete) +SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime) +SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime) /* 260 */ +SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres) +SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep) NI_SYSCALL /* reserved for vserver */ -SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) -SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) -SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) -SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) +SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64) +SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64) +SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64) +SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages) NI_SYSCALL /* 268 sys_mbind */ NI_SYSCALL /* 269 sys_get_mempolicy */ NI_SYSCALL /* 270 sys_set_mempolicy */ -SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) -SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) -SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) -SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) -SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ -SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) -SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) -SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) -SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) +SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open) +SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink) +SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend) +SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive) +SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */ +SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr) +SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load) +SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key) +SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key) SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */ -SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper) -SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper) -SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper) +SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid) +SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set) +SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get) SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init) -SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) -SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper) +SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */ +SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch) +NI_SYSCALL /* 287 sys_migrate_pages */ +SYSCALL(sys_openat,sys_openat,compat_sys_openat) +SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat) +SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */ +SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat) +SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat) +SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) +SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat) +SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */ +SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat) +SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat) +SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat) +SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat) +SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */ +SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6) +SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll) +SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare) +SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) +SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) +SYSCALL(sys_splice,sys_splice,compat_sys_splice) +SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range) +SYSCALL(sys_tee,sys_tee,compat_sys_tee) +SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) +NI_SYSCALL /* 310 sys_move_pages */ +SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) +SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes) +SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate) +SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat) /* 315 */ +SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) +NI_SYSCALL /* 317 old sys_timer_fd */ +SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd) +SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create) +SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ +SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime) +SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4) +SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2) +SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1) +SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */ +SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3) +SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1) +SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) +SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) +SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ +SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open) +SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init) +SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark) +SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64) +SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */ +SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at) +SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime) +SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs) +SYSCALL(sys_setns,sys_setns,compat_sys_setns) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev) +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr) +SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp) +SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) +SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ +SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) +SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c new file mode 100644 index 00000000000..811f542b8ed --- /dev/null +++ b/arch/s390/kernel/sysinfo.c @@ -0,0 +1,428 @@ +/* + * Copyright IBM Corp. 2001, 2009 + * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> +#include <asm/topology.h> + +/* Sigh, math-emu. Don't ask. */ +#include <asm/sfp-util.h> +#include <math-emu/soft-fp.h> +#include <math-emu/single.h> + +int topology_max_mnest; + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + register int r0 asm("0") = (fc << 28) | sel1; + register int r1 asm("1") = sel2; + int rc = 0; + + asm volatile( + " stsi 0(%3)\n" + "0: jz 2f\n" + "1: lhi %1,%4\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (r0), "+d" (rc) + : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) + : "cc", "memory"); + if (rc) + return rc; + return fc ? 0 : ((unsigned int) r0) >> 28; +} +EXPORT_SYMBOL(stsi); + +static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) +{ + int i; + + if (stsi(info, 1, 1, 1)) + return; + EBCASC(info->manufacturer, sizeof(info->manufacturer)); + EBCASC(info->type, sizeof(info->type)); + EBCASC(info->model, sizeof(info->model)); + EBCASC(info->sequence, sizeof(info->sequence)); + EBCASC(info->plant, sizeof(info->plant)); + EBCASC(info->model_capacity, sizeof(info->model_capacity)); + EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); + EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); + seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); + seq_printf(m, "Type: %-4.4s\n", info->type); + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + seq_printf(m, "Model: %-16.16s", info->model_capacity); + if (info->model[0] != '\0') + seq_printf(m, " %-16.16s", info->model); + seq_putc(m, '\n'); + seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence); + seq_printf(m, "Plant: %-4.4s\n", info->plant); + seq_printf(m, "Model Capacity: %-16.16s %08u\n", + info->model_capacity, info->model_cap_rating); + if (info->model_perm_cap_rating) + seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n", + info->model_perm_cap, + info->model_perm_cap_rating); + if (info->model_temp_cap_rating) + seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n", + info->model_temp_cap, + info->model_temp_cap_rating); + if (info->ncr) + seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr); + if (info->npr) + seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr); + if (info->ntr) + seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr); + if (info->cai) { + seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai); + seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr); + seq_printf(m, "Capacity Transient: %d\n", info->t); + } + if (info->p) { + for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) { + seq_printf(m, "Type %d Percentage: %d\n", + i, info->typepct[i - 1]); + } + } +} + +static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) +{ + static int max_mnest; + int i, rc; + + seq_putc(m, '\n'); + if (!MACHINE_HAS_TOPOLOGY) + return; + if (stsi(info, 15, 1, topology_max_mnest)) + return; + seq_printf(m, "CPU Topology HW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); +#ifdef CONFIG_SCHED_MC + store_topology(info); + seq_printf(m, "CPU Topology SW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); +#endif +} + +static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) +{ + struct sysinfo_1_2_2_extension *ext; + int i; + + if (stsi(info, 1, 2, 2)) + return; + ext = (struct sysinfo_1_2_2_extension *) + ((unsigned long) info + info->acc_offset); + seq_printf(m, "CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the strange + * encoding of the alternate cpu capability. + */ + seq_printf(m, "Capability: %u", info->capability); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_capability); + seq_putc(m, '\n'); + if (info->nominal_cap) + seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap); + if (info->secondary_cap) + seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap); + for (i = 2; i <= info->cpus_total; i++) { + seq_printf(m, "Adjustment %02d-way: %u", + i, info->adjustment[i-2]); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_adjustment[i-2]); + seq_putc(m, '\n'); + } +} + +static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) +{ + if (stsi(info, 2, 2, 2)) + return; + EBCASC(info->name, sizeof(info->name)); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Number: %d\n", info->lpar_number); + seq_printf(m, "LPAR Characteristics: "); + if (info->characteristics & LPAR_CHAR_DEDICATED) + seq_printf(m, "Dedicated "); + if (info->characteristics & LPAR_CHAR_SHARED) + seq_printf(m, "Shared "); + if (info->characteristics & LPAR_CHAR_LIMITED) + seq_printf(m, "Limited "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Name: %-8.8s\n", info->name); + seq_printf(m, "LPAR Adjustment: %d\n", info->caf); + seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); + seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); + seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); +} + +static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) +{ + int i; + + if (stsi(info, 3, 2, 2)) + return; + for (i = 0; i < info->count; i++) { + EBCASC(info->vm[i].name, sizeof(info->vm[i].name)); + EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi)); + seq_putc(m, '\n'); + seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name); + seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi); + seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf); + seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total); + seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); + seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); + seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); + } +} + +static int sysinfo_show(struct seq_file *m, void *v) +{ + void *info = (void *)get_zeroed_page(GFP_KERNEL); + int level; + + if (!info) + return 0; + level = stsi(NULL, 0, 0, 0); + if (level >= 1) + stsi_1_1_1(m, info); + if (level >= 1) + stsi_15_1_x(m, info); + if (level >= 1) + stsi_1_2_2(m, info); + if (level >= 2) + stsi_2_2_2(m, info); + if (level >= 3) + stsi_3_2_2(m, info); + free_page((unsigned long)info); + return 0; +} + +static int sysinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, sysinfo_show, NULL); +} + +static const struct file_operations sysinfo_fops = { + .open = sysinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init sysinfo_create_proc(void) +{ + proc_create("sysinfo", 0444, NULL, &sysinfo_fops); + return 0; +} +device_initcall(sysinfo_create_proc); + +/* + * Service levels interface. + */ + +static DECLARE_RWSEM(service_level_sem); +static LIST_HEAD(service_level_list); + +int register_service_level(struct service_level *slr) +{ + struct service_level *ptr; + + down_write(&service_level_sem); + list_for_each_entry(ptr, &service_level_list, list) + if (ptr == slr) { + up_write(&service_level_sem); + return -EEXIST; + } + list_add_tail(&slr->list, &service_level_list); + up_write(&service_level_sem); + return 0; +} +EXPORT_SYMBOL(register_service_level); + +int unregister_service_level(struct service_level *slr) +{ + struct service_level *ptr, *next; + int rc = -ENOENT; + + down_write(&service_level_sem); + list_for_each_entry_safe(ptr, next, &service_level_list, list) { + if (ptr != slr) + continue; + list_del(&ptr->list); + rc = 0; + break; + } + up_write(&service_level_sem); + return rc; +} +EXPORT_SYMBOL(unregister_service_level); + +static void *service_level_start(struct seq_file *m, loff_t *pos) +{ + down_read(&service_level_sem); + return seq_list_start(&service_level_list, *pos); +} + +static void *service_level_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &service_level_list, pos); +} + +static void service_level_stop(struct seq_file *m, void *p) +{ + up_read(&service_level_sem); +} + +static int service_level_show(struct seq_file *m, void *p) +{ + struct service_level *slr; + + slr = list_entry(p, struct service_level, list); + slr->seq_print(m, slr); + return 0; +} + +static const struct seq_operations service_level_seq_ops = { + .start = service_level_start, + .next = service_level_next, + .stop = service_level_stop, + .show = service_level_show +}; + +static int service_level_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &service_level_seq_ops); +} + +static const struct file_operations service_level_ops = { + .open = service_level_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void service_level_vm_print(struct seq_file *m, + struct service_level *slr) +{ + char *query_buffer, *str; + + query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA); + if (!query_buffer) + return; + cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL); + str = strchr(query_buffer, '\n'); + if (str) + *str = 0; + seq_printf(m, "VM: %s\n", query_buffer); + kfree(query_buffer); +} + +static struct service_level service_level_vm = { + .seq_print = service_level_vm_print +}; + +static __init int create_proc_service_level(void) +{ + proc_create("service_levels", 0, NULL, &service_level_ops); + if (MACHINE_IS_VM) + register_service_level(&service_level_vm); + return 0; +} +subsys_initcall(create_proc_service_level); + +/* + * CPU capability might have changed. Therefore recalculate loops_per_jiffy. + */ +void s390_adjust_jiffies(void) +{ + struct sysinfo_1_2_2 *info; + const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + unsigned int capability; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return; + + if (stsi(info, 1, 2, 2) == 0) { + /* + * Major sigh. The cpu capability encoding is "special". + * If the first 9 bits of info->capability are 0 then it + * is a 32 bit unsigned integer in the range 0 .. 2^23. + * If the first 9 bits are != 0 then it is a 32 bit float. + * In addition a lower value indicates a proportionally + * higher cpu capacity. Bogomips are the other way round. + * To get to a halfway suitable number we divide 1e7 + * by the cpu capability number. Yes, that means a floating + * point division .. math-emu here we come :-) + */ + FP_UNPACK_SP(SA, &fmil); + if ((info->capability >> 23) == 0) + FP_FROM_INT_S(SB, (long) info->capability, 64, long); + else + FP_UNPACK_SP(SB, &info->capability); + FP_DIV_S(SR, SA, SB); + FP_TO_INT_S(capability, SR, 32, 0); + } else + /* + * Really old machine without stsi block for basic + * cpu information. Report 42.0 bogomips. + */ + capability = 42; + loops_per_jiffy = capability * (500000/HZ); + free_page((unsigned long) info); +} + +/* + * calibrate the delay loop + */ +void calibrate_delay(void) +{ + s390_adjust_jiffies(); + /* Print the good old Bogomips line .. */ + printk(KERN_DEBUG "Calibrating delay loop (skipped)... " + "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index b0d8ca8e5ee..0931b110c82 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/time.c * Time of day based timer functions. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2008 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) @@ -12,7 +11,10 @@ * Copyright (C) 1991, 1992, 1995 Linus Torvalds */ -#include <linux/config.h> +#define KMSG_COMPONENT "time" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel_stat.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/sched.h> @@ -21,7 +23,10 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/stop_machine.h> #include <linux/time.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/smp.h> @@ -29,359 +34,1755 @@ #include <linux/profile.h> #include <linux/timex.h> #include <linux/notifier.h> - +#include <linux/timekeeper_internal.h> +#include <linux/clockchips.h> +#include <linux/gfp.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/delay.h> -#include <asm/s390_ext.h> #include <asm/div64.h> +#include <asm/vdso.h> #include <asm/irq.h> -#include <asm/timer.h> +#include <asm/irq_regs.h> +#include <asm/vtimer.h> +#include <asm/etr.h> +#include <asm/cio.h> +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) #define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) -/* - * Create a small time difference between the timer interrupts - * on the different cpus to avoid lock contention. - */ -#define CPU_DEVIATION (smp_processor_id() << 12) - -#define TICK_SIZE tick - -static ext_int_info_t ext_int_info_cc; -static u64 init_timer_cc; -static u64 jiffies_timer_cc; -static u64 xtime_cc; +u64 sched_clock_base_cc = -1; /* Force to data section. */ +EXPORT_SYMBOL_GPL(sched_clock_base_cc); -extern unsigned long wall_jiffies; +static DEFINE_PER_CPU(struct clock_event_device, comparators); /* * Scheduler clock - returns current time in nanosec units. */ -unsigned long long sched_clock(void) +unsigned long long notrace __kprobes sched_clock(void) { - return ((get_clock() - jiffies_timer_cc) * 1000) >> 12; + return tod_to_ns(get_tod_clock_monotonic()); } -void tod_to_timeval(__u64 todval, struct timespec *xtime) +/* + * Monotonic_clock - returns # of nanoseconds passed since time_init() + */ +unsigned long long monotonic_clock(void) +{ + return sched_clock(); +} +EXPORT_SYMBOL(monotonic_clock); + +void tod_to_timeval(__u64 todval, struct timespec *xt) { unsigned long long sec; sec = todval >> 12; do_div(sec, 1000000); - xtime->tv_sec = sec; + xt->tv_sec = sec; todval -= (sec * 1000000) << 12; - xtime->tv_nsec = ((todval * 1000) >> 12); + xt->tv_nsec = ((todval * 1000) >> 12); +} +EXPORT_SYMBOL(tod_to_timeval); + +void clock_comparator_work(void) +{ + struct clock_event_device *cd; + + S390_lowcore.clock_comparator = -1ULL; + cd = &__get_cpu_var(comparators); + cd->event_handler(cd); } -static inline unsigned long do_gettimeoffset(void) +/* + * Fixup the clock comparator. + */ +static void fixup_clock_comparator(unsigned long long delta) { - __u64 now; + /* If nobody is waiting there's nothing to fix. */ + if (S390_lowcore.clock_comparator == -1ULL) + return; + S390_lowcore.clock_comparator += delta; + set_clock_comparator(S390_lowcore.clock_comparator); +} - now = (get_clock() - jiffies_timer_cc) >> 12; - /* We require the offset from the latest update of xtime */ - now -= (__u64) wall_jiffies*USECS_PER_JIFFY; - return (unsigned long) now; +static int s390_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + S390_lowcore.clock_comparator = get_tod_clock() + delta; + set_clock_comparator(S390_lowcore.clock_comparator); + return 0; +} + +static void s390_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ } /* - * This version of gettimeofday has microsecond resolution. + * Set up lowcore and control register of the current cpu to + * enable TOD clock and clock comparator interrupts. */ -void do_gettimeofday(struct timeval *tv) +void init_cpu_timer(void) { - unsigned long flags; - unsigned long seq; - unsigned long usec, sec; + struct clock_event_device *cd; + int cpu; + + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + + cpu = smp_processor_id(); + cd = &per_cpu(comparators, cpu); + cd->name = "comparator"; + cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->mult = 16777; + cd->shift = 12; + cd->min_delta_ns = 1; + cd->max_delta_ns = LONG_MAX; + cd->rating = 400; + cd->cpumask = cpumask_of(cpu); + cd->set_next_event = s390_next_event; + cd->set_mode = s390_set_mode; - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + clockevents_register_device(cd); - sec = xtime.tv_sec; - usec = xtime.tv_nsec / 1000 + do_gettimeoffset(); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + /* Enable clock comparator timer interrupt. */ + __ctl_set_bit(0,11); - while (usec >= 1000000) { - usec -= 1000000; - sec++; + /* Always allow the timing alert external interrupt. */ + __ctl_set_bit(0, 4); +} + +static void clock_comparator_interrupt(struct ext_code ext_code, + unsigned int param32, + unsigned long param64) +{ + inc_irq_stat(IRQEXT_CLK); + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); +} + +static void etr_timing_alert(struct etr_irq_parm *); +static void stp_timing_alert(struct stp_irq_parm *); + +static void timing_alert_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + inc_irq_stat(IRQEXT_TLA); + if (param32 & 0x00c40000) + etr_timing_alert((struct etr_irq_parm *) ¶m32); + if (param32 & 0x00038000) + stp_timing_alert((struct stp_irq_parm *) ¶m32); +} + +static void etr_reset(void); +static void stp_reset(void); + +void read_persistent_clock(struct timespec *ts) +{ + tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); +} + +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + +static cycle_t read_tod_clock(struct clocksource *cs) +{ + return get_tod_clock(); +} + +static struct clocksource clocksource_tod = { + .name = "tod", + .rating = 400, + .read = read_tod_clock, + .mask = -1ULL, + .mult = 1000, + .shift = 12, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} + +void update_vsyscall(struct timekeeper *tk) +{ + u64 nsecps; + + if (tk->clock != &clocksource_tod) + return; + + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = tk->clock->cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->wtom_clock_sec = + tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = tk->xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); + nsecps = (u64) NSEC_PER_SEC << tk->shift; + while (vdso_data->wtom_clock_nsec >= nsecps) { + vdso_data->wtom_clock_nsec -= nsecps; + vdso_data->wtom_clock_sec++; } + vdso_data->tk_mult = tk->mult; + vdso_data->tk_shift = tk->shift; + smp_wmb(); + ++vdso_data->tb_update_count; +} + +extern struct timezone sys_tz; + +void update_vsyscall_tz(void) +{ + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tb_update_count; +} + +/* + * Initialize the TOD clock and the CPU timer of + * the boot cpu. + */ +void __init time_init(void) +{ + /* Reset time synchronization interfaces. */ + etr_reset(); + stp_reset(); + + /* request the clock comparator external interrupt */ + if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt)) + panic("Couldn't request external interrupt 0x1004"); + + /* request the timing alert external interrupt */ + if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) + panic("Couldn't request external interrupt 0x1406"); + + if (clocksource_register(&clocksource_tod) != 0) + panic("Could not register TOD clock source"); + + /* Enable TOD clock interrupts on the boot cpu. */ + init_cpu_timer(); + + /* Enable cpu timer interrupts on the boot cpu. */ + vtime_init(); +} + +/* + * The time is "clock". old is what we think the time is. + * Adjust the value by a multiple of jiffies and add the delta to ntp. + * "delay" is an approximation how long the synchronization took. If + * the time correction is positive, then "delay" is subtracted from + * the time difference and only the remaining part is passed to ntp. + */ +static unsigned long long adjust_time(unsigned long long old, + unsigned long long clock, + unsigned long long delay) +{ + unsigned long long delta, ticks; + struct timex adjust; - tv->tv_sec = sec; - tv->tv_usec = usec; + if (clock > old) { + /* It is later than we thought. */ + delta = ticks = clock - old; + delta = ticks = (delta < delay) ? 0 : delta - delay; + delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); + adjust.offset = ticks * (1000000 / HZ); + } else { + /* It is earlier than we thought. */ + delta = ticks = old - clock; + delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); + delta = -delta; + adjust.offset = -ticks * (1000000 / HZ); + } + sched_clock_base_cc += delta; + if (adjust.offset != 0) { + pr_notice("The ETR interface has adjusted the clock " + "by %li microseconds\n", adjust.offset); + adjust.modes = ADJ_OFFSET_SINGLESHOT; + do_adjtimex(&adjust); + } + return delta; } -EXPORT_SYMBOL(do_gettimeofday); +static DEFINE_PER_CPU(atomic_t, clock_sync_word); +static DEFINE_MUTEX(clock_sync_mutex); +static unsigned long clock_sync_flags; -int do_settimeofday(struct timespec *tv) +#define CLOCK_SYNC_HAS_ETR 0 +#define CLOCK_SYNC_HAS_STP 1 +#define CLOCK_SYNC_ETR 2 +#define CLOCK_SYNC_STP 3 + +/* + * The synchronous get_clock function. It will write the current clock + * value to the clock pointer and return 0 if the clock is in sync with + * the external time source. If the clock mode is local it will return + * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external + * reference. + */ +int get_sync_clock(unsigned long long *clock) { - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; + atomic_t *sw_ptr; + unsigned int sw0, sw1; - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; + sw_ptr = &get_cpu_var(clock_sync_word); + sw0 = atomic_read(sw_ptr); + *clock = get_tod_clock(); + sw1 = atomic_read(sw_ptr); + put_cpu_var(clock_sync_word); + if (sw0 == sw1 && (sw0 & 0x80000000U)) + /* Success: time is in sync. */ + return 0; + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) && + !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) && + !test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) + return -EACCES; + return -EAGAIN; +} +EXPORT_SYMBOL(get_sync_clock); - write_seqlock_irq(&xtime_lock); - /* This is revolting. We need to set the xtime.tv_nsec - * correctly. However, the value in this location is - * is value at the last tick. - * Discover what correction gettimeofday - * would have done, and then undo it! +/* + * Make get_sync_clock return -EAGAIN. + */ +static void disable_sync_clock(void *dummy) +{ + atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + /* + * Clear the in-sync bit 2^31. All get_sync_clock calls will + * fail until the sync bit is turned back on. In addition + * increase the "sequence" counter to avoid the race of an + * etr event and the complete recovery against get_sync_clock. */ - nsec -= do_gettimeoffset() * 1000; + atomic_clear_mask(0x80000000, sw_ptr); + atomic_inc(sw_ptr); +} - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); +/* + * Make get_sync_clock return 0 again. + * Needs to be called from a context disabled for preemption. + */ +static void enable_sync_clock(void) +{ + atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_set_mask(0x80000000, sw_ptr); +} - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); +/* + * Function to check if the clock is in sync. + */ +static inline int check_sync_clock(void) +{ + atomic_t *sw_ptr; + int rc; - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; + sw_ptr = &get_cpu_var(clock_sync_word); + rc = (atomic_read(sw_ptr) & 0x80000000U) != 0; + put_cpu_var(clock_sync_word); + return rc; } -EXPORT_SYMBOL(do_settimeofday); +/* Single threaded workqueue used for etr and stp sync events */ +static struct workqueue_struct *time_sync_wq; +static void __init time_init_wq(void) +{ + if (time_sync_wq) + return; + time_sync_wq = create_singlethread_workqueue("timesync"); +} -#ifdef CONFIG_PROFILING -#define s390_do_profile(regs) profile_tick(CPU_PROFILING, regs) -#else -#define s390_do_profile(regs) do { ; } while(0) -#endif /* CONFIG_PROFILING */ +/* + * External Time Reference (ETR) code. + */ +static int etr_port0_online; +static int etr_port1_online; +static int etr_steai_available; + +static int __init early_parse_etr(char *p) +{ + if (strncmp(p, "off", 3) == 0) + etr_port0_online = etr_port1_online = 0; + else if (strncmp(p, "port0", 5) == 0) + etr_port0_online = 1; + else if (strncmp(p, "port1", 5) == 0) + etr_port1_online = 1; + else if (strncmp(p, "on", 2) == 0) + etr_port0_online = etr_port1_online = 1; + return 0; +} +early_param("etr", early_parse_etr); + +enum etr_event { + ETR_EVENT_PORT0_CHANGE, + ETR_EVENT_PORT1_CHANGE, + ETR_EVENT_PORT_ALERT, + ETR_EVENT_SYNC_CHECK, + ETR_EVENT_SWITCH_LOCAL, + ETR_EVENT_UPDATE, +}; +/* + * Valid bit combinations of the eacr register are (x = don't care): + * e0 e1 dp p0 p1 ea es sl + * 0 0 x 0 0 0 0 0 initial, disabled state + * 0 0 x 0 1 1 0 0 port 1 online + * 0 0 x 1 0 1 0 0 port 0 online + * 0 0 x 1 1 1 0 0 both ports online + * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode + * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode + * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync + * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync + * 0 1 x 1 1 1 0 0 both ports online, port 1 usable + * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync + * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync + * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode + * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode + * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync + * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync + * 1 0 x 1 1 1 0 0 both ports online, port 0 usable + * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync + * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync + * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync + * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync + */ +static struct etr_eacr etr_eacr; +static u64 etr_tolec; /* time of last eacr update */ +static struct etr_aib etr_port0; +static int etr_port0_uptodate; +static struct etr_aib etr_port1; +static int etr_port1_uptodate; +static unsigned long etr_events; +static struct timer_list etr_timer; + +static void etr_timeout(unsigned long dummy); +static void etr_work_fn(struct work_struct *work); +static DEFINE_MUTEX(etr_work_mutex); +static DECLARE_WORK(etr_work, etr_work_fn); /* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick + * Reset ETR attachment. */ -void account_ticks(struct pt_regs *regs) +static void etr_reset(void) { - __u64 tmp; - __u32 ticks, xticks; + etr_eacr = (struct etr_eacr) { + .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, + .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, + .es = 0, .sl = 0 }; + if (etr_setr(&etr_eacr) == 0) { + etr_tolec = get_tod_clock(); + set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + } else if (etr_port0_online || etr_port1_online) { + pr_warning("The real or virtual hardware system does " + "not provide an ETR interface\n"); + etr_port0_online = etr_port1_online = 0; + } +} - /* Calculate how many ticks have passed. */ - if (S390_lowcore.int_clock < S390_lowcore.jiffy_timer) { - /* - * We have to program the clock comparator even if - * no tick has passed. That happens if e.g. an i/o - * interrupt wakes up an idle processor that has - * switched off its hz timer. - */ - tmp = S390_lowcore.jiffy_timer + CPU_DEVIATION; - asm volatile ("SCKC %0" : : "m" (tmp)); +static int __init etr_init(void) +{ + struct etr_aib aib; + + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) + return 0; + time_init_wq(); + /* Check if this machine has the steai instruction. */ + if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) + etr_steai_available = 1; + setup_timer(&etr_timer, etr_timeout, 0UL); + if (etr_port0_online) { + set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } + if (etr_port1_online) { + set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } + return 0; +} + +arch_initcall(etr_init); + +/* + * Two sorts of ETR machine checks. The architecture reads: + * "When a machine-check niterruption occurs and if a switch-to-local or + * ETR-sync-check interrupt request is pending but disabled, this pending + * disabled interruption request is indicated and is cleared". + * Which means that we can get etr_switch_to_local events from the machine + * check handler although the interruption condition is disabled. Lovely.. + */ + +/* + * Switch to local machine check. This is called when the last usable + * ETR port goes inactive. After switch to local the clock is not in sync. + */ +void etr_switch_to_local(void) +{ + if (!etr_eacr.sl) return; + disable_sync_clock(NULL); + if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { + etr_eacr.es = etr_eacr.sl = 0; + etr_setr(&etr_eacr); + queue_work(time_sync_wq, &etr_work); } - tmp = S390_lowcore.int_clock - S390_lowcore.jiffy_timer; - if (tmp >= 2*CLK_TICKS_PER_JIFFY) { /* more than two ticks ? */ - ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1; - S390_lowcore.jiffy_timer += - CLK_TICKS_PER_JIFFY * (__u64) ticks; - } else if (tmp >= CLK_TICKS_PER_JIFFY) { - ticks = 2; - S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY; - } else { - ticks = 1; - S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; +} + +/* + * ETR sync check machine check. This is called when the ETR OTE and the + * local clock OTE are farther apart than the ETR sync check tolerance. + * After a ETR sync check the clock is not in sync. The machine check + * is broadcasted to all cpus at the same time. + */ +void etr_sync_check(void) +{ + if (!etr_eacr.es) + return; + disable_sync_clock(NULL); + if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { + etr_eacr.es = 0; + etr_setr(&etr_eacr); + queue_work(time_sync_wq, &etr_work); } +} + +/* + * ETR timing alert. There are two causes: + * 1) port state change, check the usability of the port + * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the + * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) + * or ETR-data word 4 (edf4) has changed. + */ +static void etr_timing_alert(struct etr_irq_parm *intparm) +{ + if (intparm->pc0) + /* ETR port 0 state change. */ + set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); + if (intparm->pc1) + /* ETR port 1 state change. */ + set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); + if (intparm->eai) + /* + * ETR port alert on either port 0, 1 or both. + * Both ports are not up-to-date now. + */ + set_bit(ETR_EVENT_PORT_ALERT, &etr_events); + queue_work(time_sync_wq, &etr_work); +} + +static void etr_timeout(unsigned long dummy) +{ + set_bit(ETR_EVENT_UPDATE, &etr_events); + queue_work(time_sync_wq, &etr_work); +} + +/* + * Check if the etr mode is pss. + */ +static inline int etr_mode_is_pps(struct etr_eacr eacr) +{ + return eacr.es && !eacr.sl; +} + +/* + * Check if the etr mode is etr. + */ +static inline int etr_mode_is_etr(struct etr_eacr eacr) +{ + return eacr.es && eacr.sl; +} + +/* + * Check if the port can be used for TOD synchronization. + * For PPS mode the port has to receive OTEs. For ETR mode + * the port has to receive OTEs, the ETR stepping bit has to + * be zero and the validity bits for data frame 1, 2, and 3 + * have to be 1. + */ +static int etr_port_valid(struct etr_aib *aib, int port) +{ + unsigned int psc; + + /* Check that this port is receiving OTEs. */ + if (aib->tsp == 0) + return 0; + + psc = port ? aib->esw.psc1 : aib->esw.psc0; + if (psc == etr_lpsc_pps_mode) + return 1; + if (psc == etr_lpsc_operational_step) + return !aib->esw.y && aib->slsw.v1 && + aib->slsw.v2 && aib->slsw.v3; + return 0; +} + +/* + * Check if two ports are on the same network. + */ +static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2) +{ + // FIXME: any other fields we have to compare? + return aib1->edf1.net_id == aib2->edf1.net_id; +} + +/* + * Wrapper for etr_stei that converts physical port states + * to logical port states to be consistent with the output + * of stetr (see etr_psc vs. etr_lpsc). + */ +static void etr_steai_cv(struct etr_aib *aib, unsigned int func) +{ + BUG_ON(etr_steai(aib, func) != 0); + /* Convert port state to logical port state. */ + if (aib->esw.psc0 == 1) + aib->esw.psc0 = 2; + else if (aib->esw.psc0 == 0 && aib->esw.p == 0) + aib->esw.psc0 = 1; + if (aib->esw.psc1 == 1) + aib->esw.psc1 = 2; + else if (aib->esw.psc1 == 0 && aib->esw.p == 1) + aib->esw.psc1 = 1; +} + +/* + * Check if the aib a2 is still connected to the same attachment as + * aib a1, the etv values differ by one and a2 is valid. + */ +static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) +{ + int state_a1, state_a2; + + /* Paranoia check: e0/e1 should better be the same. */ + if (a1->esw.eacr.e0 != a2->esw.eacr.e0 || + a1->esw.eacr.e1 != a2->esw.eacr.e1) + return 0; + + /* Still connected to the same etr ? */ + state_a1 = p ? a1->esw.psc1 : a1->esw.psc0; + state_a2 = p ? a2->esw.psc1 : a2->esw.psc0; + if (state_a1 == etr_lpsc_operational_step) { + if (state_a2 != etr_lpsc_operational_step || + a1->edf1.net_id != a2->edf1.net_id || + a1->edf1.etr_id != a2->edf1.etr_id || + a1->edf1.etr_pn != a2->edf1.etr_pn) + return 0; + } else if (state_a2 != etr_lpsc_pps_mode) + return 0; + + /* The ETV value of a2 needs to be ETV of a1 + 1. */ + if (a1->edf2.etv + 1 != a2->edf2.etv) + return 0; - /* set clock comparator for next tick */ - tmp = S390_lowcore.jiffy_timer + CPU_DEVIATION; - asm volatile ("SCKC %0" : : "m" (tmp)); + if (!etr_port_valid(a2, p)) + return 0; -#ifdef CONFIG_SMP + return 1; +} + +struct clock_sync_data { + atomic_t cpus; + int in_sync; + unsigned long long fixup_cc; + int etr_port; + struct etr_aib *etr_aib; +}; + +static void clock_sync_cpu(struct clock_sync_data *sync) +{ + atomic_dec(&sync->cpus); + enable_sync_clock(); + /* + * This looks like a busy wait loop but it isn't. etr_sync_cpus + * is called on all other cpus while the TOD clocks is stopped. + * __udelay will stop the cpu on an enabled wait psw until the + * TOD is running again. + */ + while (sync->in_sync == 0) { + __udelay(1); + /* + * A different cpu changes *in_sync. Therefore use + * barrier() to force memory access. + */ + barrier(); + } + if (sync->in_sync != 1) + /* Didn't work. Clear per-cpu in sync bit again. */ + disable_sync_clock(NULL); /* - * Do not rely on the boot cpu to do the calls to do_timer. - * Spread it over all cpus instead. + * This round of TOD syncing is done. Set the clock comparator + * to the next tick and let the processor continue. */ - write_seqlock(&xtime_lock); - if (S390_lowcore.jiffy_timer > xtime_cc) { - tmp = S390_lowcore.jiffy_timer - xtime_cc; - if (tmp >= 2*CLK_TICKS_PER_JIFFY) { - xticks = __div(tmp, CLK_TICKS_PER_JIFFY); - xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY; + fixup_clock_comparator(sync->fixup_cc); +} + +/* + * Sync the TOD clock using the port referred to by aibp. This port + * has to be enabled and the other port has to be disabled. The + * last eacr update has to be more than 1.6 seconds in the past. + */ +static int etr_sync_clock(void *data) +{ + static int first; + unsigned long long clock, old_clock, delay, delta; + struct clock_sync_data *etr_sync; + struct etr_aib *sync_port, *aib; + int port; + int rc; + + etr_sync = data; + + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(etr_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&etr_sync->cpus) != 0) + cpu_relax(); + + port = etr_sync->etr_port; + aib = etr_sync->etr_aib; + sync_port = (port == 0) ? &etr_port0 : &etr_port1; + enable_sync_clock(); + + /* Set clock to next OTE. */ + __ctl_set_bit(14, 21); + __ctl_set_bit(0, 29); + clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; + old_clock = get_tod_clock(); + if (set_tod_clock(clock) == 0) { + __udelay(1); /* Wait for the clock to start. */ + __ctl_clear_bit(0, 29); + __ctl_clear_bit(14, 21); + etr_stetr(aib); + /* Adjust Linux timing variables. */ + delay = (unsigned long long) + (aib->edf2.etv - sync_port->edf2.etv) << 32; + delta = adjust_time(old_clock, clock, delay); + etr_sync->fixup_cc = delta; + fixup_clock_comparator(delta); + /* Verify that the clock is properly set. */ + if (!etr_aib_follows(sync_port, aib, port)) { + /* Didn't work. */ + disable_sync_clock(NULL); + etr_sync->in_sync = -EAGAIN; + rc = -EAGAIN; } else { - xticks = 1; - xtime_cc += CLK_TICKS_PER_JIFFY; + etr_sync->in_sync = 1; + rc = 0; } - while (xticks--) - do_timer(regs); + } else { + /* Could not set the clock ?!? */ + __ctl_clear_bit(0, 29); + __ctl_clear_bit(14, 21); + disable_sync_clock(NULL); + etr_sync->in_sync = -EAGAIN; + rc = -EAGAIN; } - write_sequnlock(&xtime_lock); -#else - for (xticks = ticks; xticks > 0; xticks--) - do_timer(regs); -#endif + xchg(&first, 0); + return rc; +} -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - account_user_vtime(current); -#else - while (ticks--) - update_process_times(user_mode(regs)); -#endif +static int etr_sync_clock_stop(struct etr_aib *aib, int port) +{ + struct clock_sync_data etr_sync; + struct etr_aib *sync_port; + int follows; + int rc; - s390_do_profile(regs); + /* Check if the current aib is adjacent to the sync port aib. */ + sync_port = (port == 0) ? &etr_port0 : &etr_port1; + follows = etr_aib_follows(sync_port, aib, port); + memcpy(sync_port, aib, sizeof(*aib)); + if (!follows) + return -EAGAIN; + memset(&etr_sync, 0, sizeof(etr_sync)); + etr_sync.etr_aib = aib; + etr_sync.etr_port = port; + get_online_cpus(); + atomic_set(&etr_sync.cpus, num_online_cpus() - 1); + rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); + put_online_cpus(); + return rc; } -#ifdef CONFIG_NO_IDLE_HZ +/* + * Handle the immediate effects of the different events. + * The port change event is used for online/offline changes. + */ +static struct etr_eacr etr_handle_events(struct etr_eacr eacr) +{ + if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) + eacr.es = 0; + if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) + eacr.es = eacr.sl = 0; + if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events)) + etr_port0_uptodate = etr_port1_uptodate = 0; -#ifdef CONFIG_NO_IDLE_HZ_INIT -int sysctl_hz_timer = 0; -#else -int sysctl_hz_timer = 1; -#endif + if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) { + if (eacr.e0) + /* + * Port change of an enabled port. We have to + * assume that this can have caused an stepping + * port switch. + */ + etr_tolec = get_tod_clock(); + eacr.p0 = etr_port0_online; + if (!eacr.p0) + eacr.e0 = 0; + etr_port0_uptodate = 0; + } + if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) { + if (eacr.e1) + /* + * Port change of an enabled port. We have to + * assume that this can have caused an stepping + * port switch. + */ + etr_tolec = get_tod_clock(); + eacr.p1 = etr_port1_online; + if (!eacr.p1) + eacr.e1 = 0; + etr_port1_uptodate = 0; + } + clear_bit(ETR_EVENT_UPDATE, &etr_events); + return eacr; +} /* - * Stop the HZ tick on the current CPU. - * Only cpu_idle may call this function. + * Set up a timer that expires after the etr_tolec + 1.6 seconds if + * one of the ports needs an update. */ -static inline void stop_hz_timer(void) +static void etr_set_tolec_timeout(unsigned long long now) { - unsigned long flags; - unsigned long seq, next; - __u64 timer, todval; + unsigned long micros; - if (sysctl_hz_timer != 0) + if ((!etr_eacr.p0 || etr_port0_uptodate) && + (!etr_eacr.p1 || etr_port1_uptodate)) return; + micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0; + micros = (micros > 1600000) ? 0 : 1600000 - micros; + mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1); +} - cpu_set(smp_processor_id(), nohz_cpu_mask); +/* + * Set up a time that expires after 1/2 second. + */ +static void etr_set_sync_timeout(void) +{ + mod_timer(&etr_timer, jiffies + HZ/2); +} + +/* + * Update the aib information for one or both ports. + */ +static struct etr_eacr etr_handle_update(struct etr_aib *aib, + struct etr_eacr eacr) +{ + /* With both ports disabled the aib information is useless. */ + if (!eacr.e0 && !eacr.e1) + return eacr; + + /* Update port0 or port1 with aib stored in etr_work_fn. */ + if (aib->esw.q == 0) { + /* Information for port 0 stored. */ + if (eacr.p0 && !etr_port0_uptodate) { + etr_port0 = *aib; + if (etr_port0_online) + etr_port0_uptodate = 1; + } + } else { + /* Information for port 1 stored. */ + if (eacr.p1 && !etr_port1_uptodate) { + etr_port1 = *aib; + if (etr_port0_online) + etr_port1_uptodate = 1; + } + } /* - * Leave the clock comparator set up for the next timer - * tick if either rcu or a softirq is pending. + * Do not try to get the alternate port aib if the clock + * is not in sync yet. */ - if (rcu_pending(smp_processor_id()) || local_softirq_pending()) { - cpu_clear(smp_processor_id(), nohz_cpu_mask); + if (!eacr.es || !check_sync_clock()) + return eacr; + + /* + * If steai is available we can get the information about + * the other port immediately. If only stetr is available the + * data-port bit toggle has to be used. + */ + if (etr_steai_available) { + if (eacr.p0 && !etr_port0_uptodate) { + etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); + etr_port0_uptodate = 1; + } + if (eacr.p1 && !etr_port1_uptodate) { + etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1); + etr_port1_uptodate = 1; + } + } else { + /* + * One port was updated above, if the other + * port is not uptodate toggle dp bit. + */ + if ((eacr.p0 && !etr_port0_uptodate) || + (eacr.p1 && !etr_port1_uptodate)) + eacr.dp ^= 1; + else + eacr.dp = 0; + } + return eacr; +} + +/* + * Write new etr control register if it differs from the current one. + * Return 1 if etr_tolec has been updated as well. + */ +static void etr_update_eacr(struct etr_eacr eacr) +{ + int dp_changed; + + if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0) + /* No change, return. */ return; + /* + * The disable of an active port of the change of the data port + * bit can/will cause a change in the data port. + */ + dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 || + (etr_eacr.dp ^ eacr.dp) != 0; + etr_eacr = eacr; + etr_setr(&etr_eacr); + if (dp_changed) + etr_tolec = get_tod_clock(); +} + +/* + * ETR work. In this function you'll find the main logic. In + * particular this is the only function that calls etr_update_eacr(), + * it "controls" the etr control register. + */ +static void etr_work_fn(struct work_struct *work) +{ + unsigned long long now; + struct etr_eacr eacr; + struct etr_aib aib; + int sync_port; + + /* prevent multiple execution. */ + mutex_lock(&etr_work_mutex); + + /* Create working copy of etr_eacr. */ + eacr = etr_eacr; + + /* Check for the different events and their immediate effects. */ + eacr = etr_handle_events(eacr); + + /* Check if ETR is supposed to be active. */ + eacr.ea = eacr.p0 || eacr.p1; + if (!eacr.ea) { + /* Both ports offline. Reset everything. */ + eacr.dp = eacr.es = eacr.sl = 0; + on_each_cpu(disable_sync_clock, NULL, 1); + del_timer_sync(&etr_timer); + etr_update_eacr(eacr); + goto out_unlock; } + /* Store aib to get the current ETR status word. */ + BUG_ON(etr_stetr(&aib) != 0); + etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ + now = get_tod_clock(); + + /* + * Update the port information if the last stepping port change + * or data port change is older than 1.6 seconds. + */ + if (now >= etr_tolec + (1600000 << 12)) + eacr = etr_handle_update(&aib, eacr); + /* - * This cpu is going really idle. Set up the clock comparator - * for the next event. + * Select ports to enable. The preferred synchronization mode is PPS. + * If a port can be enabled depends on a number of things: + * 1) The port needs to be online and uptodate. A port is not + * disabled just because it is not uptodate, but it is only + * enabled if it is uptodate. + * 2) The port needs to have the same mode (pps / etr). + * 3) The port needs to be usable -> etr_port_valid() == 1 + * 4) To enable the second port the clock needs to be in sync. + * 5) If both ports are useable and are ETR ports, the network id + * has to be the same. + * The eacr.sl bit is used to indicate etr mode vs. pps mode. */ - next = next_timer_interrupt(); - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - timer = (__u64)(next - jiffies) + jiffies_64; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - todval = -1ULL; - /* Be careful about overflows. */ - if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) { - timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY; - if (timer >= jiffies_timer_cc) - todval = timer; + if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) { + eacr.sl = 0; + eacr.e0 = 1; + if (!etr_mode_is_pps(etr_eacr)) + eacr.es = 0; + if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode) + eacr.e1 = 0; + // FIXME: uptodate checks ? + else if (etr_port0_uptodate && etr_port1_uptodate) + eacr.e1 = 1; + sync_port = (etr_port0_uptodate && + etr_port_valid(&etr_port0, 0)) ? 0 : -1; + } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { + eacr.sl = 0; + eacr.e0 = 0; + eacr.e1 = 1; + if (!etr_mode_is_pps(etr_eacr)) + eacr.es = 0; + sync_port = (etr_port1_uptodate && + etr_port_valid(&etr_port1, 1)) ? 1 : -1; + } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { + eacr.sl = 1; + eacr.e0 = 1; + if (!etr_mode_is_etr(etr_eacr)) + eacr.es = 0; + if (!eacr.es || !eacr.p1 || + aib.esw.psc1 != etr_lpsc_operational_alt) + eacr.e1 = 0; + else if (etr_port0_uptodate && etr_port1_uptodate && + etr_compare_network(&etr_port0, &etr_port1)) + eacr.e1 = 1; + sync_port = (etr_port0_uptodate && + etr_port_valid(&etr_port0, 0)) ? 0 : -1; + } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { + eacr.sl = 1; + eacr.e0 = 0; + eacr.e1 = 1; + if (!etr_mode_is_etr(etr_eacr)) + eacr.es = 0; + sync_port = (etr_port1_uptodate && + etr_port_valid(&etr_port1, 1)) ? 1 : -1; + } else { + /* Both ports not usable. */ + eacr.es = eacr.sl = 0; + sync_port = -1; } - asm volatile ("SCKC %0" : : "m" (todval)); + + /* + * If the clock is in sync just update the eacr and return. + * If there is no valid sync port wait for a port update. + */ + if ((eacr.es && check_sync_clock()) || sync_port < 0) { + etr_update_eacr(eacr); + etr_set_tolec_timeout(now); + goto out_unlock; + } + + /* + * Prepare control register for clock syncing + * (reset data port bit, set sync check control. + */ + eacr.dp = 0; + eacr.es = 1; + + /* + * Update eacr and try to synchronize the clock. If the update + * of eacr caused a stepping port switch (or if we have to + * assume that a stepping port switch has occurred) or the + * clock syncing failed, reset the sync check control bit + * and set up a timer to try again after 0.5 seconds + */ + etr_update_eacr(eacr); + if (now < etr_tolec + (1600000 << 12) || + etr_sync_clock_stop(&aib, sync_port) != 0) { + /* Sync failed. Try again in 1/2 second. */ + eacr.es = 0; + etr_update_eacr(eacr); + etr_set_sync_timeout(); + } else + etr_set_tolec_timeout(now); +out_unlock: + mutex_unlock(&etr_work_mutex); } /* - * Start the HZ tick on the current CPU. - * Only cpu_idle may call this function. + * Sysfs interface functions */ -static inline void start_hz_timer(void) +static struct bus_type etr_subsys = { + .name = "etr", + .dev_name = "etr", +}; + +static struct device etr_port0_dev = { + .id = 0, + .bus = &etr_subsys, +}; + +static struct device etr_port1_dev = { + .id = 1, + .bus = &etr_subsys, +}; + +/* + * ETR subsys attributes + */ +static ssize_t etr_stepping_port_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - if (!cpu_isset(smp_processor_id(), nohz_cpu_mask)) - return; - account_ticks(task_pt_regs(current)); - cpu_clear(smp_processor_id(), nohz_cpu_mask); + return sprintf(buf, "%i\n", etr_port0.esw.p); } -static int nohz_idle_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); + +static ssize_t etr_stepping_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) { - switch (action) { - case CPU_IDLE: - stop_hz_timer(); - break; - case CPU_NOT_IDLE: - start_hz_timer(); - break; + char *mode_str; + + if (etr_mode_is_pps(etr_eacr)) + mode_str = "pps"; + else if (etr_mode_is_etr(etr_eacr)) + mode_str = "etr"; + else + mode_str = "local"; + return sprintf(buf, "%s\n", mode_str); +} + +static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); + +/* + * ETR port attributes + */ +static inline struct etr_aib *etr_aib_from_dev(struct device *dev) +{ + if (dev == &etr_port0_dev) + return etr_port0_online ? &etr_port0 : NULL; + else + return etr_port1_online ? &etr_port1 : NULL; +} + +static ssize_t etr_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned int online; + + online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online; + return sprintf(buf, "%i\n", online); +} + +static ssize_t etr_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int value; + + value = simple_strtoul(buf, NULL, 0); + if (value != 0 && value != 1) + return -EINVAL; + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); + if (dev == &etr_port0_dev) { + if (etr_port0_online == value) + goto out; /* Nothing to do. */ + etr_port0_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); + } else { + if (etr_port1_online == value) + goto out; /* Nothing to do. */ + etr_port1_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); + queue_work(time_sync_wq, &etr_work); } - return NOTIFY_OK; +out: + mutex_unlock(&clock_sync_mutex); + return count; +} + +static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); + +static ssize_t etr_stepping_control_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? + etr_eacr.e0 : etr_eacr.e1); +} + +static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); + +static ssize_t etr_mode_code_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!etr_port0_online && !etr_port1_online) + /* Status word is not uptodate if both ports are offline. */ + return -ENODATA; + return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? + etr_port0.esw.psc0 : etr_port0.esw.psc1); +} + +static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); + +static ssize_t etr_untuned_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.u); +} + +static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); + +static ssize_t etr_network_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.net_id); +} + +static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); + +static ssize_t etr_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.etr_id); +} + +static DEVICE_ATTR(id, 0400, etr_id_show, NULL); + +static ssize_t etr_port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v1) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf1.etr_pn); } -static struct notifier_block nohz_idle_nb = { - .notifier_call = nohz_idle_notify, +static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); + +static ssize_t etr_coupled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v3) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf3.c); +} + +static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); + +static ssize_t etr_local_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v3) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf3.blto); +} + +static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); + +static ssize_t etr_utc_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etr_aib *aib = etr_aib_from_dev(dev); + + if (!aib || !aib->slsw.v3) + return -ENODATA; + return sprintf(buf, "%i\n", aib->edf3.buo); +} + +static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); + +static struct device_attribute *etr_port_attributes[] = { + &dev_attr_online, + &dev_attr_stepping_control, + &dev_attr_state_code, + &dev_attr_untuned, + &dev_attr_network, + &dev_attr_id, + &dev_attr_port, + &dev_attr_coupled, + &dev_attr_local_time, + &dev_attr_utc_offset, + NULL }; -void __init nohz_init(void) +static int __init etr_register_port(struct device *dev) +{ + struct device_attribute **attr; + int rc; + + rc = device_register(dev); + if (rc) + goto out; + for (attr = etr_port_attributes; *attr; attr++) { + rc = device_create_file(dev, *attr); + if (rc) + goto out_unreg; + } + return 0; +out_unreg: + for (; attr >= etr_port_attributes; attr--) + device_remove_file(dev, *attr); + device_unregister(dev); +out: + return rc; +} + +static void __init etr_unregister_port(struct device *dev) { - if (register_idle_notifier(&nohz_idle_nb)) - panic("Couldn't register idle notifier"); + struct device_attribute **attr; + + for (attr = etr_port_attributes; *attr; attr++) + device_remove_file(dev, *attr); + device_unregister(dev); +} + +static int __init etr_init_sysfs(void) +{ + int rc; + + rc = subsys_system_register(&etr_subsys, NULL); + if (rc) + goto out; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); + if (rc) + goto out_unreg_subsys; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); + if (rc) + goto out_remove_stepping_port; + rc = etr_register_port(&etr_port0_dev); + if (rc) + goto out_remove_stepping_mode; + rc = etr_register_port(&etr_port1_dev); + if (rc) + goto out_remove_port0; + return 0; + +out_remove_port0: + etr_unregister_port(&etr_port0_dev); +out_remove_stepping_mode: + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); +out_remove_stepping_port: + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); +out_unreg_subsys: + bus_unregister(&etr_subsys); +out: + return rc; } -#endif +device_initcall(etr_init_sysfs); /* - * Start the clock comparator on the current CPU. + * Server Time Protocol (STP) code. */ -void init_cpu_timer(void) +static int stp_online; +static struct stp_sstpi stp_info; +static void *stp_page; + +static void stp_work_fn(struct work_struct *work); +static DEFINE_MUTEX(stp_work_mutex); +static DECLARE_WORK(stp_work, stp_work_fn); +static struct timer_list stp_timer; + +static int __init early_parse_stp(char *p) { - unsigned long cr0; - __u64 timer; + if (strncmp(p, "off", 3) == 0) + stp_online = 0; + else if (strncmp(p, "on", 2) == 0) + stp_online = 1; + return 0; +} +early_param("stp", early_parse_stp); + +/* + * Reset STP attachment. + */ +static void __init stp_reset(void) +{ + int rc; + + stp_page = (void *) get_zeroed_page(GFP_ATOMIC); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + if (rc == 0) + set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); + else if (stp_online) { + pr_warning("The real or virtual hardware system does " + "not provide an STP interface\n"); + free_page((unsigned long) stp_page); + stp_page = NULL; + stp_online = 0; + } +} + +static void stp_timeout(unsigned long dummy) +{ + queue_work(time_sync_wq, &stp_work); +} - timer = jiffies_timer_cc + jiffies_64 * CLK_TICKS_PER_JIFFY; - S390_lowcore.jiffy_timer = timer + CLK_TICKS_PER_JIFFY; - timer += CLK_TICKS_PER_JIFFY + CPU_DEVIATION; - asm volatile ("SCKC %0" : : "m" (timer)); - /* allow clock comparator timer interrupt */ - __ctl_store(cr0, 0, 0); - cr0 |= 0x800; - __ctl_load(cr0, 0, 0); +static int __init stp_init(void) +{ + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return 0; + setup_timer(&stp_timer, stp_timeout, 0UL); + time_init_wq(); + if (!stp_online) + return 0; + queue_work(time_sync_wq, &stp_work); + return 0; } -extern void vtime_init(void); +arch_initcall(stp_init); /* - * Initialize the TOD clock and the CPU timer of - * the boot cpu. + * STP timing alert. There are three causes: + * 1) timing status change + * 2) link availability change + * 3) time control parameter change + * In all three cases we are only interested in the clock source state. + * If a STP clock source is now available use it. */ -void __init time_init(void) +static void stp_timing_alert(struct stp_irq_parm *intparm) { - __u64 set_time_cc; - int cc; - - /* kick the TOD clock */ - asm volatile ("STCK 0(%1)\n\t" - "IPM %0\n\t" - "SRL %0,28" : "=r" (cc) : "a" (&init_timer_cc) - : "memory", "cc"); - switch (cc) { - case 0: /* clock in set state: all is fine */ - break; - case 1: /* clock in non-set state: FIXME */ - printk("time_init: TOD clock in non-set state\n"); - break; - case 2: /* clock in error state: FIXME */ - printk("time_init: TOD clock in error state\n"); - break; - case 3: /* clock in stopped or not-operational state: FIXME */ - printk("time_init: TOD clock stopped/non-operational\n"); - break; - } - jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY; - - /* set xtime */ - xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY; - set_time_cc = init_timer_cc - 0x8126d60e46000000LL + - (0x3c26700LL*1000000*4096); - tod_to_timeval(set_time_cc, &xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + if (intparm->tsc || intparm->lac || intparm->tcpc) + queue_work(time_sync_wq, &stp_work); +} - /* request the clock comparator external interrupt */ - if (register_early_external_interrupt(0x1004, 0, - &ext_int_info_cc) != 0) - panic("Couldn't request external interrupt 0x1004"); +/* + * STP sync check machine check. This is called when the timing state + * changes from the synchronized state to the unsynchronized state. + * After a STP sync check the clock is not in sync. The machine check + * is broadcasted to all cpus at the same time. + */ +void stp_sync_check(void) +{ + disable_sync_clock(NULL); + queue_work(time_sync_wq, &stp_work); +} - init_cpu_timer(); +/* + * STP island condition machine check. This is called when an attached + * server attempts to communicate over an STP link and the servers + * have matching CTN ids and have a valid stratum-1 configuration + * but the configurations do not match. + */ +void stp_island_check(void) +{ + disable_sync_clock(NULL); + queue_work(time_sync_wq, &stp_work); +} -#ifdef CONFIG_NO_IDLE_HZ - nohz_init(); -#endif -#ifdef CONFIG_VIRT_TIMER - vtime_init(); -#endif +static int stp_sync_clock(void *data) +{ + static int first; + unsigned long long old_clock, delta; + struct clock_sync_data *stp_sync; + int rc; + + stp_sync = data; + + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(stp_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&stp_sync->cpus) != 0) + cpu_relax(); + + enable_sync_clock(); + + rc = 0; + if (stp_info.todoff[0] || stp_info.todoff[1] || + stp_info.todoff[2] || stp_info.todoff[3] || + stp_info.tmd != 2) { + old_clock = get_tod_clock(); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + if (rc == 0) { + delta = adjust_time(old_clock, get_tod_clock(), 0); + fixup_clock_comparator(delta); + rc = chsc_sstpi(stp_page, &stp_info, + sizeof(struct stp_sstpi)); + if (rc == 0 && stp_info.tmd != 2) + rc = -EAGAIN; + } + } + if (rc) { + disable_sync_clock(NULL); + stp_sync->in_sync = -EAGAIN; + } else + stp_sync->in_sync = 1; + xchg(&first, 0); + return 0; +} + +/* + * STP work. Check for the STP state and take over the clock + * synchronization if the STP clock source is usable. + */ +static void stp_work_fn(struct work_struct *work) +{ + struct clock_sync_data stp_sync; + int rc; + + /* prevent multiple execution. */ + mutex_lock(&stp_work_mutex); + + if (!stp_online) { + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + del_timer_sync(&stp_timer); + goto out_unlock; + } + + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + if (rc) + goto out_unlock; + + rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + if (rc || stp_info.c == 0) + goto out_unlock; + + /* Skip synchronization if the clock is already in sync. */ + if (check_sync_clock()) + goto out_unlock; + + memset(&stp_sync, 0, sizeof(stp_sync)); + get_online_cpus(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); + put_online_cpus(); + + if (!check_sync_clock()) + /* + * There is a usable clock but the synchonization failed. + * Retry after a second. + */ + mod_timer(&stp_timer, jiffies + HZ); + +out_unlock: + mutex_unlock(&stp_work_mutex); +} + +/* + * STP subsys sysfs interface functions + */ +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", +}; + +static ssize_t stp_ctn_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); +} + +static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); + +static ssize_t stp_ctn_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.ctn); +} + +static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); + +static ssize_t stp_dst_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x2000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); +} + +static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); + +static ssize_t stp_leap_seconds_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x8000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); +} + +static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); + +static ssize_t stp_stratum_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); +} + +static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); + +static ssize_t stp_time_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x0800)) + return -ENODATA; + return sprintf(buf, "%i\n", (int) stp_info.tto); +} + +static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); + +static ssize_t stp_time_zone_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x4000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); +} + +static DEVICE_ATTR(time_zone_offset, 0400, + stp_time_zone_offset_show, NULL); + +static ssize_t stp_timing_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.tmd); +} + +static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); + +static ssize_t stp_timing_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.tst); +} + +static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); + +static ssize_t stp_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", stp_online); +} + +static ssize_t stp_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int value; + + value = simple_strtoul(buf, NULL, 0); + if (value != 0 && value != 1) + return -EINVAL; + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); + stp_online = value; + if (stp_online) + set_bit(CLOCK_SYNC_STP, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); + queue_work(time_sync_wq, &stp_work); + mutex_unlock(&clock_sync_mutex); + return count; +} + +/* + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. + */ +static struct device_attribute dev_attr_stp_online = { + .attr = { .name = "online", .mode = 0600 }, + .show = stp_online_show, + .store = stp_online_store, +}; + +static struct device_attribute *stp_attributes[] = { + &dev_attr_ctn_id, + &dev_attr_ctn_type, + &dev_attr_dst_offset, + &dev_attr_leap_seconds, + &dev_attr_stp_online, + &dev_attr_stratum, + &dev_attr_time_offset, + &dev_attr_time_zone_offset, + &dev_attr_timing_mode, + &dev_attr_timing_state, + NULL +}; + +static int __init stp_init_sysfs(void) +{ + struct device_attribute **attr; + int rc; + + rc = subsys_system_register(&stp_subsys, NULL); + if (rc) + goto out; + for (attr = stp_attributes; *attr; attr++) { + rc = device_create_file(stp_subsys.dev_root, *attr); + if (rc) + goto out_unreg; + } + return 0; +out_unreg: + for (; attr >= stp_attributes; attr--) + device_remove_file(stp_subsys.dev_root, *attr); + bus_unregister(&stp_subsys); +out: + return rc; } +device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c new file mode 100644 index 00000000000..355a16c5570 --- /dev/null +++ b/arch/s390/kernel/topology.c @@ -0,0 +1,478 @@ +/* + * Copyright IBM Corp. 2007, 2011 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> +#include <linux/bootmem.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <asm/sysinfo.h> + +#define PTF_HORIZONTAL (0UL) +#define PTF_VERTICAL (1UL) +#define PTF_CHECK (2UL) + +struct mask_info { + struct mask_info *next; + unsigned char id; + cpumask_t mask; +}; + +static void set_topology_timer(void); +static void topology_work_fn(struct work_struct *work); +static struct sysinfo_15_1_x *tl_info; + +static int topology_enabled = 1; +static DECLARE_WORK(topology_work, topology_work_fn); + +/* topology_lock protects the socket and book linked lists */ +static DEFINE_SPINLOCK(topology_lock); +static struct mask_info socket_info; +static struct mask_info book_info; + +struct cpu_topology_s390 cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) +{ + cpumask_t mask; + + cpumask_copy(&mask, cpumask_of(cpu)); + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) + return mask; + for (; info; info = info->next) { + if (cpumask_test_cpu(cpu, &info->mask)) + return info->mask; + } + return mask; +} + +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *socket, + int one_socket_per_cpu) +{ + unsigned int cpu; + + for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) { + unsigned int rcpu; + int lcpu; + + rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; + lcpu = smp_find_processor_id(rcpu); + if (lcpu < 0) + continue; + cpumask_set_cpu(lcpu, &book->mask); + cpu_topology[lcpu].book_id = book->id; + cpumask_set_cpu(lcpu, &socket->mask); + cpu_topology[lcpu].core_id = rcpu; + if (one_socket_per_cpu) { + cpu_topology[lcpu].socket_id = rcpu; + socket = socket->next; + } else { + cpu_topology[lcpu].socket_id = socket->id; + } + smp_cpu_set_polarization(lcpu, tl_cpu->pp); + } + return socket; +} + +static void clear_masks(void) +{ + struct mask_info *info; + + info = &socket_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } + info = &book_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } +} + +static union topology_entry *next_tle(union topology_entry *tle) +{ + if (!tle->nl) + return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_container *)tle + 1); +} + +static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +{ + struct mask_info *socket = &socket_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 2: + book = book->next; + book->id = tle->container.id; + break; + case 1: + socket = socket->next; + socket->id = tle->container.id; + break; + case 0: + add_cpus_to_mask(&tle->cpu, book, socket, 0); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) +{ + struct mask_info *socket = &socket_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void tl_to_masks(struct sysinfo_15_1_x *info) +{ + struct cpuid cpu_id; + + spin_lock_irq(&topology_lock); + get_cpu_id(&cpu_id); + clear_masks(); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + __tl_to_masks_z10(info); + break; + default: + __tl_to_masks_generic(info); + } + spin_unlock_irq(&topology_lock); +} + +static void topology_update_polarization_simple(void) +{ + int cpu; + + mutex_lock(&smp_cpu_state_mutex); + for_each_possible_cpu(cpu) + smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); + mutex_unlock(&smp_cpu_state_mutex); +} + +static int ptf(unsigned long fc) +{ + int rc; + + asm volatile( + " .insn rre,0xb9a20000,%1,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc) + : "d" (fc) : "cc"); + return rc; +} + +int topology_set_cpu_management(int fc) +{ + int cpu, rc; + + if (!MACHINE_HAS_TOPOLOGY) + return -EOPNOTSUPP; + if (fc) + rc = ptf(PTF_VERTICAL); + else + rc = ptf(PTF_HORIZONTAL); + if (rc) + return -EBUSY; + for_each_possible_cpu(cpu) + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + return rc; +} + +static void update_cpu_masks(void) +{ + unsigned long flags; + int cpu; + + spin_lock_irqsave(&topology_lock, flags); + for_each_possible_cpu(cpu) { + cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu); + cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu); + if (!MACHINE_HAS_TOPOLOGY) { + cpu_topology[cpu].core_id = cpu; + cpu_topology[cpu].socket_id = cpu; + cpu_topology[cpu].book_id = cpu; + } + } + spin_unlock_irqrestore(&topology_lock, flags); +} + +void store_topology(struct sysinfo_15_1_x *info) +{ + if (topology_max_mnest >= 3) + stsi(info, 15, 1, 3); + else + stsi(info, 15, 1, 2); +} + +int arch_update_cpu_topology(void) +{ + struct sysinfo_15_1_x *info = tl_info; + struct device *dev; + int cpu; + + if (!MACHINE_HAS_TOPOLOGY) { + update_cpu_masks(); + topology_update_polarization_simple(); + return 0; + } + store_topology(info); + tl_to_masks(info); + update_cpu_masks(); + for_each_online_cpu(cpu) { + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); + } + return 1; +} + +static void topology_work_fn(struct work_struct *work) +{ + rebuild_sched_domains(); +} + +void topology_schedule_update(void) +{ + schedule_work(&topology_work); +} + +static void topology_timer_fn(unsigned long ignored) +{ + if (ptf(PTF_CHECK)) + topology_schedule_update(); + set_topology_timer(); +} + +static struct timer_list topology_timer = + TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); + +static atomic_t topology_poll = ATOMIC_INIT(0); + +static void set_topology_timer(void) +{ + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + HZ / 10); + else + mod_timer(&topology_timer, jiffies + HZ * 60); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); +} + +static int __init early_parse_topology(char *p) +{ + if (strncmp(p, "off", 3)) + return 0; + topology_enabled = 0; + return 0; +} +early_param("topology", early_parse_topology); + +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = alloc_bootmem_align( + roundup_pow_of_two(sizeof(struct mask_info)), + roundup_pow_of_two(sizeof(struct mask_info))); + mask = mask->next; + } +} + +void __init s390_init_cpu_topology(void) +{ + struct sysinfo_15_1_x *info; + int i; + + if (!MACHINE_HAS_TOPOLOGY) + return; + tl_info = alloc_bootmem_pages(PAGE_SIZE); + info = tl_info; + store_topology(info); + pr_info("The CPU configuration topology of the machine is:"); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + printk(KERN_CONT " %d", info->mag[i]); + printk(KERN_CONT " / %d\n", info->mnest); + alloc_masks(info, &socket_info, 1); + alloc_masks(info, &book_info, 2); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (smp_cpu_get_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); +} + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_mask; +} + +static const struct cpumask *cpu_book_mask(int cpu) +{ + return &cpu_topology[cpu].book_mask; +} + +static struct sched_domain_topology_level s390_topology[] = { + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static int __init topology_init(void) +{ + if (!MACHINE_HAS_TOPOLOGY) { + topology_update_polarization_simple(); + goto out; + } + set_topology_timer(); +out: + + set_sched_topology(s390_topology); + + return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); +} +device_initcall(topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5d21e9e6e7b..c5762324d9e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/traps.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * @@ -14,503 +12,245 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ -#include <linux/config.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/module.h> #include <linux/ptrace.h> -#include <linux/timer.h> +#include <linux/sched.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/module.h> -#include <linux/kallsyms.h> -#include <linux/reboot.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/mathemu.h> -#include <asm/cpcmd.h> -#include <asm/s390_ext.h> -#include <asm/lowcore.h> -#include <asm/debug.h> - -/* Called from entry.S only */ -extern void handle_per_exception(struct pt_regs *regs); - -typedef void pgm_check_handler_t(struct pt_regs *, long); -pgm_check_handler_t *pgm_check_table[128]; - -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_PROCESS_DEBUG -int sysctl_userprocess_debug = 1; -#else -int sysctl_userprocess_debug = 0; -#endif -#endif - -extern pgm_check_handler_t do_protection_exception; -extern pgm_check_handler_t do_dat_exception; -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -extern void pfault_interrupt(struct pt_regs *regs, __u16 error_code); -static ext_int_info_t ext_int_pfault; -#endif -extern pgm_check_handler_t do_monitor_call; - -#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) +#include "entry.h" -#ifndef CONFIG_64BIT -#define FOURLONG "%08lx %08lx %08lx %08lx\n" -static int kstack_depth_to_print = 12; -#else /* CONFIG_64BIT */ -#define FOURLONG "%016lx %016lx %016lx %016lx\n" -static int kstack_depth_to_print = 20; -#endif /* CONFIG_64BIT */ +int show_unhandled_signals = 1; -/* - * For show_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) +static inline void __user *get_trap_ip(struct pt_regs *regs) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; - } -} +#ifdef CONFIG_64BIT + unsigned long address; -void show_trace(struct task_struct *task, unsigned long * stack) -{ - register unsigned long __r15 asm ("15"); - unsigned long sp; - - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); -#ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); -#endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); - if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); + if (regs->int_code & 0x200) + address = *(unsigned long *)(current->thread.trap_tdb + 24); else - __show_trace(sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); - printk("\n"); + address = regs->psw.addr; + return (void __user *) + ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#else + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#endif } -void show_stack(struct task_struct *task, unsigned long *sp) +static inline void report_user_fault(struct pt_regs *regs, int signr) { - register unsigned long * __r15 asm ("15"); - unsigned long *stack; - int i; - - // debugging aid: "show_stack(NULL);" prints the - // back trace for this cpu. - - if (!sp) - sp = task ? (unsigned long *) task->thread.ksp : __r15; - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i * sizeof (long) % 32) == 0)) - printk("\n "); - printk("%p ", (void *)*stack++); - } + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) + return; + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk("User process fault: interruption code 0x%X ", regs->int_code); + print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); printk("\n"); - show_trace(task, sp); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - show_stack(0, 0); + show_regs(regs); } -EXPORT_SYMBOL(dump_stack); - -void show_registers(struct pt_regs *regs) -{ - mm_segment_t old_fs; - char *mode; - int i; - - mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl"; - printk("%s PSW : %p %p", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); - printk("%s GPRS: " FOURLONG, mode, - regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); - printk(" " FOURLONG, - regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); - printk(" " FOURLONG, - regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); - printk(" " FOURLONG, - regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); - -#if 0 - /* FIXME: this isn't needed any more but it changes the ksymoops - * input. To remove or not to remove ... */ - save_access_regs(regs->acrs); - printk("%s ACRS: %08x %08x %08x %08x\n", mode, - regs->acrs[0], regs->acrs[1], regs->acrs[2], regs->acrs[3]); - printk(" %08x %08x %08x %08x\n", - regs->acrs[4], regs->acrs[5], regs->acrs[6], regs->acrs[7]); - printk(" %08x %08x %08x %08x\n", - regs->acrs[8], regs->acrs[9], regs->acrs[10], regs->acrs[11]); - printk(" %08x %08x %08x %08x\n", - regs->acrs[12], regs->acrs[13], regs->acrs[14], regs->acrs[15]); -#endif - - /* - * Print the first 20 byte of the instruction stream at the - * time of the fault. - */ - old_fs = get_fs(); - if (regs->psw.mask & PSW_MASK_PSTATE) - set_fs(USER_DS); - else - set_fs(KERNEL_DS); - printk("%s Code: ", mode); - for (i = 0; i < 20; i++) { - unsigned char c; - if (__get_user(c, (char __user *)(regs->psw.addr + i))) { - printk(" Bad PSW."); - break; - } - printk("%02x ", c); - } - set_fs(old_fs); - - printk("\n"); -} - -/* This is called from fs/proc/array.c */ -char *task_show_regs(struct task_struct *task, char *buffer) +int is_valid_bugaddr(unsigned long addr) { - struct pt_regs *regs; - - regs = task_pt_regs(task); - buffer += sprintf(buffer, "task: %p, ksp: %p\n", - task, (void *)task->thread.ksp); - buffer += sprintf(buffer, "User PSW : %p %p\n", - (void *) regs->psw.mask, (void *)regs->psw.addr); - - buffer += sprintf(buffer, "User GPRS: " FOURLONG, - regs->gprs[0], regs->gprs[1], - regs->gprs[2], regs->gprs[3]); - buffer += sprintf(buffer, " " FOURLONG, - regs->gprs[4], regs->gprs[5], - regs->gprs[6], regs->gprs[7]); - buffer += sprintf(buffer, " " FOURLONG, - regs->gprs[8], regs->gprs[9], - regs->gprs[10], regs->gprs[11]); - buffer += sprintf(buffer, " " FOURLONG, - regs->gprs[12], regs->gprs[13], - regs->gprs[14], regs->gprs[15]); - buffer += sprintf(buffer, "User ACRS: %08x %08x %08x %08x\n", - task->thread.acrs[0], task->thread.acrs[1], - task->thread.acrs[2], task->thread.acrs[3]); - buffer += sprintf(buffer, " %08x %08x %08x %08x\n", - task->thread.acrs[4], task->thread.acrs[5], - task->thread.acrs[6], task->thread.acrs[7]); - buffer += sprintf(buffer, " %08x %08x %08x %08x\n", - task->thread.acrs[8], task->thread.acrs[9], - task->thread.acrs[10], task->thread.acrs[11]); - buffer += sprintf(buffer, " %08x %08x %08x %08x\n", - task->thread.acrs[12], task->thread.acrs[13], - task->thread.acrs[14], task->thread.acrs[15]); - return buffer; + return 1; } -DEFINE_SPINLOCK(die_lock); - -void die(const char * str, struct pt_regs * regs, long err) +static void __kprobes do_trap(struct pt_regs *regs, + int si_signo, int si_code, char *str) { - static int die_counter; - - debug_stop_all(); - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); - show_regs(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception: panic_on_oops"); - do_exit(SIGSEGV); -} + siginfo_t info; -static void inline -report_user_fault(long interruption_code, struct pt_regs *regs) -{ -#if defined(CONFIG_SYSCTL) - if (!sysctl_userprocess_debug) + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) return; -#endif -#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) - printk("User process fault: interruption code 0x%lX\n", - interruption_code); - show_regs(regs); -#endif -} -static void inline do_trap(long interruption_code, int signr, char *str, - struct pt_regs *regs, siginfo_t *info) -{ - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); - - if (regs->psw.mask & PSW_MASK_PSTATE) { - struct task_struct *tsk = current; - - tsk->thread.trap_no = interruption_code & 0xffff; - force_sig_info(signr, info, tsk); - report_user_fault(interruption_code, regs); + if (user_mode(regs)) { + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = get_trap_ip(regs); + force_sig_info(si_signo, &info, current); + report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; - else - die(str, regs, interruption_code); + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; + else { + enum bug_trap_type btt; + + btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); + if (btt == BUG_TRAP_TYPE_WARN) + return; + die(regs, str); + } } } -static inline void *get_check_address(struct pt_regs *regs) +void __kprobes do_per_trap(struct pt_regs *regs) { - return (void *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN); -} + siginfo_t info; -void do_single_step(struct pt_regs *regs) -{ - if ((current->ptrace & PT_PTRACED) != 0) - force_sig(SIGTRAP, current); + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + if (!current->ptrace) + return; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = + (void __force __user *) current->thread.per_event.address; + force_sig_info(SIGTRAP, &info, current); } -asmlinkage void -default_trap_handler(struct pt_regs * regs, long interruption_code) +void default_trap_handler(struct pt_regs *regs) { - if (regs->psw.mask & PSW_MASK_PSTATE) { - local_irq_enable(); + if (user_mode(regs)) { + report_user_fault(regs, SIGSEGV); do_exit(SIGSEGV); - report_user_fault(interruption_code, regs); } else - die("Unknown program exception", regs, interruption_code); + die(regs, "Unknown program exception"); } -#define DO_ERROR_INFO(signr, str, name, sicode, siaddr) \ -asmlinkage void name(struct pt_regs * regs, long interruption_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void *)siaddr; \ - do_trap(interruption_code, signr, str, regs, &info); \ +#define DO_ERROR_INFO(name, signr, sicode, str) \ +void name(struct pt_regs *regs) \ +{ \ + do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(SIGILL, "addressing exception", addressing_exception, - ILL_ILLADR, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "execute exception", execute_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint divide exception", divide_exception, - FPE_INTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint overflow exception", overflow_exception, - FPE_INTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP overflow exception", hfp_overflow_exception, - FPE_FLTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP underflow exception", hfp_underflow_exception, - FPE_FLTUND, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP significance exception", hfp_significance_exception, - FPE_FLTRES, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP divide exception", hfp_divide_exception, - FPE_FLTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP square root exception", hfp_sqrt_exception, - FPE_FLTINV, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "operand exception", operand_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "privileged operation", privileged_op, - ILL_PRVOPC, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "special operation exception", special_op_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "translation exception", translation_exception, - ILL_ILLOPN, get_check_address(regs)) - -static inline void -do_fp_trap(struct pt_regs *regs, void *location, - int fpc, long interruption_code) -{ - siginfo_t si; +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, + "addressing exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, + "execute exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, + "fixpoint divide exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, + "fixpoint overflow exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, + "HFP overflow exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, + "HFP underflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, + "HFP significance exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, + "HFP divide exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, + "HFP square root exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, + "operand exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, + "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, + "special operation exception") +DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, + "translation exception") + +#ifdef CONFIG_64BIT +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, + "transaction constraint exception") +#endif - si.si_signo = SIGFPE; - si.si_errno = 0; - si.si_addr = location; - si.si_code = 0; +static inline void do_fp_trap(struct pt_regs *regs, int fpc) +{ + int si_code = 0; /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ if (fpc & 0x8000) /* invalid fp operation */ - si.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; else if (fpc & 0x4000) /* div by 0 */ - si.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; else if (fpc & 0x2000) /* overflow */ - si.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; else if (fpc & 0x1000) /* underflow */ - si.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; else if (fpc & 0x0800) /* inexact */ - si.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; } - current->thread.ieee_instruction_pointer = (addr_t) location; - do_trap(interruption_code, SIGFPE, - "floating point exception", regs, &si); + do_trap(regs, SIGFPE, si_code, "floating point exception"); } -asmlinkage void illegal_op(struct pt_regs * regs, long interruption_code) +void __kprobes illegal_op(struct pt_regs *regs) { siginfo_t info; __u8 opcode[6]; - __u16 *location; + __u16 __user *location; int signal = 0; - location = (__u16 *) get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_trap_ip(regs); - if (regs->psw.mask & PSW_MASK_PSTATE) { - get_user(*((__u16 *) opcode), (__u16 __user *) location); + if (user_mode(regs)) { + if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (current->ptrace & PT_PTRACED) - force_sig(SIGTRAP, current); - else + if (current->ptrace) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = location; + force_sig_info(SIGTRAP, &info, current); + } else signal = SIGILL; #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_b3(opcode, regs); } else if (opcode[0] == 0xed) { - get_user(*((__u32 *) (opcode+2)), - (__u32 *)(location+1)); + if (get_user(*((__u32 *) (opcode+2)), + (__u32 __user *)(location+1))) + return; signal = math_emu_ed(opcode, regs); } else if (*((__u16 *) opcode) == 0xb299) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_srnm(opcode, regs); } else if (*((__u16 *) opcode) == 0xb29c) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_stfpc(opcode, regs); } else if (*((__u16 *) opcode) == 0xb29d) { - get_user(*((__u16 *) (opcode+2)), location+1); + if (get_user(*((__u16 *) (opcode+2)), location+1)) + return; signal = math_emu_lfpc(opcode, regs); #endif } else signal = SIGILL; - } else - signal = SIGILL; + } else { + /* + * If we get an illegal op in kernel mode, send it through the + * kprobes notifier. If kprobes doesn't pick it up, SIGILL + */ + if (notify_die(DIE_BPT, "bpt", regs, 0, + 3, SIGTRAP) != NOTIFY_STOP) + signal = SIGILL; + } #ifdef CONFIG_MATHEMU if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); - else if (signal == SIGSEGV) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void *) location; - do_trap(interruption_code, signal, - "user address fault", regs, &info); - } else + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal == SIGSEGV) + do_trap(regs, signal, SEGV_MAPERR, "user address fault"); + else #endif - if (signal) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void *) location; - do_trap(interruption_code, signal, - "illegal operation", regs, &info); - } + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); } #ifdef CONFIG_MATHEMU -asmlinkage void -specification_exception(struct pt_regs * regs, long interruption_code) +void specification_exception(struct pt_regs *regs) { __u8 opcode[6]; - __u16 *location = NULL; + __u16 __user *location = NULL; int signal = 0; - location = (__u16 *) get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = (__u16 __user *) get_trap_ip(regs); - if (regs->psw.mask & PSW_MASK_PSTATE) { + if (user_mode(regs)) { get_user(*((__u16 *) opcode), location); switch (opcode[0]) { case 0x28: /* LDR Rx,Ry */ @@ -543,43 +283,27 @@ specification_exception(struct pt_regs * regs, long interruption_code) signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(interruption_code, signal, - "specification exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "specification exception"); } #else -DO_ERROR_INFO(SIGILL, "specification exception", specification_exception, - ILL_ILLOPN, get_check_address(regs)); +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, + "specification exception"); #endif -asmlinkage void data_exception(struct pt_regs * regs, long interruption_code) +void data_exception(struct pt_regs *regs) { - __u16 *location; + __u16 __user *location; int signal = 0; - location = (__u16 *) get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_trap_ip(regs); if (MACHINE_HAS_IEEE) - __asm__ volatile ("stfpc %0\n\t" - : "=m" (current->thread.fp_regs.fpc)); + asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); #ifdef CONFIG_MATHEMU - else if (regs->psw.mask & PSW_MASK_PSTATE) { + else if (user_mode(regs)) { __u8 opcode[6]; get_user(*((__u16 *) opcode), location); switch (opcode[0]) { @@ -611,7 +335,7 @@ asmlinkage void data_exception(struct pt_regs * regs, long interruption_code) break; case 0xed: get_user(*((__u32 *) (opcode+2)), - (__u32 *)(location+1)); + (__u32 __user *)(location+1)); signal = math_emu_ed(opcode, regs); break; case 0xb2: @@ -638,35 +362,21 @@ asmlinkage void data_exception(struct pt_regs * regs, long interruption_code) else signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(interruption_code, signal, - "data exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "data exception"); } -asmlinkage void space_switch_exception(struct pt_regs * regs, long int_code) +void space_switch_exception(struct pt_regs *regs) { - siginfo_t info; - /* Set user psw back to home space mode. */ - if (regs->psw.mask & PSW_MASK_PSTATE) + if (user_mode(regs)) regs->psw.mask |= PSW_ASC_HOME; /* Send SIGILL. */ - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = get_check_address(regs); - do_trap(int_code, SIGILL, "space switch event", regs, &info); + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } -asmlinkage void kernel_stack_overflow(struct pt_regs * regs) +void __kprobes kernel_stack_overflow(struct pt_regs * regs) { bust_spinlocks(1); printk("Kernel stack overflow.\n"); @@ -675,59 +385,7 @@ asmlinkage void kernel_stack_overflow(struct pt_regs * regs) panic("Corrupt kernel stack, can't continue."); } -/* init is done in lowcore.S and head.S */ - void __init trap_init(void) { - int i; - - for (i = 0; i < 128; i++) - pgm_check_table[i] = &default_trap_handler; - pgm_check_table[1] = &illegal_op; - pgm_check_table[2] = &privileged_op; - pgm_check_table[3] = &execute_exception; - pgm_check_table[4] = &do_protection_exception; - pgm_check_table[5] = &addressing_exception; - pgm_check_table[6] = &specification_exception; - pgm_check_table[7] = &data_exception; - pgm_check_table[8] = &overflow_exception; - pgm_check_table[9] = ÷_exception; - pgm_check_table[0x0A] = &overflow_exception; - pgm_check_table[0x0B] = ÷_exception; - pgm_check_table[0x0C] = &hfp_overflow_exception; - pgm_check_table[0x0D] = &hfp_underflow_exception; - pgm_check_table[0x0E] = &hfp_significance_exception; - pgm_check_table[0x0F] = &hfp_divide_exception; - pgm_check_table[0x10] = &do_dat_exception; - pgm_check_table[0x11] = &do_dat_exception; - pgm_check_table[0x12] = &translation_exception; - pgm_check_table[0x13] = &special_op_exception; -#ifdef CONFIG_64BIT - pgm_check_table[0x38] = &do_dat_exception; - pgm_check_table[0x39] = &do_dat_exception; - pgm_check_table[0x3A] = &do_dat_exception; - pgm_check_table[0x3B] = &do_dat_exception; -#endif /* CONFIG_64BIT */ - pgm_check_table[0x15] = &operand_exception; - pgm_check_table[0x1C] = &space_switch_exception; - pgm_check_table[0x1D] = &hfp_sqrt_exception; - pgm_check_table[0x40] = &do_monitor_call; - - if (MACHINE_IS_VM) { -#ifdef CONFIG_PFAULT - /* - * Try to get pfault pseudo page faults going. - */ - if (register_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault) != 0) - panic("Couldn't request external interrupt 0x2603"); - - if (pfault_init() == 0) - return; - - /* Tough luck, no pfault. */ - unregister_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault); -#endif - } + local_mcck_enable(); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c new file mode 100644 index 00000000000..61364909678 --- /dev/null +++ b/arch/s390/kernel/vdso.c @@ -0,0 +1,333 @@ +/* + * vdso setup for s390 + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/facility.h> + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif + +#ifdef CONFIG_64BIT +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +static unsigned int vdso64_pages; +static struct page **vdso64_pagelist; +#endif /* CONFIG_64BIT */ + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso_enabled = 1; + +static int __init vdso_setup(char *s) +{ + unsigned long val; + int rc; + + rc = 0; + if (strncmp(s, "on", 3) == 0) + vdso_enabled = 1; + else if (strncmp(s, "off", 4) == 0) + vdso_enabled = 0; + else { + rc = kstrtoul(s, 0, &val); + vdso_enabled = rc ? 0 : !!val; + } + return !rc; +} +__setup("vdso=", vdso_setup); + +/* + * The vdso data page + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + vd->ectg_available = test_facility(31); +} + +#ifdef CONFIG_64BIT +/* + * Allocate/free per cpu vdso data. + */ +#define SEGMENT_ORDER 2 + +int vdso_alloc_per_cpu(struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_INVALID, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x02000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +void vdso_free_per_cpu(struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} + +static void vdso_init_cr5(void) +{ + unsigned long cr5; + + if (!vdso_enabled) + return; + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} +#endif /* CONFIG_64BIT */ + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page **vdso_pagelist; + unsigned long vdso_pages; + unsigned long vdso_base; + int rc; + + if (!vdso_enabled) + return 0; + /* + * Only map the vdso for dynamically linked elf binaries. + */ + if (!uses_interp) + return 0; + +#ifdef CONFIG_64BIT + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif +#else + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for + * the process + */ + if (vdso_pages == 0) + return 0; + + current->mm->context.vdso_base = 0; + + /* + * pick a base address for the vDSO in process space. We try to put + * it at vdso_base which is the "natural" base for it, but we might + * fail and end up putting it elsewhere. + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto out_up; + } + + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + current->mm->context.vdso_base = vdso_base; + + /* + * our vma flags don't have VM_WRITE so by default, the process + * isn't allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW + * on those pages but it's then your responsibility to never do that + * on the "data" page of the vDSO or you'll stop getting kernel + * updates and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though. + */ + rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (rc) + current->mm->context.vdso_base = 0; +out_up: + up_write(&mm->mmap_sem); + return rc; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) + return "[vdso]"; + return NULL; +} + +static int __init vdso_init(void) +{ + int i; + + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) + /* Calculate the size of the 32 bit vDSO */ + vdso32_pages = ((&vdso32_end - &vdso32_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1), + GFP_KERNEL); + BUG_ON(vdso32_pagelist == NULL); + for (i = 0; i < vdso32_pages - 1; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso32_pagelist[i] = pg; + } + vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); + vdso32_pagelist[vdso32_pages] = NULL; +#endif + +#ifdef CONFIG_64BIT + /* Calculate the size of the 64 bit vDSO */ + vdso64_pages = ((&vdso64_end - &vdso64_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1), + GFP_KERNEL); + BUG_ON(vdso64_pagelist == NULL); + for (i = 0; i < vdso64_pages - 1; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso64_pagelist[i] = pg; + } + vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); + vdso64_pagelist[vdso64_pages] = NULL; + if (vdso_alloc_per_cpu(&S390_lowcore)) + BUG(); + vdso_init_cr5(); +#endif /* CONFIG_64BIT */ + + get_page(virt_to_page(vdso_data)); + + smp_wmb(); + + return 0; +} +early_initcall(vdso_init); + +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore new file mode 100644 index 00000000000..e45fba9d0ce --- /dev/null +++ b/arch/s390/kernel/vdso32/.gitignore @@ -0,0 +1 @@ +vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 00000000000..8ad2b34ad15 --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,58 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_31 += -m31 -s + +KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S new file mode 100644 index 00000000000..36aaa25d05d --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -0,0 +1,39 @@ +/* + * Userland implementation of clock_getres() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + chi %r2,__CLOCK_REALTIME + je 0f + chi %r2,__CLOCK_MONOTONIC + jne 3f +0: ltr %r3,%r3 + jz 2f /* res == NULL */ + basr %r1,0 +1: l %r0,4f-1b(%r1) + xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ + st %r0,4(%r3) /* store tp->tv_usec */ +2: lhi %r2,0 + br %r14 +3: lhi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +4: .long __CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S new file mode 100644 index 00000000000..65fc3979c2f --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -0,0 +1,125 @@ +/* + * Userland implementation of clock_gettime() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + basr %r5,0 +0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME + je 10f + chi %r2,__CLOCK_MONOTONIC + jne 19f + + /* CLOCK_MONOTONIC */ + ltr %r3,%r3 + jz 9f /* tp == NULL */ +1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,2f + ahi %r0,-1 +2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ + lr %r2,%r0 + l %r0,__VDSO_TK_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 3f + a %r0,__VDSO_TK_MULT(%r5) +3: alr %r0,%r2 + al %r0,__VDSO_WTOM_NSEC(%r5) + al %r1,__VDSO_WTOM_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r2) /* >> tk->shift */ + l %r2,__VDSO_WTOM_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,20f-6b(%r5) + jl 8f +7: ahi %r2,1 + sl %r1,20f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +9: lhi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +10: ltr %r3,%r3 /* tp == NULL */ + jz 18f +11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 11b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,12f + ahi %r0,-1 +12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ + lr %r2,%r0 + l %r0,__VDSO_TK_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 13f + a %r0,__VDSO_TK_MULT(%r5) +13: alr %r0,%r2 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,14f + ahi %r0,1 +14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r2) /* >> tk->shift */ + l %r2,__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 11b + basr %r5,0 +15: ltr %r0,%r0 + jnz 16f + cl %r1,20f-15b(%r5) + jl 17f +16: ahi %r2,1 + sl %r1,20f-15b(%r5) + brc 3,15b + ahi %r0,-1 + j 15b +17: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +18: lhi %r2,0 + br %r14 + + /* Fallback to system call */ +19: lhi %r1,__NR_clock_gettime + svc 0 + br %r14 + +20: .long 1000000000 +21: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S new file mode 100644 index 00000000000..fd621a950f7 --- /dev/null +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -0,0 +1,79 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + basr %r5,0 +0: al %r5,13f-0b(%r5) /* get &_vdso_data */ +1: ltr %r3,%r3 /* check if tz is NULL */ + je 2f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +2: ltr %r2,%r2 /* check if tv is NULL */ + je 10f + l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,3f + ahi %r0,-1 +3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ + st %r0,24(%r15) + l %r0,__VDSO_TK_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 4f + a %r0,__VDSO_TK_MULT(%r5) +4: al %r0,24(%r15) + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r4) /* >> tk->shift */ + l %r4,24(%r15) /* get tv_sec from stack */ + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,11f-6b(%r5) + jl 8f +7: ahi %r4,1 + sl %r1,11f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r4,0(%r2) /* store tv->tv_sec */ + ltr %r1,%r1 + m %r0,12f-6b(%r5) + jnm 9f + al %r0,12f-6b(%r5) +9: srl %r0,6 + st %r0,4(%r2) /* store tv->tv_usec */ +10: slr %r2,%r2 + br %r14 +11: .long 1000000000 +12: .long 274877907 +13: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 00000000000..a8c379fa124 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 00000000000..ae42f8ce350 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore new file mode 100644 index 00000000000..3fd18cf9fec --- /dev/null +++ b/arch/s390/kernel/vdso64/.gitignore @@ -0,0 +1 @@ +vdso64.lds diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile new file mode 100644 index 00000000000..2a8ddfd12a5 --- /dev/null +++ b/arch/s390/kernel/vdso64/Makefile @@ -0,0 +1,58 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_64 += -m64 -s + +KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S new file mode 100644 index 00000000000..34deba7c7ed --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -0,0 +1,46 @@ +/* + * Userland implementation of clock_getres() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + cghi %r2,__CLOCK_REALTIME + je 0f + cghi %r2,__CLOCK_MONOTONIC + je 0f + cghi %r2,__CLOCK_THREAD_CPUTIME_ID + je 0f + cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ + jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f +0: ltgr %r3,%r3 + jz 1f /* res == NULL */ + larl %r1,3f + lg %r0,0(%r1) + xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ + stg %r0,8(%r3) /* store tp->tv_usec */ +1: lghi %r2,0 + br %r14 +2: lghi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +3: .quad __CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S new file mode 100644 index 00000000000..91940ed33a4 --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -0,0 +1,127 @@ +/* + * Userland implementation of clock_gettime() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME + je 4f + cghi %r2,__CLOCK_THREAD_CPUTIME_ID + je 9f + cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ + je 9f + cghi %r2,__CLOCK_MONOTONIC + jne 12f + + /* CLOCK_MONOTONIC */ + ltgr %r3,%r3 + jz 3f /* tp == NULL */ +0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + lg %r0,__VDSO_WTOM_SEC(%r5) + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_WTOM_NSEC(%r5) + srlg %r1,%r1,0(%r2) /* >> tk->shift */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,13f +1: clg %r1,0(%r5) + jl 2f + slg %r1,0(%r5) + aghi %r0,1 + j 1b +2: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +3: lghi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +4: ltr %r3,%r3 /* tp == NULL */ + jz 8f +5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 5b + stck 48(%r15) /* Store TOD clock */ + lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + srlg %r1,%r1,0(%r2) /* >> tk->shift */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 5b + larl %r5,13f +6: clg %r1,0(%r5) + jl 7f + slg %r1,0(%r5) + aghi %r0,1 + j 6b +7: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +8: lghi %r2,0 + br %r14 + + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + epsw %r5,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + tml %r5,0x4000 + jo 11f + tml %r5,0x8000 + jno 10f + sacf 256 + j 11f +10: sacf 0 +11: sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + + /* Fallback to system call */ +12: lghi %r1,__NR_clock_gettime + svc 0 + br %r14 + +13: .quad 1000000000 +14: .quad 19342813113834067 + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S new file mode 100644 index 00000000000..d0860d1d0cc --- /dev/null +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -0,0 +1,57 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + larl %r5,_vdso_data +0: ltgr %r3,%r3 /* check if tz is NULL */ + je 1f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +1: ltgr %r2,%r2 /* check if tv is NULL */ + je 4f + lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srlg %r1,%r1,0(%r5) /* >> tk->shift */ + larl %r5,5f +2: clg %r1,0(%r5) + jl 3f + slg %r1,0(%r5) + aghi %r0,1 + j 2b +3: stg %r0,0(%r2) /* store tv->tv_sec */ + slgr %r0,%r0 /* tv_nsec -> tv_usec */ + ml %r0,8(%r5) + srlg %r0,%r0,6 + stg %r0,8(%r2) /* store tv->tv_usec */ +4: lghi %r2,0 + br %r14 +5: .quad 1000000000 + .long 274877907 + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/s390/kernel/vdso64/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S new file mode 100644 index 00000000000..9f5979d102a --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 00000000000..c245842b516 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/s390/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 9289face302..35b13ed0af5 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -2,129 +2,99 @@ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com) */ +#include <asm/thread_info.h> +#include <asm/page.h> #include <asm-generic/vmlinux.lds.h> -#include <linux/config.h> #ifndef CONFIG_64BIT OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390) -ENTRY(_start) +OUTPUT_ARCH(s390:31-bit) +ENTRY(startup) jiffies = jiffies_64 + 4; #else OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(_start) +ENTRY(startup) jiffies = jiffies_64; #endif +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ +} + SECTIONS { - . = 0x00000000; - _text = .; /* Text and read-only data */ - .text : { - *(.text) - SCHED_TEXT - LOCK_TEXT - *(.fixup) - *(.gnu.warning) - } = 0x0700 + . = 0x00000000; + .text : { + _text = .; /* Text and read-only data */ + HEAD_TEXT + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + } :text = 0x0700 - _etext = .; /* End of text section */ + _etext = .; /* End of text section */ - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; + NOTES :text :note - RODATA + .dummy : { *(.dummy) } :data -#ifdef CONFIG_SHARED_KERNEL - . = ALIGN(1048576); /* VM shared segments are 1MB aligned */ + RO_DATA_SECTION(PAGE_SIZE) - _eshared = .; /* End of shareable data */ +#ifdef CONFIG_SHARED_KERNEL + . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ #endif - .data : { /* Data */ - *(.data) - CONSTRUCTORS + . = ALIGN(PAGE_SIZE); + _eshared = .; /* End of shareable data */ + _sdata = .; /* Start of data section */ + + EXCEPTION_TABLE(16) :data + + RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) + + _edata = .; /* End of data section */ + + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ + __init_begin = .; + + INIT_TEXT_SECTION(PAGE_SIZE) + + /* + * .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { + EXIT_TEXT } - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } - .init.data : { *(.init.data) } - . = ALIGN(256); - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - } - __initcall_end = .; - __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } - __con_initcall_end = .; - SECURITY_INIT - . = ALIGN(256); - __initramfs_start = .; - .init.ramfs : { *(.init.initramfs) } - . = ALIGN(2); - __initramfs_end = .; - . = ALIGN(256); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; - . = ALIGN(4096); - __init_end = .; - /* freed after init ends here */ - - __bss_start = .; /* BSS */ - .bss : { *(.bss) } - . = ALIGN(2); - __bss_stop = .; - - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) + .exit.data : { + EXIT_DATA } - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } + /* early.c uses stsi, which requires page aligned data. */ + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(0x100) + + PERCPU_SECTION(0x100) + . = ALIGN(PAGE_SIZE); + __init_end = .; /* freed after init ends here */ + + BSS_SECTION(0, 2, 0) + + _end = . ; + + /* Debugging sections. */ + STABS_DEBUG + DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 22a895ecb7a..8c34363d6f1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,166 +1,198 @@ /* - * arch/s390/kernel/vtime.c * Virtual cpu timer based timer functions. * - * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2004, 2012 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> */ -#include <linux/config.h> -#include <linux/module.h> +#include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/export.h> #include <linux/kernel.h> +#include <linux/timex.h> +#include <linux/types.h> #include <linux/time.h> -#include <linux/delay.h> -#include <linux/init.h> +#include <linux/cpu.h> #include <linux/smp.h> -#include <linux/types.h> -#include <linux/timex.h> -#include <linux/notifier.h> -#include <linux/kernel_stat.h> -#include <linux/rcupdate.h> -#include <linux/posix-timers.h> -#include <asm/s390_ext.h> -#include <asm/timer.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/vtimer.h> +#include <asm/vtime.h> +#include <asm/irq.h> +#include "entry.h" + +static void virt_timer_expire(void); -static ext_int_info_t ext_int_info_timer; -DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); +DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +static LIST_HEAD(virt_timer_list); +static DEFINE_SPINLOCK(virt_timer_lock); +static atomic64_t virt_timer_current; +static atomic64_t virt_timer_elapsed; + +static inline u64 get_vtimer(void) +{ + u64 timer; + + asm volatile("stpt %0" : "=m" (timer)); + return timer; +} + +static inline void set_vtimer(u64 expires) +{ + u64 timer; + + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " spt %1" /* Set new value imm. afterwards */ + : "=m" (timer) : "m" (expires)); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} + +static inline int virt_timer_forward(u64 elapsed) +{ + BUG_ON(!irqs_disabled()); + + if (list_empty(&virt_timer_list)) + return 0; + elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed); + return elapsed >= atomic64_read(&virt_timer_current); +} -#ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_user_vtime(struct task_struct *tsk) +static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) { - cputime_t cputime; - __u64 timer, clock; - int rcu_user_flag; + struct thread_info *ti = task_thread_info(tsk); + u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " STCK %1" /* Store current tod clock value */ - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock) ); + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stck %1" /* Store current tod clock value */ + : "=m" (S390_lowcore.last_update_timer), + "=m" (S390_lowcore.last_update_clock)); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; - - cputime = S390_lowcore.user_timer >> 12; - rcu_user_flag = cputime != 0; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); - - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime); - - cputime = S390_lowcore.steal_clock; - if ((__s64) cputime > 0) { - cputime >>= 12; - S390_lowcore.steal_clock -= cputime << 12; - account_steal_time(tsk, cputime); + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, hardirq_offset, system, system); + + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(steal); } - run_local_timers(); - if (rcu_pending(smp_processor_id())) - rcu_check_callbacks(smp_processor_id(), rcu_user_flag); - scheduler_tick(); - run_posix_cpu_timers(tsk); + return virt_timer_forward(user + system); +} + +void vtime_task_switch(struct task_struct *prev) +{ + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(current); + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; +} + +/* + * In s390, accounting pending user time also implies + * accounting system time in order to correctly compute + * the stolen time accounting. + */ +void vtime_account_user(struct task_struct *tsk) +{ + if (do_account_vtime(tsk, HARDIRQ_OFFSET)) + virt_timer_expire(); } /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_system_vtime(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { - cputime_t cputime; - __u64 timer; + struct thread_info *ti = task_thread_info(tsk); + u64 timer, system; + + WARN_ON_ONCE(!irqs_disabled()); timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); + S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); -} + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, 0, system, system); -static inline void set_vtimer(__u64 expires) -{ - __u64 timer; - - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; - - /* store expire time for this CPU timer */ - per_cpu(virt_cpu_timer, smp_processor_id()).to_expire = expires; + virt_timer_forward(system); } -#else -static inline void set_vtimer(__u64 expires) -{ - S390_lowcore.last_update_timer = expires; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); - /* store expire time for this CPU timer */ - per_cpu(virt_cpu_timer, smp_processor_id()).to_expire = expires; -} -#endif +void vtime_account_system(struct task_struct *tsk) +__attribute__((alias("vtime_account_irq_enter"))); +EXPORT_SYMBOL_GPL(vtime_account_system); -static void start_cpu_timer(void) +void __kprobes vtime_stop_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &per_cpu(virt_cpu_timer, smp_processor_id()); - - /* CPU timer interrupt is pending, don't reprogramm it */ - if (vt_list->idle & 1LL<<63) - return; - - if (!list_empty(&vt_list->list)) - set_vtimer(vt_list->idle); + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + unsigned long long idle_time; + unsigned long psw_mask; + + trace_hardirqs_on(); + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + idle->nohz_delay = 0; + + /* Call the assembler magic in entry.S */ + psw_idle(idle, psw_mask); + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle->sequence++; + smp_wmb(); + idle_time = idle->clock_idle_exit - idle->clock_idle_enter; + idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; + idle->idle_time += idle_time; + idle->idle_count++; + account_idle_time(idle_time); + smp_wmb(); + idle->sequence++; } -static void stop_cpu_timer(void) +cputime64_t s390_get_idle_time(int cpu) { - struct vtimer_queue *vt_list; - - vt_list = &per_cpu(virt_cpu_timer, smp_processor_id()); - - /* nothing to do */ - if (list_empty(&vt_list->list)) { - vt_list->idle = VTIMER_MAX_SLICE; - goto fire; - } - - /* store the actual expire value */ - asm volatile ("STPT %0" : "=m" (vt_list->idle)); - - /* - * If the CPU timer is negative we don't reprogramm - * it because we will get instantly an interrupt. - */ - if (vt_list->idle & 1LL<<63) - return; - - vt_list->offset += vt_list->to_expire - vt_list->idle; - - /* - * We cannot halt the CPU timer, we just write a value that - * nearly never expires (only after 71 years) and re-write - * the stored expire value if we continue the timer - */ - fire: - set_vtimer(VTIMER_MAX_SLICE); + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; } /* @@ -169,11 +201,11 @@ static void stop_cpu_timer(void) */ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) { - struct vtimer_list *event; + struct vtimer_list *tmp; - list_for_each_entry(event, head, entry) { - if (event->expires > timer->expires) { - list_add_tail(&timer->entry, &event->entry); + list_for_each_entry(tmp, head, entry) { + if (tmp->expires > timer->expires) { + list_add_tail(&timer->entry, &tmp->entry); return; } } @@ -181,314 +213,158 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) } /* - * Do the callback functions of expired vtimer events. - * Called from within the interrupt handler. + * Handler for expired virtual CPU timer. */ -static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) +static void virt_timer_expire(void) { - struct vtimer_queue *vt_list; - struct vtimer_list *event, *tmp; - void (*fn)(unsigned long, struct pt_regs*); - unsigned long data; - - if (list_empty(cb_list)) - return; - - vt_list = &per_cpu(virt_cpu_timer, smp_processor_id()); - - list_for_each_entry_safe(event, tmp, cb_list, entry) { - fn = event->function; - data = event->data; - fn(data, regs); - - if (!event->interval) - /* delete one shot timer */ - list_del_init(&event->entry); - else { - /* move interval timer back to list */ - spin_lock(&vt_list->lock); - list_del_init(&event->entry); - list_add_sorted(event, &vt_list->list); - spin_unlock(&vt_list->lock); - } + struct vtimer_list *timer, *tmp; + unsigned long elapsed; + LIST_HEAD(cb_list); + + /* walk timer list, fire all expired timers */ + spin_lock(&virt_timer_lock); + elapsed = atomic64_read(&virt_timer_elapsed); + list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) { + if (timer->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&timer->entry, &cb_list); + else + timer->expires -= elapsed; } -} - -/* - * Handler for the virtual CPU timer. - */ -static void do_cpu_timer_interrupt(struct pt_regs *regs, __u16 error_code) -{ - int cpu; - __u64 next, delta; - struct vtimer_queue *vt_list; - struct vtimer_list *event, *tmp; - struct list_head *ptr; - /* the callback queue */ - struct list_head cb_list; - - INIT_LIST_HEAD(&cb_list); - cpu = smp_processor_id(); - vt_list = &per_cpu(virt_cpu_timer, cpu); - - /* walk timer list, fire all expired events */ - spin_lock(&vt_list->lock); - - if (vt_list->to_expire < VTIMER_MAX_SLICE) - vt_list->offset += vt_list->to_expire; - - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { - if (event->expires > vt_list->offset) - /* found first unexpired event, leave */ - break; - - /* re-charge interval timer, we have to add the offset */ - if (event->interval) - event->expires = event->interval + vt_list->offset; - - /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + if (!list_empty(&virt_timer_list)) { + timer = list_first_entry(&virt_timer_list, + struct vtimer_list, entry); + atomic64_set(&virt_timer_current, timer->expires); } - spin_unlock(&vt_list->lock); - do_callbacks(&cb_list, regs); - - /* next event is first in list */ - spin_lock(&vt_list->lock); - if (!list_empty(&vt_list->list)) { - ptr = vt_list->list.next; - event = list_entry(ptr, struct vtimer_list, entry); - next = event->expires - vt_list->offset; - - /* add the expired time from this interrupt handler - * and the callback functions - */ - asm volatile ("STPT %0" : "=m" (delta)); - delta = 0xffffffffffffffffLL - delta + 1; - vt_list->offset += delta; - next -= delta; - } else { - vt_list->offset = 0; - next = VTIMER_MAX_SLICE; + atomic64_sub(elapsed, &virt_timer_elapsed); + spin_unlock(&virt_timer_lock); + + /* Do callbacks and recharge periodic timers */ + list_for_each_entry_safe(timer, tmp, &cb_list, entry) { + list_del_init(&timer->entry); + timer->function(timer->data); + if (timer->interval) { + /* Recharge interval timer */ + timer->expires = timer->interval + + atomic64_read(&virt_timer_elapsed); + spin_lock(&virt_timer_lock); + list_add_sorted(timer, &virt_timer_list); + spin_unlock(&virt_timer_lock); + } } - spin_unlock(&vt_list->lock); - set_vtimer(next); } void init_virt_timer(struct vtimer_list *timer) { timer->function = NULL; INIT_LIST_HEAD(&timer->entry); - spin_lock_init(&timer->lock); } EXPORT_SYMBOL(init_virt_timer); static inline int vtimer_pending(struct vtimer_list *timer) { - return (!list_empty(&timer->entry)); + return !list_empty(&timer->entry); } -/* - * this function should only run on the specified CPU - */ static void internal_add_vtimer(struct vtimer_list *timer) { - unsigned long flags; - __u64 done; - struct vtimer_list *event; - struct vtimer_queue *vt_list; - - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); - - if (timer->cpu != smp_processor_id()) - printk("internal_add_vtimer: BUG, running on wrong CPU"); - - /* if list is empty we only have to set the timer */ - if (list_empty(&vt_list->list)) { - /* reset the offset, this may happen if the last timer was - * just deleted by mod_virt_timer and the interrupt - * didn't happen until here - */ - vt_list->offset = 0; - goto fire; + if (list_empty(&virt_timer_list)) { + /* First timer, just program it. */ + atomic64_set(&virt_timer_current, timer->expires); + atomic64_set(&virt_timer_elapsed, 0); + list_add(&timer->entry, &virt_timer_list); + } else { + /* Update timer against current base. */ + timer->expires += atomic64_read(&virt_timer_elapsed); + if (likely((s64) timer->expires < + (s64) atomic64_read(&virt_timer_current))) + /* The new timer expires before the current timer. */ + atomic64_set(&virt_timer_current, timer->expires); + /* Insert new timer into the list. */ + list_add_sorted(timer, &virt_timer_list); } - - /* save progress */ - asm volatile ("STPT %0" : "=m" (done)); - - /* calculate completed work */ - done = vt_list->to_expire - done + vt_list->offset; - vt_list->offset = 0; - - list_for_each_entry(event, &vt_list->list, entry) - event->expires -= done; - - fire: - list_add_sorted(timer, &vt_list->list); - - /* get first element, which is the next vtimer slice */ - event = list_entry(vt_list->list.next, struct vtimer_list, entry); - - set_vtimer(event->expires); - spin_unlock_irqrestore(&vt_list->lock, flags); - /* release CPU aquired in prepare_vtimer or mod_virt_timer() */ - put_cpu(); } -static inline int prepare_vtimer(struct vtimer_list *timer) +static void __add_vtimer(struct vtimer_list *timer, int periodic) { - if (!timer->function) { - printk("add_virt_timer: uninitialized timer\n"); - return -EINVAL; - } - - if (!timer->expires || timer->expires > VTIMER_MAX_SLICE) { - printk("add_virt_timer: invalid timer expire value!\n"); - return -EINVAL; - } - - if (vtimer_pending(timer)) { - printk("add_virt_timer: timer pending\n"); - return -EBUSY; - } + unsigned long flags; - timer->cpu = get_cpu(); - return 0; + timer->interval = periodic ? timer->expires : 0; + spin_lock_irqsave(&virt_timer_lock, flags); + internal_add_vtimer(timer); + spin_unlock_irqrestore(&virt_timer_lock, flags); } /* * add_virt_timer - add an oneshot virtual CPU timer */ -void add_virt_timer(void *new) +void add_virt_timer(struct vtimer_list *timer) { - struct vtimer_list *timer; - - timer = (struct vtimer_list *)new; - - if (prepare_vtimer(timer) < 0) - return; - - timer->interval = 0; - internal_add_vtimer(timer); + __add_vtimer(timer, 0); } EXPORT_SYMBOL(add_virt_timer); /* * add_virt_timer_int - add an interval virtual CPU timer */ -void add_virt_timer_periodic(void *new) +void add_virt_timer_periodic(struct vtimer_list *timer) { - struct vtimer_list *timer; - - timer = (struct vtimer_list *)new; - - if (prepare_vtimer(timer) < 0) - return; - - timer->interval = timer->expires; - internal_add_vtimer(timer); + __add_vtimer(timer, 1); } EXPORT_SYMBOL(add_virt_timer_periodic); -/* - * If we change a pending timer the function must be called on the CPU - * where the timer is running on, e.g. by smp_call_function_on() - * - * The original mod_timer adds the timer if it is not pending. For compatibility - * we do the same. The timer will be added on the current CPU as a oneshot timer. - * - * returns whether it has modified a pending timer (1) or not (0) - */ -int mod_virt_timer(struct vtimer_list *timer, __u64 expires) +static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic) { - struct vtimer_queue *vt_list; unsigned long flags; - int cpu; - - if (!timer->function) { - printk("mod_virt_timer: uninitialized timer\n"); - return -EINVAL; - } + int rc; - if (!expires || expires > VTIMER_MAX_SLICE) { - printk("mod_virt_timer: invalid expire range\n"); - return -EINVAL; - } + BUG_ON(!timer->function); - /* - * This is a common optimization triggered by the - * networking code - if the timer is re-modified - * to be the same thing then just return: - */ if (timer->expires == expires && vtimer_pending(timer)) return 1; - - cpu = get_cpu(); - vt_list = &per_cpu(virt_cpu_timer, cpu); - - /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vt_list->lock, flags); - - /* if timer isn't pending add it on the current CPU */ - if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vt_list->lock, flags); - /* we do not activate an interval timer with mod_virt_timer */ - timer->interval = 0; - timer->expires = expires; - timer->cpu = cpu; - internal_add_vtimer(timer); - return 0; - } - - /* check if we run on the right CPU */ - if (timer->cpu != cpu) { - printk("mod_virt_timer: running on wrong CPU, check your code\n"); - spin_unlock_irqrestore(&vt_list->lock, flags); - put_cpu(); - return -EINVAL; - } - - list_del_init(&timer->entry); + spin_lock_irqsave(&virt_timer_lock, flags); + rc = vtimer_pending(timer); + if (rc) + list_del_init(&timer->entry); + timer->interval = periodic ? expires : 0; timer->expires = expires; - - /* also change the interval if we have an interval timer */ - if (timer->interval) - timer->interval = expires; - - /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vt_list->lock, flags); internal_add_vtimer(timer); - return 1; + spin_unlock_irqrestore(&virt_timer_lock, flags); + return rc; +} + +/* + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer(struct vtimer_list *timer, u64 expires) +{ + return __mod_vtimer(timer, expires, 0); } EXPORT_SYMBOL(mod_virt_timer); /* - * delete a virtual timer + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires) +{ + return __mod_vtimer(timer, expires, 1); +} +EXPORT_SYMBOL(mod_virt_timer_periodic); + +/* + * Delete a virtual timer. * * returns whether the deleted timer was pending (1) or not (0) */ int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vt_list; - /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); - - /* we don't interrupt a running timer, just let it expire! */ + spin_lock_irqsave(&virt_timer_lock, flags); list_del_init(&timer->entry); - - /* last timer removed */ - if (list_empty(&vt_list->list)) { - vt_list->to_expire = 0; - vt_list->offset = 0; - } - - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&virt_timer_lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -498,55 +374,29 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vt_list; - unsigned long cr0; - - /* kick the virtual timer */ - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); - asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); - __ctl_store(cr0, 0, 0); - cr0 |= 0x400; - __ctl_load(cr0, 0, 0); - - vt_list = &per_cpu(virt_cpu_timer, smp_processor_id()); - INIT_LIST_HEAD(&vt_list->list); - spin_lock_init(&vt_list->lock); - vt_list->to_expire = 0; - vt_list->offset = 0; - vt_list->idle = 0; - + /* set initial cpu timer */ + set_vtimer(VTIMER_MAX_SLICE); } -static int vtimer_idle_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int s390_nohz_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { - switch (action) { - case CPU_IDLE: - stop_cpu_timer(); - break; - case CPU_NOT_IDLE: - start_cpu_timer(); + struct s390_idle_data *idle; + long cpu = (long) hcpu; + + idle = &per_cpu(s390_idle, cpu); + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DYING: + idle->nohz_delay = 0; + default: break; } return NOTIFY_OK; } -static struct notifier_block vtimer_idle_nb = { - .notifier_call = vtimer_idle_notify, -}; - void __init vtime_init(void) { - /* request the cpu timer external interrupt */ - if (register_early_external_interrupt(0x1005, do_cpu_timer_interrupt, - &ext_int_info_timer) != 0) - panic("Couldn't request external interrupt 0x1005"); - - if (register_idle_notifier(&vtimer_idle_nb)) - panic("Couldn't register idle notifier"); - + /* Enable cpu timer interrupts on the boot cpu. */ init_cpu_vtimer(); + cpu_notifier(s390_nohz_notify, 0); } - diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig new file mode 100644 index 00000000000..10d529ac982 --- /dev/null +++ b/arch/s390/kvm/Kconfig @@ -0,0 +1,56 @@ +# +# KVM configuration +# +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + def_bool y + prompt "KVM" + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + def_tristate y + prompt "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM + select PREEMPT_NOTIFIERS + select ANON_INODES + select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD + select KVM_ASYNC_PF + select KVM_ASYNC_PF_SYNC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING + ---help--- + Support hosting paravirtualized guest machines using the SIE + virtualization capability on the mainframe. This should work + on any 64bit machine. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_S390_UCONTROL + bool "Userspace controlled virtual machines" + depends on KVM + ---help--- + Allow CAP_SYS_ADMIN users to create KVM virtual machines that are + controlled by userspace. + + If unsure, say N. + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/vhost/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile new file mode 100644 index 00000000000..b3b55346965 --- /dev/null +++ b/arch/s390/kvm/Makefile @@ -0,0 +1,17 @@ +# Makefile for kernel virtual machines on s390 +# +# Copyright IBM Corp. 2008 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License (version 2 only) +# as published by the Free Software Foundation. + +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o + +ccflags-y := -Ivirt/kvm -Iarch/s390/kvm + +kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o +kvm-objs += diag.o gaccess.o guestdbg.o + +obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c new file mode 100644 index 00000000000..0161675878a --- /dev/null +++ b/arch/s390/kvm/diag.c @@ -0,0 +1,244 @@ +/* + * handling diagnose instructions + * + * Copyright IBM Corp. 2008, 2011 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <asm/pgalloc.h> +#include <asm/virtio-ccw.h> +#include "kvm-s390.h" +#include "trace.h" +#include "trace-s390.h" +#include "gaccess.h" + +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = kvm_s390_get_prefix(vcpu); + + start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + +static int __diag_page_ref_service(struct kvm_vcpu *vcpu) +{ + struct prs_parm { + u16 code; + u16 subcode; + u16 parm_len; + u16 parm_version; + u64 token_addr; + u64 select_mask; + u64 compare_mask; + u64 zarch; + }; + struct prs_parm parm; + int rc; + u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; + u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); + + if (vcpu->run->s.regs.gprs[rx] & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (parm.subcode) { + case 0: /* TOKEN */ + if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { + /* + * If the pagefault handshake is already activated, + * the token must not be changed. We have to return + * decimal 8 instead, as mandated in SC24-6084. + */ + vcpu->run->s.regs.gprs[ry] = 8; + return 0; + } + + if ((parm.compare_mask & parm.select_mask) != parm.compare_mask || + parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + vcpu->arch.pfault_token = parm.token_addr; + vcpu->arch.pfault_select = parm.select_mask; + vcpu->arch.pfault_compare = parm.compare_mask; + vcpu->run->s.regs.gprs[ry] = 0; + rc = 0; + break; + case 1: /* + * CANCEL + * Specification allows to let already pending tokens survive + * the cancel, therefore to reduce code complexity, we assume + * all outstanding tokens are already pending. + */ + if (parm.token_addr || parm.select_mask || + parm.compare_mask || parm.zarch) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + vcpu->run->s.regs.gprs[ry] = 0; + /* + * If the pfault handling was not established or is already + * canceled SC24-6084 requests to return decimal 4. + */ + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + vcpu->run->s.regs.gprs[ry] = 4; + else + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + + rc = 0; + break; + default: + rc = -EOPNOTSUPP; + break; + } + + return rc; +} + +static int __diag_time_slice_end(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); + vcpu->stat.diagnose_44++; + kvm_vcpu_on_spin(vcpu); + return 0; +} + +static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tcpu; + int tid; + int i; + + tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + vcpu->stat.diagnose_9c++; + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); + + if (tid == vcpu->vcpu_id) + return 0; + + kvm_for_each_vcpu(i, tcpu, kvm) + if (tcpu->vcpu_id == tid) { + kvm_vcpu_yield_to(tcpu); + break; + } + + return 0; +} + +static int __diag_ipl_functions(struct kvm_vcpu *vcpu) +{ + unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; + unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; + + VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + switch (subcode) { + case 3: + vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; + break; + case 4: + vcpu->run->s390_reset_flags = 0; + break; + default: + return -EOPNOTSUPP; + } + + kvm_s390_vcpu_stop(vcpu); + vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; + vcpu->run->exit_reason = KVM_EXIT_S390_RESET; + VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx", + vcpu->run->s390_reset_flags); + trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags); + return -EREMOTE; +} + +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret; + + /* No virtio-ccw notification? Get out quickly. */ + if (!vcpu->kvm->arch.css_support || + (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + * - gpr 4 contains the index on the bus (optionally) + */ + ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2] & 0xffffffff, + 8, &vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); + + /* + * Return cookie in gpr 2, but don't overwrite the register if the + * diagnose will be handled by userspace. + */ + if (ret != -EOPNOTSUPP) + vcpu->run->s.regs.gprs[2] = ret; + /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ + return ret < 0 ? ret : 0; +} + +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) +{ + int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + trace_kvm_s390_handle_diag(vcpu, code); + switch (code) { + case 0x10: + return diag_release_pages(vcpu); + case 0x44: + return __diag_time_slice_end(vcpu); + case 0x9c: + return __diag_time_slice_end_directed(vcpu); + case 0x258: + return __diag_page_ref_service(vcpu); + case 0x308: + return __diag_ipl_functions(vcpu); + case 0x500: + return __diag_virtio_hypercall(vcpu); + default: + return -EOPNOTSUPP; + } +} diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c new file mode 100644 index 00000000000..4653ac6e182 --- /dev/null +++ b/arch/s390/kvm/gaccess.c @@ -0,0 +1,726 @@ +/* + * guest access functions + * + * Copyright IBM Corp. 2014 + * + */ + +#include <linux/vmalloc.h> +#include <linux/err.h> +#include <asm/pgtable.h> +#include "kvm-s390.h" +#include "gaccess.h" + +union asce { + unsigned long val; + struct { + unsigned long origin : 52; /* Region- or Segment-Table Origin */ + unsigned long : 2; + unsigned long g : 1; /* Subspace Group Control */ + unsigned long p : 1; /* Private Space Control */ + unsigned long s : 1; /* Storage-Alteration-Event Control */ + unsigned long x : 1; /* Space-Switch-Event Control */ + unsigned long r : 1; /* Real-Space Control */ + unsigned long : 1; + unsigned long dt : 2; /* Designation-Type Control */ + unsigned long tl : 2; /* Region- or Segment-Table Length */ + }; +}; + +enum { + ASCE_TYPE_SEGMENT = 0, + ASCE_TYPE_REGION3 = 1, + ASCE_TYPE_REGION2 = 2, + ASCE_TYPE_REGION1 = 3 +}; + +union region1_table_entry { + unsigned long val; + struct { + unsigned long rto: 52;/* Region-Table Origin */ + unsigned long : 2; + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Region-Second-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long : 1; + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Region-Second-Table Length */ + }; +}; + +union region2_table_entry { + unsigned long val; + struct { + unsigned long rto: 52;/* Region-Table Origin */ + unsigned long : 2; + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Region-Third-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long : 1; + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Region-Third-Table Length */ + }; +}; + +struct region3_table_entry_fc0 { + unsigned long sto: 52;/* Segment-Table Origin */ + unsigned long : 1; + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 1; + unsigned long tf : 2; /* Segment-Table Offset */ + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long tl : 2; /* Segment-Table Length */ +}; + +struct region3_table_entry_fc1 { + unsigned long rfaa : 33; /* Region-Frame Absolute Address */ + unsigned long : 14; + unsigned long av : 1; /* ACCF-Validity Control */ + unsigned long acc: 4; /* Access-Control Bits */ + unsigned long f : 1; /* Fetch-Protection Bit */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long co : 1; /* Change-Recording Override */ + unsigned long : 2; + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +union region3_table_entry { + unsigned long val; + struct region3_table_entry_fc0 fc0; + struct region3_table_entry_fc1 fc1; + struct { + unsigned long : 53; + unsigned long fc : 1; /* Format-Control */ + unsigned long : 4; + unsigned long i : 1; /* Region-Invalid Bit */ + unsigned long cr : 1; /* Common-Region Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; + }; +}; + +struct segment_entry_fc0 { + unsigned long pto: 53;/* Page-Table Origin */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long : 3; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +struct segment_entry_fc1 { + unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ + unsigned long : 3; + unsigned long av : 1; /* ACCF-Validity Control */ + unsigned long acc: 4; /* Access-Control Bits */ + unsigned long f : 1; /* Fetch-Protection Bit */ + unsigned long fc : 1; /* Format-Control */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long co : 1; /* Change-Recording Override */ + unsigned long : 2; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; +}; + +union segment_table_entry { + unsigned long val; + struct segment_entry_fc0 fc0; + struct segment_entry_fc1 fc1; + struct { + unsigned long : 53; + unsigned long fc : 1; /* Format-Control */ + unsigned long : 4; + unsigned long i : 1; /* Segment-Invalid Bit */ + unsigned long cs : 1; /* Common-Segment Bit */ + unsigned long tt : 2; /* Table-Type Bits */ + unsigned long : 2; + }; +}; + +enum { + TABLE_TYPE_SEGMENT = 0, + TABLE_TYPE_REGION3 = 1, + TABLE_TYPE_REGION2 = 2, + TABLE_TYPE_REGION1 = 3 +}; + +union page_table_entry { + unsigned long val; + struct { + unsigned long pfra : 52; /* Page-Frame Real Address */ + unsigned long z : 1; /* Zero Bit */ + unsigned long i : 1; /* Page-Invalid Bit */ + unsigned long p : 1; /* DAT-Protection Bit */ + unsigned long co : 1; /* Change-Recording Override */ + unsigned long : 8; + }; +}; + +/* + * vaddress union in order to easily decode a virtual address into its + * region first index, region second index etc. parts. + */ +union vaddress { + unsigned long addr; + struct { + unsigned long rfx : 11; + unsigned long rsx : 11; + unsigned long rtx : 11; + unsigned long sx : 11; + unsigned long px : 8; + unsigned long bx : 12; + }; + struct { + unsigned long rfx01 : 2; + unsigned long : 9; + unsigned long rsx01 : 2; + unsigned long : 9; + unsigned long rtx01 : 2; + unsigned long : 9; + unsigned long sx01 : 2; + unsigned long : 29; + }; +}; + +/* + * raddress union which will contain the result (real or absolute address) + * after a page table walk. The rfaa, sfaa and pfra members are used to + * simply assign them the value of a region, segment or page table entry. + */ +union raddress { + unsigned long addr; + unsigned long rfaa : 33; /* Region-Frame Absolute Address */ + unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ + unsigned long pfra : 52; /* Page-Frame Real Address */ +}; + +static int ipte_lock_count; +static DEFINE_MUTEX(ipte_mutex); + +int ipte_lock_held(struct kvm_vcpu *vcpu) +{ + union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + + if (vcpu->arch.sie_block->eca & 1) + return ic->kh != 0; + return ipte_lock_count != 0; +} + +static void ipte_lock_simple(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + mutex_lock(&ipte_mutex); + ipte_lock_count++; + if (ipte_lock_count > 1) + goto out; + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + old = ACCESS_ONCE(*ic); + while (old.k) { + cond_resched(); + old = ACCESS_ONCE(*ic); + } + new = old; + new.k = 1; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); +out: + mutex_unlock(&ipte_mutex); +} + +static void ipte_unlock_simple(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + mutex_lock(&ipte_mutex); + ipte_lock_count--; + if (ipte_lock_count) + goto out; + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + new = old = ACCESS_ONCE(*ic); + new.k = 0; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + if (!ipte_lock_count) + wake_up(&vcpu->kvm->arch.ipte_wq); +out: + mutex_unlock(&ipte_mutex); +} + +static void ipte_lock_siif(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + old = ACCESS_ONCE(*ic); + while (old.kg) { + cond_resched(); + old = ACCESS_ONCE(*ic); + } + new = old; + new.k = 1; + new.kh++; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); +} + +static void ipte_unlock_siif(struct kvm_vcpu *vcpu) +{ + union ipte_control old, new, *ic; + + ic = &vcpu->kvm->arch.sca->ipte_control; + do { + new = old = ACCESS_ONCE(*ic); + new.kh--; + if (!new.kh) + new.k = 0; + } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + if (!new.kh) + wake_up(&vcpu->kvm->arch.ipte_wq); +} + +void ipte_lock(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->eca & 1) + ipte_lock_siif(vcpu); + else + ipte_lock_simple(vcpu); +} + +void ipte_unlock(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->eca & 1) + ipte_unlock_siif(vcpu); + else + ipte_unlock_simple(vcpu); +} + +static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu) +{ + switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { + case PSW_AS_PRIMARY: + return vcpu->arch.sie_block->gcr[1]; + case PSW_AS_SECONDARY: + return vcpu->arch.sie_block->gcr[7]; + case PSW_AS_HOME: + return vcpu->arch.sie_block->gcr[13]; + } + return 0; +} + +static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) +{ + return kvm_read_guest(kvm, gpa, val, sizeof(*val)); +} + +/** + * guest_translate - translate a guest virtual into a guest absolute address + * @vcpu: virtual cpu + * @gva: guest virtual address + * @gpa: points to where guest physical (absolute) address should be stored + * @write: indicates if access is a write access + * + * Translate a guest virtual address into a guest absolute address by means + * of dynamic address translation as specified by the architecuture. + * If the resulting absolute address is not available in the configuration + * an addressing exception is indicated and @gpa will not be changed. + * + * Returns: - zero on success; @gpa contains the resulting absolute address + * - a negative value if guest access failed due to e.g. broken + * guest mapping + * - a positve value if an access exception happened. In this case + * the returned value is the program interruption code as defined + * by the architecture + */ +static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write) +{ + union vaddress vaddr = {.addr = gva}; + union raddress raddr = {.addr = gva}; + union page_table_entry pte; + int dat_protection = 0; + union ctlreg0 ctlreg0; + unsigned long ptr; + int edat1, edat2; + union asce asce; + + ctlreg0.val = vcpu->arch.sie_block->gcr[0]; + edat1 = ctlreg0.edat && test_vfacility(8); + edat2 = edat1 && test_vfacility(78); + asce.val = get_vcpu_asce(vcpu); + if (asce.r) + goto real_address; + ptr = asce.origin * 4096; + switch (asce.dt) { + case ASCE_TYPE_REGION1: + if (vaddr.rfx01 > asce.tl) + return PGM_REGION_FIRST_TRANS; + ptr += vaddr.rfx * 8; + break; + case ASCE_TYPE_REGION2: + if (vaddr.rfx) + return PGM_ASCE_TYPE; + if (vaddr.rsx01 > asce.tl) + return PGM_REGION_SECOND_TRANS; + ptr += vaddr.rsx * 8; + break; + case ASCE_TYPE_REGION3: + if (vaddr.rfx || vaddr.rsx) + return PGM_ASCE_TYPE; + if (vaddr.rtx01 > asce.tl) + return PGM_REGION_THIRD_TRANS; + ptr += vaddr.rtx * 8; + break; + case ASCE_TYPE_SEGMENT: + if (vaddr.rfx || vaddr.rsx || vaddr.rtx) + return PGM_ASCE_TYPE; + if (vaddr.sx01 > asce.tl) + return PGM_SEGMENT_TRANSLATION; + ptr += vaddr.sx * 8; + break; + } + switch (asce.dt) { + case ASCE_TYPE_REGION1: { + union region1_table_entry rfte; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &rfte.val)) + return -EFAULT; + if (rfte.i) + return PGM_REGION_FIRST_TRANS; + if (rfte.tt != TABLE_TYPE_REGION1) + return PGM_TRANSLATION_SPEC; + if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl) + return PGM_REGION_SECOND_TRANS; + if (edat1) + dat_protection |= rfte.p; + ptr = rfte.rto * 4096 + vaddr.rsx * 8; + } + /* fallthrough */ + case ASCE_TYPE_REGION2: { + union region2_table_entry rste; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &rste.val)) + return -EFAULT; + if (rste.i) + return PGM_REGION_SECOND_TRANS; + if (rste.tt != TABLE_TYPE_REGION2) + return PGM_TRANSLATION_SPEC; + if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl) + return PGM_REGION_THIRD_TRANS; + if (edat1) + dat_protection |= rste.p; + ptr = rste.rto * 4096 + vaddr.rtx * 8; + } + /* fallthrough */ + case ASCE_TYPE_REGION3: { + union region3_table_entry rtte; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &rtte.val)) + return -EFAULT; + if (rtte.i) + return PGM_REGION_THIRD_TRANS; + if (rtte.tt != TABLE_TYPE_REGION3) + return PGM_TRANSLATION_SPEC; + if (rtte.cr && asce.p && edat2) + return PGM_TRANSLATION_SPEC; + if (rtte.fc && edat2) { + dat_protection |= rtte.fc1.p; + raddr.rfaa = rtte.fc1.rfaa; + goto absolute_address; + } + if (vaddr.sx01 < rtte.fc0.tf) + return PGM_SEGMENT_TRANSLATION; + if (vaddr.sx01 > rtte.fc0.tl) + return PGM_SEGMENT_TRANSLATION; + if (edat1) + dat_protection |= rtte.fc0.p; + ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8; + } + /* fallthrough */ + case ASCE_TYPE_SEGMENT: { + union segment_table_entry ste; + + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &ste.val)) + return -EFAULT; + if (ste.i) + return PGM_SEGMENT_TRANSLATION; + if (ste.tt != TABLE_TYPE_SEGMENT) + return PGM_TRANSLATION_SPEC; + if (ste.cs && asce.p) + return PGM_TRANSLATION_SPEC; + if (ste.fc && edat1) { + dat_protection |= ste.fc1.p; + raddr.sfaa = ste.fc1.sfaa; + goto absolute_address; + } + dat_protection |= ste.fc0.p; + ptr = ste.fc0.pto * 2048 + vaddr.px * 8; + } + } + if (kvm_is_error_gpa(vcpu->kvm, ptr)) + return PGM_ADDRESSING; + if (deref_table(vcpu->kvm, ptr, &pte.val)) + return -EFAULT; + if (pte.i) + return PGM_PAGE_TRANSLATION; + if (pte.z) + return PGM_TRANSLATION_SPEC; + if (pte.co && !edat1) + return PGM_TRANSLATION_SPEC; + dat_protection |= pte.p; + raddr.pfra = pte.pfra; +real_address: + raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); +absolute_address: + if (write && dat_protection) + return PGM_PROTECTION; + if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) + return PGM_ADDRESSING; + *gpa = raddr.addr; + return 0; +} + +static inline int is_low_address(unsigned long ga) +{ + /* Check for address ranges 0..511 and 4096..4607 */ + return (ga & ~0x11fful) == 0; +} + +static int low_address_protection_enabled(struct kvm_vcpu *vcpu) +{ + union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + union asce asce; + + if (!ctlreg0.lap) + return 0; + asce.val = get_vcpu_asce(vcpu); + if (psw_bits(*psw).t && asce.p) + return 0; + return 1; +} + +struct trans_exc_code_bits { + unsigned long addr : 52; /* Translation-exception Address */ + unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ + unsigned long : 7; + unsigned long b61 : 1; + unsigned long as : 2; /* ASCE Identifier */ +}; + +enum { + FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ + FSI_STORE = 1, /* Exception was due to store operation */ + FSI_FETCH = 2 /* Exception was due to fetch operation */ +}; + +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, + unsigned long *pages, unsigned long nr_pages, + int write) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec_bits; + int lap_enabled, rc; + + memset(pgm, 0, sizeof(*pgm)); + tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw_bits(*psw).as; + lap_enabled = low_address_protection_enabled(vcpu); + while (nr_pages) { + ga = kvm_s390_logical_to_effective(vcpu, ga); + tec_bits->addr = ga >> PAGE_SHIFT; + if (write && lap_enabled && is_low_address(ga)) { + pgm->code = PGM_PROTECTION; + return pgm->code; + } + ga &= PAGE_MASK; + if (psw_bits(*psw).t) { + rc = guest_translate(vcpu, ga, pages, write); + if (rc < 0) + return rc; + if (rc == PGM_PROTECTION) + tec_bits->b61 = 1; + if (rc) + pgm->code = rc; + } else { + *pages = kvm_s390_real_to_abs(vcpu, ga); + if (kvm_is_error_gpa(vcpu->kvm, *pages)) + pgm->code = PGM_ADDRESSING; + } + if (pgm->code) + return pgm->code; + ga += PAGE_SIZE; + pages++; + nr_pages--; + } + return 0; +} + +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len, int write) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long _len, nr_pages, gpa, idx; + unsigned long pages_array[2]; + unsigned long *pages; + int need_ipte_lock; + union asce asce; + int rc; + + if (!len) + return 0; + /* Access register mode is not supported yet. */ + if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) + return -EOPNOTSUPP; + nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; + pages = pages_array; + if (nr_pages > ARRAY_SIZE(pages_array)) + pages = vmalloc(nr_pages * sizeof(unsigned long)); + if (!pages) + return -ENOMEM; + asce.val = get_vcpu_asce(vcpu); + need_ipte_lock = psw_bits(*psw).t && !asce.r; + if (need_ipte_lock) + ipte_lock(vcpu); + rc = guest_page_range(vcpu, ga, pages, nr_pages, write); + for (idx = 0; idx < nr_pages && !rc; idx++) { + gpa = *(pages + idx) + (ga & ~PAGE_MASK); + _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); + if (write) + rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); + else + rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); + len -= _len; + ga += _len; + data += _len; + } + if (need_ipte_lock) + ipte_unlock(vcpu); + if (nr_pages > ARRAY_SIZE(pages_array)) + vfree(pages); + return rc; +} + +int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, + void *data, unsigned long len, int write) +{ + unsigned long _len, gpa; + int rc = 0; + + while (len && !rc) { + gpa = kvm_s390_real_to_abs(vcpu, gra); + _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); + if (write) + rc = write_guest_abs(vcpu, gpa, data, _len); + else + rc = read_guest_abs(vcpu, gpa, data, _len); + len -= _len; + gra += _len; + data += _len; + } + return rc; +} + +/** + * guest_translate_address - translate guest logical into guest absolute address + * + * Parameter semantics are the same as the ones from guest_translate. + * The memory contents at the guest address are not changed. + * + * Note: The IPTE lock is not taken during this function, so the caller + * has to take care of this. + */ +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec; + union asce asce; + int rc; + + /* Access register mode is not supported yet. */ + if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) + return -EOPNOTSUPP; + + gva = kvm_s390_logical_to_effective(vcpu, gva); + memset(pgm, 0, sizeof(*pgm)); + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec->as = psw_bits(*psw).as; + tec->fsi = write ? FSI_STORE : FSI_FETCH; + tec->addr = gva >> PAGE_SHIFT; + if (is_low_address(gva) && low_address_protection_enabled(vcpu)) { + if (write) { + rc = pgm->code = PGM_PROTECTION; + return rc; + } + } + + asce.val = get_vcpu_asce(vcpu); + if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ + rc = guest_translate(vcpu, gva, gpa, write); + if (rc > 0) { + if (rc == PGM_PROTECTION) + tec->b61 = 1; + pgm->code = rc; + } + } else { + rc = 0; + *gpa = kvm_s390_real_to_abs(vcpu, gva); + if (kvm_is_error_gpa(vcpu->kvm, *gpa)) + rc = pgm->code = PGM_ADDRESSING; + } + + return rc; +} + +/** + * kvm_s390_check_low_addr_protection - check for low-address protection + * @ga: Guest address + * + * Checks whether an address is subject to low-address protection and set + * up vcpu->arch.pgm accordingly if necessary. + * + * Return: 0 if no protection exception, or PGM_PROTECTION if protected. + */ +int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct trans_exc_code_bits *tec_bits; + + if (!is_low_address(ga) || !low_address_protection_enabled(vcpu)) + return 0; + + memset(pgm, 0, sizeof(*pgm)); + tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec_bits->fsi = FSI_STORE; + tec_bits->as = psw_bits(*psw).as; + tec_bits->addr = ga >> PAGE_SHIFT; + pgm->code = PGM_PROTECTION; + + return pgm->code; +} diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h new file mode 100644 index 00000000000..0149cf15058 --- /dev/null +++ b/arch/s390/kvm/gaccess.h @@ -0,0 +1,335 @@ +/* + * access guest memory + * + * Copyright IBM Corp. 2008, 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef __KVM_S390_GACCESS_H +#define __KVM_S390_GACCESS_H + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <linux/ptrace.h> +#include "kvm-s390.h" + +/** + * kvm_s390_real_to_abs - convert guest real address to guest absolute address + * @vcpu - guest virtual cpu + * @gra - guest real address + * + * Returns the guest absolute address that corresponds to the passed guest real + * address @gra of a virtual guest cpu by applying its prefix. + */ +static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, + unsigned long gra) +{ + unsigned long prefix = kvm_s390_get_prefix(vcpu); + + if (gra < 2 * PAGE_SIZE) + gra += prefix; + else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE) + gra -= prefix; + return gra; +} + +/** + * kvm_s390_logical_to_effective - convert guest logical to effective address + * @vcpu: guest virtual cpu + * @ga: guest logical address + * + * Convert a guest vcpu logical address to a guest vcpu effective address by + * applying the rules of the vcpu's addressing mode defined by PSW bits 31 + * and 32 (extendended/basic addressing mode). + * + * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing + * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode) + * of @ga will be zeroed and the remaining bits will be returned. + */ +static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu, + unsigned long ga) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + + if (psw_bits(*psw).eaba == PSW_AMODE_64BIT) + return ga; + if (psw_bits(*psw).eaba == PSW_AMODE_31BIT) + return ga & ((1UL << 31) - 1); + return ga & ((1UL << 24) - 1); +} + +/* + * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions + * which shall only be used to access the lowcore of a vcpu. + * These functions should be used for e.g. interrupt handlers where no + * guest memory access protection facilities, like key or low address + * protection, are applicable. + * At a later point guest vcpu lowcore access should happen via pinned + * prefix pages, so that these pages can be accessed directly via the + * kernel mapping. All of these *_lc functions can be removed then. + */ + +/** + * put_guest_lc - write a simple variable to a guest vcpu's lowcore + * @vcpu: virtual cpu + * @x: value to copy to guest + * @gra: vcpu's destination guest real address + * + * Copies a simple value from kernel space to a guest vcpu's lowcore. + * The size of the variable may be 1, 2, 4 or 8 bytes. The destination + * must be located in the vcpu's lowcore. Otherwise the result is undefined. + * + * Returns zero on success or -EFAULT on error. + * + * Note: an error indicates that either the kernel is out of memory or + * the guest memory mapping is broken. In any case the best solution + * would be to terminate the guest. + * It is wrong to inject a guest exception. + */ +#define put_guest_lc(vcpu, x, gra) \ +({ \ + struct kvm_vcpu *__vcpu = (vcpu); \ + __typeof__(*(gra)) __x = (x); \ + unsigned long __gpa; \ + \ + __gpa = (unsigned long)(gra); \ + __gpa += kvm_s390_get_prefix(__vcpu); \ + kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \ +}) + +/** + * write_guest_lc - copy data from kernel space to guest vcpu's lowcore + * @vcpu: virtual cpu + * @gra: vcpu's source guest real address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy data from kernel space to guest vcpu's lowcore. The entire range must + * be located within the vcpu's lowcore, otherwise the result is undefined. + * + * Returns zero on success or -EFAULT on error. + * + * Note: an error indicates that either the kernel is out of memory or + * the guest memory mapping is broken. In any case the best solution + * would be to terminate the guest. + * It is wrong to inject a guest exception. + */ +static inline __must_check +int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) +{ + unsigned long gpa = gra + kvm_s390_get_prefix(vcpu); + + return kvm_write_guest(vcpu->kvm, gpa, data, len); +} + +/** + * read_guest_lc - copy data from guest vcpu's lowcore to kernel space + * @vcpu: virtual cpu + * @gra: vcpu's source guest real address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy data from guest vcpu's lowcore to kernel space. The entire range must + * be located within the vcpu's lowcore, otherwise the result is undefined. + * + * Returns zero on success or -EFAULT on error. + * + * Note: an error indicates that either the kernel is out of memory or + * the guest memory mapping is broken. In any case the best solution + * would be to terminate the guest. + * It is wrong to inject a guest exception. + */ +static inline __must_check +int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) +{ + unsigned long gpa = gra + kvm_s390_get_prefix(vcpu); + + return kvm_read_guest(vcpu->kvm, gpa, data, len); +} + +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa, int write); + +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len, int write); + +int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, + void *data, unsigned long len, int write); + +/** + * write_guest - copy data from kernel space to guest space + * @vcpu: virtual cpu + * @ga: guest address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @data (kernel space) to @ga (guest address). + * In order to copy data to guest space the PSW of the vcpu is inspected: + * If DAT is off data will be copied to guest real or absolute memory. + * If DAT is on data will be copied to the address space as specified by + * the address space bits of the PSW: + * Primary, secondory or home space (access register mode is currently not + * implemented). + * The addressing mode of the PSW is also inspected, so that address wrap + * around is taken into account for 24-, 31- and 64-bit addressing mode, + * if the to be copied data crosses page boundaries in guest address space. + * In addition also low address and DAT protection are inspected before + * copying any data (key protection is currently not implemented). + * + * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu. + * In case of an access exception (e.g. protection exception) pgm will contain + * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()' + * will inject a correct exception into the guest. + * If no access exception happened, the contents of pgm are undefined when + * this function returns. + * + * Returns: - zero on success + * - a negative value if e.g. the guest mapping is broken or in + * case of out-of-memory. In this case the contents of pgm are + * undefined. Also parts of @data may have been copied to guest + * space. + * - a positive value if an access exception happened. In this case + * the returned value is the program interruption code and the + * contents of pgm may be used to inject an exception into the + * guest. No data has been copied to guest space. + * + * Note: in case an access exception is recognized no data has been copied to + * guest space (this is also true, if the to be copied data would cross + * one or more page boundaries in guest space). + * Therefore this function may be used for nullifying and suppressing + * instruction emulation. + * It may also be used for terminating instructions, if it is undefined + * if data has been changed in guest space in case of an exception. + */ +static inline __must_check +int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len) +{ + return access_guest(vcpu, ga, data, len, 1); +} + +/** + * read_guest - copy data from guest space to kernel space + * @vcpu: virtual cpu + * @ga: guest address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @ga (guest address) to @data (kernel space). + * + * The behaviour of read_guest is identical to write_guest, except that + * data will be copied from guest space to kernel space. + */ +static inline __must_check +int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len) +{ + return access_guest(vcpu, ga, data, len, 0); +} + +/** + * write_guest_abs - copy data from kernel space to guest space absolute + * @vcpu: virtual cpu + * @gpa: guest physical (absolute) address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest low address and key protection are not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to guest memory. + */ +static inline __must_check +int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, + unsigned long len) +{ + return kvm_write_guest(vcpu->kvm, gpa, data, len); +} + +/** + * read_guest_abs - copy data from guest space absolute to kernel space + * @vcpu: virtual cpu + * @gpa: guest physical (absolute) address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest key protection is not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to kernel space. + */ +static inline __must_check +int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, + unsigned long len) +{ + return kvm_read_guest(vcpu->kvm, gpa, data, len); +} + +/** + * write_guest_real - copy data from kernel space to guest space real + * @vcpu: virtual cpu + * @gra: guest real address + * @data: source address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @data (kernel space) to @gra (guest real address). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest low address and key protection are not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to guest memory. + */ +static inline __must_check +int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) +{ + return access_guest_real(vcpu, gra, data, len, 1); +} + +/** + * read_guest_real - copy data from guest space real to kernel space + * @vcpu: virtual cpu + * @gra: guest real address + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from @gra (guest real address) to @data (kernel space). + * It is up to the caller to ensure that the entire guest memory range is + * valid memory before calling this function. + * Guest key protection is not checked. + * + * Returns zero on success or -EFAULT on error. + * + * If an error occurs data may have been copied partially to kernel space. + */ +static inline __must_check +int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, + unsigned long len) +{ + return access_guest_real(vcpu, gra, data, len, 0); +} + +void ipte_lock(struct kvm_vcpu *vcpu); +void ipte_unlock(struct kvm_vcpu *vcpu); +int ipte_lock_held(struct kvm_vcpu *vcpu); +int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga); + +#endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c new file mode 100644 index 00000000000..3e8d4092ce3 --- /dev/null +++ b/arch/s390/kvm/guestdbg.c @@ -0,0 +1,482 @@ +/* + * kvm guest debug support + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> + */ +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include "kvm-s390.h" +#include "gaccess.h" + +/* + * Extends the address range given by *start and *stop to include the address + * range starting with estart and the length len. Takes care of overflowing + * intervals and tries to minimize the overall intervall size. + */ +static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) +{ + u64 estop; + + if (len > 0) + len--; + else + len = 0; + + estop = estart + len; + + /* 0-0 range represents "not set" */ + if ((*start == 0) && (*stop == 0)) { + *start = estart; + *stop = estop; + } else if (*start <= *stop) { + /* increase the existing range */ + if (estart < *start) + *start = estart; + if (estop > *stop) + *stop = estop; + } else { + /* "overflowing" interval, whereby *stop > *start */ + if (estart <= *stop) { + if (estop > *stop) + *stop = estop; + } else if (estop > *start) { + if (estart < *start) + *start = estart; + } + /* minimize the range */ + else if ((estop - *stop) < (*start - estart)) + *stop = estop; + else + *start = estart; + } +} + +#define MAX_INST_SIZE 6 + +static void enable_all_hw_bp(struct kvm_vcpu *vcpu) +{ + unsigned long start, len; + u64 *cr9 = &vcpu->arch.sie_block->gcr[9]; + u64 *cr10 = &vcpu->arch.sie_block->gcr[10]; + u64 *cr11 = &vcpu->arch.sie_block->gcr[11]; + int i; + + if (vcpu->arch.guestdbg.nr_hw_bp <= 0 || + vcpu->arch.guestdbg.hw_bp_info == NULL) + return; + + /* + * If the guest is not interrested in branching events, we can savely + * limit them to the PER address range. + */ + if (!(*cr9 & PER_EVENT_BRANCH)) + *cr9 |= PER_CONTROL_BRANCH_ADDRESS; + *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) { + start = vcpu->arch.guestdbg.hw_bp_info[i].addr; + len = vcpu->arch.guestdbg.hw_bp_info[i].len; + + /* + * The instruction in front of the desired bp has to + * report instruction-fetching events + */ + if (start < MAX_INST_SIZE) { + len += start; + start = 0; + } else { + start -= MAX_INST_SIZE; + len += MAX_INST_SIZE; + } + + extend_address_range(cr10, cr11, start, len); + } +} + +static void enable_all_hw_wp(struct kvm_vcpu *vcpu) +{ + unsigned long start, len; + u64 *cr9 = &vcpu->arch.sie_block->gcr[9]; + u64 *cr10 = &vcpu->arch.sie_block->gcr[10]; + u64 *cr11 = &vcpu->arch.sie_block->gcr[11]; + int i; + + if (vcpu->arch.guestdbg.nr_hw_wp <= 0 || + vcpu->arch.guestdbg.hw_wp_info == NULL) + return; + + /* if host uses storage alternation for special address + * spaces, enable all events and give all to the guest */ + if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) { + *cr9 &= ~PER_CONTROL_ALTERATION; + *cr10 = 0; + *cr11 = PSW_ADDR_INSN; + } else { + *cr9 &= ~PER_CONTROL_ALTERATION; + *cr9 |= PER_EVENT_STORE; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) { + start = vcpu->arch.guestdbg.hw_wp_info[i].addr; + len = vcpu->arch.guestdbg.hw_wp_info[i].len; + + extend_address_range(cr10, cr11, start, len); + } + } +} + +void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu) +{ + vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0]; + vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9]; + vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10]; + vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11]; +} + +void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0; + vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9; + vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10; + vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11; +} + +void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu) +{ + /* + * TODO: if guest psw has per enabled, otherwise 0s! + * This reduces the amount of reported events. + * Need to intercept all psw changes! + */ + + if (guestdbg_sstep_enabled(vcpu)) { + /* disable timer (clock-comparator) interrupts */ + vcpu->arch.sie_block->gcr[0] &= ~0x800ul; + vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH; + vcpu->arch.sie_block->gcr[10] = 0; + vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN; + } + + if (guestdbg_hw_bp_enabled(vcpu)) { + enable_all_hw_bp(vcpu); + enable_all_hw_wp(vcpu); + } + + /* TODO: Instruction-fetching-nullification not allowed for now */ + if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION) + vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION; +} + +#define MAX_WP_SIZE 100 + +static int __import_wp_info(struct kvm_vcpu *vcpu, + struct kvm_hw_breakpoint *bp_data, + struct kvm_hw_wp_info_arch *wp_info) +{ + int ret = 0; + wp_info->len = bp_data->len; + wp_info->addr = bp_data->addr; + wp_info->phys_addr = bp_data->phys_addr; + wp_info->old_data = NULL; + + if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE) + return -EINVAL; + + wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL); + if (!wp_info->old_data) + return -ENOMEM; + /* try to backup the original value */ + ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data, + wp_info->len); + if (ret) { + kfree(wp_info->old_data); + wp_info->old_data = NULL; + } + + return ret; +} + +#define MAX_BP_COUNT 50 + +int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + int ret = 0, nr_wp = 0, nr_bp = 0, i, size; + struct kvm_hw_breakpoint *bp_data = NULL; + struct kvm_hw_wp_info_arch *wp_info = NULL; + struct kvm_hw_bp_info_arch *bp_info = NULL; + + if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp) + return 0; + else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT) + return -EINVAL; + + size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint); + bp_data = kmalloc(size, GFP_KERNEL); + if (!bp_data) { + ret = -ENOMEM; + goto error; + } + + if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) { + ret = -EFAULT; + goto error; + } + + for (i = 0; i < dbg->arch.nr_hw_bp; i++) { + switch (bp_data[i].type) { + case KVM_HW_WP_WRITE: + nr_wp++; + break; + case KVM_HW_BP: + nr_bp++; + break; + default: + break; + } + } + + size = nr_wp * sizeof(struct kvm_hw_wp_info_arch); + if (size > 0) { + wp_info = kmalloc(size, GFP_KERNEL); + if (!wp_info) { + ret = -ENOMEM; + goto error; + } + } + size = nr_bp * sizeof(struct kvm_hw_bp_info_arch); + if (size > 0) { + bp_info = kmalloc(size, GFP_KERNEL); + if (!bp_info) { + ret = -ENOMEM; + goto error; + } + } + + for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) { + switch (bp_data[i].type) { + case KVM_HW_WP_WRITE: + ret = __import_wp_info(vcpu, &bp_data[i], + &wp_info[nr_wp]); + if (ret) + goto error; + nr_wp++; + break; + case KVM_HW_BP: + bp_info[nr_bp].len = bp_data[i].len; + bp_info[nr_bp].addr = bp_data[i].addr; + nr_bp++; + break; + } + } + + vcpu->arch.guestdbg.nr_hw_bp = nr_bp; + vcpu->arch.guestdbg.hw_bp_info = bp_info; + vcpu->arch.guestdbg.nr_hw_wp = nr_wp; + vcpu->arch.guestdbg.hw_wp_info = wp_info; + return 0; +error: + kfree(bp_data); + kfree(wp_info); + kfree(bp_info); + return ret; +} + +void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_hw_wp_info_arch *hw_wp_info = NULL; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) { + hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i]; + kfree(hw_wp_info->old_data); + hw_wp_info->old_data = NULL; + } + kfree(vcpu->arch.guestdbg.hw_wp_info); + vcpu->arch.guestdbg.hw_wp_info = NULL; + + kfree(vcpu->arch.guestdbg.hw_bp_info); + vcpu->arch.guestdbg.hw_bp_info = NULL; + + vcpu->arch.guestdbg.nr_hw_wp = 0; + vcpu->arch.guestdbg.nr_hw_bp = 0; +} + +static inline int in_addr_range(u64 addr, u64 a, u64 b) +{ + if (a <= b) + return (addr >= a) && (addr <= b); + else + /* "overflowing" interval */ + return (addr <= a) && (addr >= b); +} + +#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1) + +static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu, + unsigned long addr) +{ + struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info; + int i; + + if (vcpu->arch.guestdbg.nr_hw_bp == 0) + return NULL; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) { + /* addr is directly the start or in the range of a bp */ + if (addr == bp_info->addr) + goto found; + if (bp_info->len > 0 && + in_addr_range(addr, bp_info->addr, end_of_range(bp_info))) + goto found; + + bp_info++; + } + + return NULL; +found: + return bp_info; +} + +static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_hw_wp_info_arch *wp_info = NULL; + void *temp = NULL; + + if (vcpu->arch.guestdbg.nr_hw_wp == 0) + return NULL; + + for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) { + wp_info = &vcpu->arch.guestdbg.hw_wp_info[i]; + if (!wp_info || !wp_info->old_data || wp_info->len <= 0) + continue; + + temp = kmalloc(wp_info->len, GFP_KERNEL); + if (!temp) + continue; + + /* refetch the wp data and compare it to the old value */ + if (!read_guest(vcpu, wp_info->phys_addr, temp, + wp_info->len)) { + if (memcmp(temp, wp_info->old_data, wp_info->len)) { + kfree(temp); + return wp_info; + } + } + kfree(temp); + temp = NULL; + } + + return NULL; +} + +void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu) +{ + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING; +} + +#define per_bp_event(code) \ + (code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH)) +#define per_write_wp_event(code) \ + (code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL)) + +static int debug_exit_required(struct kvm_vcpu *vcpu) +{ + u32 perc = (vcpu->arch.sie_block->perc << 24); + struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch; + struct kvm_hw_wp_info_arch *wp_info = NULL; + struct kvm_hw_bp_info_arch *bp_info = NULL; + unsigned long addr = vcpu->arch.sie_block->gpsw.addr; + unsigned long peraddr = vcpu->arch.sie_block->peraddr; + + if (guestdbg_hw_bp_enabled(vcpu)) { + if (per_write_wp_event(perc) && + vcpu->arch.guestdbg.nr_hw_wp > 0) { + wp_info = any_wp_changed(vcpu); + if (wp_info) { + debug_exit->addr = wp_info->addr; + debug_exit->type = KVM_HW_WP_WRITE; + goto exit_required; + } + } + if (per_bp_event(perc) && + vcpu->arch.guestdbg.nr_hw_bp > 0) { + bp_info = find_hw_bp(vcpu, addr); + /* remove duplicate events if PC==PER address */ + if (bp_info && (addr != peraddr)) { + debug_exit->addr = addr; + debug_exit->type = KVM_HW_BP; + vcpu->arch.guestdbg.last_bp = addr; + goto exit_required; + } + /* breakpoint missed */ + bp_info = find_hw_bp(vcpu, peraddr); + if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) { + debug_exit->addr = peraddr; + debug_exit->type = KVM_HW_BP; + goto exit_required; + } + } + } + if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) { + debug_exit->addr = addr; + debug_exit->type = KVM_SINGLESTEP; + goto exit_required; + } + + return 0; +exit_required: + return 1; +} + +#define guest_per_enabled(vcpu) \ + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) + +static void filter_guest_per_event(struct kvm_vcpu *vcpu) +{ + u32 perc = vcpu->arch.sie_block->perc << 24; + u64 peraddr = vcpu->arch.sie_block->peraddr; + u64 addr = vcpu->arch.sie_block->gpsw.addr; + u64 cr9 = vcpu->arch.sie_block->gcr[9]; + u64 cr10 = vcpu->arch.sie_block->gcr[10]; + u64 cr11 = vcpu->arch.sie_block->gcr[11]; + /* filter all events, demanded by the guest */ + u32 guest_perc = perc & cr9 & PER_EVENT_MASK; + + if (!guest_per_enabled(vcpu)) + guest_perc = 0; + + /* filter "successful-branching" events */ + if (guest_perc & PER_EVENT_BRANCH && + cr9 & PER_CONTROL_BRANCH_ADDRESS && + !in_addr_range(addr, cr10, cr11)) + guest_perc &= ~PER_EVENT_BRANCH; + + /* filter "instruction-fetching" events */ + if (guest_perc & PER_EVENT_IFETCH && + !in_addr_range(peraddr, cr10, cr11)) + guest_perc &= ~PER_EVENT_IFETCH; + + /* All other PER events will be given to the guest */ + /* TODO: Check alterated address/address space */ + + vcpu->arch.sie_block->perc = guest_perc >> 24; + + if (!guest_perc) + vcpu->arch.sie_block->iprcc &= ~PGM_PER; +} + +void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) +{ + if (debug_exit_required(vcpu)) + vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; + + filter_guest_per_event(vcpu); +} diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c new file mode 100644 index 00000000000..a0b586c1913 --- /dev/null +++ b/arch/s390/kvm/intercept.c @@ -0,0 +1,359 @@ +/* + * in-kernel handling for sie intercepts + * + * Copyright IBM Corp. 2008, 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/pagemap.h> + +#include <asm/kvm_host.h> +#include <asm/asm-offsets.h> +#include <asm/irq.h> + +#include "kvm-s390.h" +#include "gaccess.h" +#include "trace.h" +#include "trace-s390.h" + + +static const intercept_handler_t instruction_handlers[256] = { + [0x01] = kvm_s390_handle_01, + [0x82] = kvm_s390_handle_lpsw, + [0x83] = kvm_s390_handle_diag, + [0xae] = kvm_s390_handle_sigp, + [0xb2] = kvm_s390_handle_b2, + [0xb6] = kvm_s390_handle_stctl, + [0xb7] = kvm_s390_handle_lctl, + [0xb9] = kvm_s390_handle_b9, + [0xe5] = kvm_s390_handle_e5, + [0xeb] = kvm_s390_handle_eb, +}; + +static int handle_noop(struct kvm_vcpu *vcpu) +{ + switch (vcpu->arch.sie_block->icptcode) { + case 0x0: + vcpu->stat.exit_null++; + break; + case 0x10: + vcpu->stat.exit_external_request++; + break; + default: + break; /* nothing */ + } + return 0; +} + +static int handle_stop(struct kvm_vcpu *vcpu) +{ + int rc = 0; + + vcpu->stat.exit_stop_request++; + spin_lock_bh(&vcpu->arch.local_int.lock); + + trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + kvm_s390_vcpu_stop(vcpu); + vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; + VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); + rc = -EOPNOTSUPP; + } + + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + /* store status must be called unlocked. Since local_int.lock + * only protects local_int.* and not guest memory we can give + * up the lock here */ + spin_unlock_bh(&vcpu->arch.local_int.lock); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -EOPNOTSUPP; + } else + spin_unlock_bh(&vcpu->arch.local_int.lock); + return rc; +} + +static int handle_validity(struct kvm_vcpu *vcpu) +{ + int viwhy = vcpu->arch.sie_block->ipb >> 16; + + vcpu->stat.exit_validity++; + trace_kvm_s390_intercept_validity(vcpu, viwhy); + WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); + return -EOPNOTSUPP; +} + +static int handle_instruction(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + vcpu->stat.exit_instruction++; + trace_kvm_s390_intercept_instruction(vcpu, + vcpu->arch.sie_block->ipa, + vcpu->arch.sie_block->ipb); + handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} + +static void __extract_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ + memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); + pgm_info->code = vcpu->arch.sie_block->iprcc; + + switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { + case PGM_AFX_TRANSLATION: + case PGM_ASX_TRANSLATION: + case PGM_EX_TRANSLATION: + case PGM_LFX_TRANSLATION: + case PGM_LSTE_SEQUENCE: + case PGM_LSX_TRANSLATION: + case PGM_LX_TRANSLATION: + case PGM_PRIMARY_AUTHORITY: + case PGM_SECONDARY_AUTHORITY: + case PGM_SPACE_SWITCH: + pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + break; + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_INSTANCE: + case PGM_ASTE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_EXTENDED_AUTHORITY: + pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + break; + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info->op_access_id = vcpu->arch.sie_block->oai; + break; + case PGM_MONITOR: + pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; + pgm_info->mon_code = vcpu->arch.sie_block->tecmc; + break; + case PGM_DATA: + pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; + break; + case PGM_PROTECTION: + pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + break; + default: + break; + } + + if (vcpu->arch.sie_block->iprcc & PGM_PER) { + pgm_info->per_code = vcpu->arch.sie_block->perc; + pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; + pgm_info->per_address = vcpu->arch.sie_block->peraddr; + pgm_info->per_access_id = vcpu->arch.sie_block->peraid; + } +} + +/* + * restore ITDB to program-interruption TDB in guest lowcore + * and set TX abort indication if required +*/ +static int handle_itdb(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_itdb *itdb; + int rc; + + if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu)) + return 0; + if (current->thread.per_flags & PER_FLAG_NO_TE) + return 0; + itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba; + rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb)); + if (rc) + return rc; + memset(itdb, 0, sizeof(*itdb)); + + return 0; +} + +#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER) + +static int handle_prog(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_pgm_info pgm_info; + psw_t psw; + int rc; + + vcpu->stat.exit_program_interruption++; + + if (guestdbg_enabled(vcpu) && per_event(vcpu)) { + kvm_s390_handle_per_event(vcpu); + /* the interrupt might have been filtered out completely */ + if (vcpu->arch.sie_block->iprcc == 0) + return 0; + } + + trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc); + if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) { + rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t)); + if (rc) + return rc; + /* Avoid endless loops of specification exceptions */ + if (!is_valid_psw(&psw)) + return -EOPNOTSUPP; + } + rc = handle_itdb(vcpu); + if (rc) + return rc; + + __extract_prog_irq(vcpu, &pgm_info); + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); +} + +static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) +{ + int rc, rc2; + + vcpu->stat.exit_instr_and_program++; + rc = handle_instruction(vcpu); + rc2 = handle_prog(vcpu); + + if (rc == -EOPNOTSUPP) + vcpu->arch.sie_block->icptcode = 0x04; + if (rc) + return rc; + return rc2; +} + +/** + * handle_external_interrupt - used for external interruption interceptions + * + * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if + * the new PSW does not have external interrupts disabled. In the first case, + * we've got to deliver the interrupt manually, and in the second case, we + * drop to userspace to handle the situation there. + */ +static int handle_external_interrupt(struct kvm_vcpu *vcpu) +{ + u16 eic = vcpu->arch.sie_block->eic; + struct kvm_s390_interrupt irq; + psw_t newpsw; + int rc; + + vcpu->stat.exit_external_interrupt++; + + rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t)); + if (rc) + return rc; + /* We can not handle clock comparator or timer interrupt with bad PSW */ + if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) && + (newpsw.mask & PSW_MASK_EXT)) + return -EOPNOTSUPP; + + switch (eic) { + case EXT_IRQ_CLK_COMP: + irq.type = KVM_S390_INT_CLOCK_COMP; + break; + case EXT_IRQ_CPU_TIMER: + irq.type = KVM_S390_INT_CPU_TIMER; + break; + case EXT_IRQ_EXTERNAL_CALL: + if (kvm_s390_si_ext_call_pending(vcpu)) + return 0; + irq.type = KVM_S390_INT_EXTERNAL_CALL; + irq.parm = vcpu->arch.sie_block->extcpuaddr; + break; + default: + return -EOPNOTSUPP; + } + + return kvm_s390_inject_vcpu(vcpu, &irq); +} + +/** + * Handle MOVE PAGE partial execution interception. + * + * This interception can only happen for guests with DAT disabled and + * addresses that are currently not mapped in the host. Thus we try to + * set up the mappings for the corresponding user pages here (or throw + * addressing exceptions in case of illegal guest addresses). + */ +static int handle_mvpg_pei(struct kvm_vcpu *vcpu) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long srcaddr, dstaddr; + int reg1, reg2, rc; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + /* Make sure that the source is paged-in */ + srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]); + if (kvm_is_error_gpa(vcpu->kvm, srcaddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); + if (rc != 0) + return rc; + + /* Make sure that the destination is paged-in */ + dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]); + if (kvm_is_error_gpa(vcpu->kvm, dstaddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); + if (rc != 0) + return rc; + + psw->addr = __rewind_psw(*psw, 4); + + return 0; +} + +static int handle_partial_execution(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */ + return handle_mvpg_pei(vcpu); + if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */ + return kvm_s390_handle_sigp_pei(vcpu); + + return -EOPNOTSUPP; +} + +static const intercept_handler_t intercept_funcs[] = { + [0x00 >> 2] = handle_noop, + [0x04 >> 2] = handle_instruction, + [0x08 >> 2] = handle_prog, + [0x0C >> 2] = handle_instruction_and_prog, + [0x10 >> 2] = handle_noop, + [0x14 >> 2] = handle_external_interrupt, + [0x18 >> 2] = handle_noop, + [0x1C >> 2] = kvm_s390_handle_wait, + [0x20 >> 2] = handle_validity, + [0x28 >> 2] = handle_stop, + [0x38 >> 2] = handle_partial_execution, +}; + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) +{ + intercept_handler_t func; + u8 code = vcpu->arch.sie_block->icptcode; + + if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs)) + return -EOPNOTSUPP; + func = intercept_funcs[code >> 2]; + if (func) + return func(vcpu); + return -EOPNOTSUPP; +} diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c new file mode 100644 index 00000000000..90c8de22a2a --- /dev/null +++ b/arch/s390/kvm/interrupt.c @@ -0,0 +1,1619 @@ +/* + * handling kvm guest interrupts + * + * Copyright IBM Corp. 2008,2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + */ + +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/hrtimer.h> +#include <linux/mmu_context.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <asm/asm-offsets.h> +#include <asm/uaccess.h> +#include "kvm-s390.h" +#include "gaccess.h" +#include "trace-s390.h" + +#define IOINT_SCHID_MASK 0x0000ffff +#define IOINT_SSID_MASK 0x00030000 +#define IOINT_CSSID_MASK 0x03fc0000 +#define IOINT_AI_MASK 0x04000000 + +static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); + +static int is_ioint(u64 type) +{ + return ((type & 0xfffe0000u) != 0xfffe0000u); +} + +int psw_extint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); +} + +static int psw_ioint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); +} + +static int psw_mchk_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK); +} + +static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) + return 0; + return 1; +} + +static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) +{ + if (psw_extint_disabled(vcpu) || + !(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + return 0; + if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu)) + /* No timer interrupts when single stepping */ + return 0; + return 1; +} + +static u64 int_word_to_isc_bits(u32 int_word) +{ + u8 isc = (int_word & 0x38000000) >> 27; + + return (0x80 >> isc) << 24; +} + +static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) + return 1; + case KVM_S390_INT_EMERGENCY: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) + return 1; + return 0; + case KVM_S390_INT_CLOCK_COMP: + return ckc_interrupts_enabled(vcpu); + case KVM_S390_INT_CPU_TIMER: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x400ul) + return 1; + return 0; + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_RESTART: + return 1; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) + return 1; + return 0; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[6] & + int_word_to_isc_bits(inti->io.io_int_word)) + return 1; + return 0; + default: + printk(KERN_WARNING "illegal interrupt type %llx\n", + inti->type); + BUG(); + } + return 0; +} + +static void __set_cpu_idle(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __unset_cpu_idle(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); + vcpu->arch.sie_block->lctl = 0x0000; + vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT); + + if (guestdbg_enabled(vcpu)) { + vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 | + LCTL_CR10 | LCTL_CR11); + vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); + } +} + +static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) +{ + atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); +} + +static void __set_intercept_indicator(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + case KVM_S390_INT_EMERGENCY: + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; + break; + case KVM_S390_SIGP_STOP: + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + break; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; + break; + default: + BUG(); + } +} + +static int __deliver_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc = 0; + + switch (pgm_info->code & ~PGM_PER) { + case PGM_AFX_TRANSLATION: + case PGM_ASX_TRANSLATION: + case PGM_EX_TRANSLATION: + case PGM_LFX_TRANSLATION: + case PGM_LSTE_SEQUENCE: + case PGM_LSX_TRANSLATION: + case PGM_LX_TRANSLATION: + case PGM_PRIMARY_AUTHORITY: + case PGM_SECONDARY_AUTHORITY: + case PGM_SPACE_SWITCH: + rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + (u64 *)__LC_TRANS_EXC_CODE); + break; + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_INSTANCE: + case PGM_ASTE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_EXTENDED_AUTHORITY: + rc = put_guest_lc(vcpu, pgm_info->exc_access_id, + (u8 *)__LC_EXC_ACCESS_ID); + break; + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + (u64 *)__LC_TRANS_EXC_CODE); + rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + (u8 *)__LC_EXC_ACCESS_ID); + rc |= put_guest_lc(vcpu, pgm_info->op_access_id, + (u8 *)__LC_OP_ACCESS_ID); + break; + case PGM_MONITOR: + rc = put_guest_lc(vcpu, pgm_info->mon_class_nr, + (u64 *)__LC_MON_CLASS_NR); + rc |= put_guest_lc(vcpu, pgm_info->mon_code, + (u64 *)__LC_MON_CODE); + break; + case PGM_DATA: + rc = put_guest_lc(vcpu, pgm_info->data_exc_code, + (u32 *)__LC_DATA_EXC_CODE); + break; + case PGM_PROTECTION: + rc = put_guest_lc(vcpu, pgm_info->trans_exc_code, + (u64 *)__LC_TRANS_EXC_CODE); + rc |= put_guest_lc(vcpu, pgm_info->exc_access_id, + (u8 *)__LC_EXC_ACCESS_ID); + break; + } + + if (pgm_info->code & PGM_PER) { + rc |= put_guest_lc(vcpu, pgm_info->per_code, + (u8 *) __LC_PER_CODE); + rc |= put_guest_lc(vcpu, pgm_info->per_atmid, + (u8 *)__LC_PER_ATMID); + rc |= put_guest_lc(vcpu, pgm_info->per_address, + (u64 *) __LC_PER_ADDRESS); + rc |= put_guest_lc(vcpu, pgm_info->per_access_id, + (u8 *) __LC_PER_ACCESS_ID); + } + + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* last instruction only stored for these icptcodes */ + rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14], + (u16 *) __LC_PGM_ILC); + break; + case ICPT_PROGI: + rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc, + (u16 *) __LC_PGM_ILC); + break; + default: + rc |= put_guest_lc(vcpu, 0, + (u16 *) __LC_PGM_ILC); + } + + rc |= put_guest_lc(vcpu, pgm_info->code, + (u16 *)__LC_PGM_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + + return rc; +} + +static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt_info *inti) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc = 0; + + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->emerg.code, 0); + rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, inti->emerg.code, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + break; + case KVM_S390_INT_EXTERNAL_CALL: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->extcall.code, 0); + rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, inti->extcall.code, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + break; + case KVM_S390_INT_CLOCK_COMP: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + deliver_ckc_interrupt(vcpu); + break; + case KVM_S390_INT_CPU_TIMER: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, + (u16 *)__LC_EXT_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + break; + case KVM_S390_INT_SERVICE: + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, 0); + rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + break; + case KVM_S390_INT_PFAULT_INIT: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *) __LC_EXT_PARAMS2); + break; + case KVM_S390_INT_PFAULT_DONE: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + break; + case KVM_S390_INT_VIRTIO: + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + break; + case KVM_S390_SIGP_STOP: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + 0, 0); + __set_intercept_indicator(vcpu, inti); + break; + + case KVM_S390_SIGP_SET_PREFIX: + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", + inti->prefix.address); + vcpu->stat.deliver_prefix_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->prefix.address, 0); + kvm_s390_set_prefix(vcpu, inti->prefix.address); + break; + + case KVM_S390_RESTART: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + 0, 0); + rc = write_guest_lc(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + break; + case KVM_S390_PROGRAM_INT: + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + inti->pgm.code, + table[vcpu->arch.sie_block->ipa >> 14]); + vcpu->stat.deliver_program_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->pgm.code, 0); + rc = __deliver_prog_irq(vcpu, &inti->pgm); + break; + + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + inti->mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->mchk.cr14, + inti->mchk.mcic); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + break; + + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + { + __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr; + __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word; + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + param0, param1); + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + break; + } + default: + BUG(); + } + if (rc) { + printk("kvm: The guest lowcore is not mapped during interrupt " + "delivery, killing userspace\n"); + do_exit(SIGKILL); + } +} + +static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +{ + int rc; + + rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + if (rc) { + printk("kvm: The guest lowcore is not mapped during interrupt " + "delivery, killing userspace\n"); + do_exit(SIGKILL); + } +} + +/* Check whether SIGP interpretation facility has an external call pending */ +int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) +{ + atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl; + + if (!psw_extint_disabled(vcpu) && + (vcpu->arch.sie_block->gcr[0] & 0x2000ul) && + (atomic_read(sigp_ctrl) & SIGP_CTRL_C) && + (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 1; + + return 0; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; + + if (atomic_read(&li->active)) { + spin_lock_bh(&li->lock); + list_for_each_entry(inti, &li->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&li->lock); + } + + if ((!rc) && atomic_read(&fi->active)) { + spin_lock(&fi->lock); + list_for_each_entry(inti, &fi->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock(&fi->lock); + } + + if (!rc && kvm_cpu_has_pending_timer(vcpu)) + rc = 1; + + if (!rc && kvm_s390_si_ext_call_pending(vcpu)) + rc = 1; + + return rc; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.sie_block->ckc < + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + return 0; + if (!ckc_interrupts_enabled(vcpu)) + return 0; + return 1; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) +{ + u64 now, sltime; + DECLARE_WAITQUEUE(wait, current); + + vcpu->stat.exit_wait_state++; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + + __set_cpu_idle(vcpu); + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + + if (psw_interrupts_disabled(vcpu)) { + VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); + __unset_cpu_idle(vcpu); + return -EOPNOTSUPP; /* disabled wait */ + } + + if (!ckc_interrupts_enabled(vcpu)) { + VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); + goto no_timer; + } + + now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; + if (vcpu->arch.sie_block->ckc < now) { + __unset_cpu_idle(vcpu); + return 0; + } + + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + + hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); + VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); +no_timer: + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + spin_lock(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + add_wait_queue(&vcpu->wq, &wait); + while (list_empty(&vcpu->arch.local_int.list) && + list_empty(&vcpu->arch.local_int.float_int->list) && + (!vcpu->arch.local_int.timer_due) && + !signal_pending(current) && + !kvm_s390_si_ext_call_pending(vcpu)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.float_int->lock); + schedule(); + spin_lock(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + } + __unset_cpu_idle(vcpu); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&vcpu->wq, &wait); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.float_int->lock); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); + return 0; +} + +void kvm_s390_tasklet(unsigned long parm) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; + + spin_lock(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 1; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + spin_unlock(&vcpu->arch.local_int.lock); +} + +/* + * low level hrtimer wake routine. Because this runs in hardirq context + * we schedule a tasklet to do the real work. + */ +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + vcpu->preempted = true; + tasklet_schedule(&vcpu->arch.tasklet); + + return HRTIMER_NORESTART; +} + +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + list_del(&inti->list); + kfree(inti); + } + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + + /* clear pending external calls set by sigp interpretation facility */ + atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(SIGP_CTRL_C, + &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); +} + +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if (kvm_cpu_has_pending_timer(vcpu)) + deliver_ckc_interrupt(vcpu); + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + fi->irq_count--; + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + fi->irq_count--; + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_PROGRAM_INT; + inti->pgm.code = code; + + VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1); + spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", + pgm_info->code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + pgm_info->code, 0, 1); + + inti->type = KVM_S390_PROGRAM_INT; + memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm)); + spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti, *iter; + + if ((!schid && !cr6) || (schid && cr6)) + return NULL; + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + inti = NULL; + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (cr6 && + ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) + continue; + if (schid) { + if (((schid & 0x00000000ffff0000) >> 16) != + iter->io.subchannel_id) + continue; + if ((schid & 0x000000000000ffff) != + iter->io.subchannel_nr) + continue; + } + inti = iter; + break; + } + if (inti) { + list_del_init(&inti->list); + fi->irq_count--; + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return inti; +} + +static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *iter; + struct kvm_vcpu *dst_vcpu = NULL; + int sigcpu; + int rc = 0; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + rc = -EINVAL; + goto unlock_fi; + } + fi->irq_count++; + if (!is_ioint(inti->type)) { + list_add_tail(&inti->list, &fi->list); + } else { + u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (int_word_to_isc_bits(iter->io.io_int_word) + <= isc_bits) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (kvm_get_vcpu(kvm, sigcpu) == NULL); + } + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + li = &dst_vcpu->arch.local_int; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + kvm_get_vcpu(kvm, sigcpu)->preempted = true; + spin_unlock_bh(&li->lock); +unlock_fi: + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return rc; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = s390int->type; + switch (inti->type) { + case KVM_S390_INT_VIRTIO: + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", + s390int->parm, s390int->parm64); + inti->ext.ext_params = s390int->parm; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_SERVICE: + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + inti->ext.ext_params = s390int->parm; + break; + case KVM_S390_INT_PFAULT_DONE: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_MCHK: + VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ + inti->mchk.mcic = s390int->parm64; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (inti->type & IOINT_AI_MASK) + VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", + s390int->type & IOINT_CSSID_MASK, + s390int->type & IOINT_SSID_MASK, + s390int->type & IOINT_SCHID_MASK); + inti->io.subchannel_id = s390int->parm >> 16; + inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; + inti->io.io_int_parm = s390int->parm64 >> 32; + inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; + break; + default: + kfree(inti); + return -EINVAL; + } + trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, + 2); + + return __inject_vm(kvm, inti); +} + +void kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + __inject_vm(kvm, inti); +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + inti->type = s390int->type; + inti->pgm.code = s390int->parm; + VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_SET_PREFIX: + inti->prefix.address = s390int->parm; + inti->type = s390int->type; + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_STOP: + case KVM_S390_RESTART: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: + VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); + inti->type = s390int->type; + break; + case KVM_S390_INT_EXTERNAL_CALL: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + s390int->parm); + inti->type = s390int->type; + inti->extcall.code = s390int->parm; + break; + case KVM_S390_INT_EMERGENCY: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm); + inti->type = s390int->type; + inti->emerg.code = s390int->parm; + break; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->type = s390int->type; + inti->mchk.mcic = s390int->parm64; + break; + case KVM_S390_INT_PFAULT_INIT: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + default: + kfree(inti); + return -EINVAL; + } + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, + s390int->parm64, 2); + + mutex_lock(&vcpu->kvm->lock); + li = &vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (inti->type == KVM_S390_PROGRAM_INT) + list_add(&inti->list, &li->list); + else + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (inti->type == KVM_S390_SIGP_STOP) + li->action_bits |= ACTION_STOP_ON_STOP; + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + vcpu->preempted = true; + spin_unlock_bh(&li->lock); + mutex_unlock(&vcpu->kvm->lock); + return 0; +} + +void kvm_s390_clear_float_irqs(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + list_del(&inti->list); + kfree(inti); + } + fi->irq_count = 0; + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, + u8 *addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + struct kvm_s390_irq irq = {0}; + + irq.type = inti->type; + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + irq.u.ext = inti->ext; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + irq.u.io = inti->io; + break; + case KVM_S390_MCHK: + irq.u.mchk = inti->mchk; + break; + default: + return -EINVAL; + } + + if (copy_to_user(uptr, &irq, sizeof(irq))) + return -EFAULT; + + return 0; +} + +static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +{ + struct kvm_s390_interrupt_info *inti; + struct kvm_s390_float_interrupt *fi; + int ret = 0; + int n = 0; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + + list_for_each_entry(inti, &fi->list, list) { + if (len < sizeof(struct kvm_s390_irq)) { + /* signal userspace to try again */ + ret = -ENOMEM; + break; + } + ret = copy_irq_to_user(inti, buf); + if (ret) + break; + buf += sizeof(struct kvm_s390_irq); + len -= sizeof(struct kvm_s390_irq); + n++; + } + + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + + return ret < 0 ? ret : n; +} + +static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_FLIC_GET_ALL_IRQS: + r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + attr->attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, + u64 addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + void *target = NULL; + void __user *source; + u64 size; + + if (get_user(inti->type, (u64 __user *)addr)) + return -EFAULT; + + switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + target = (void *) &inti->ext; + source = &uptr->u.ext; + size = sizeof(inti->ext); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + target = (void *) &inti->io; + source = &uptr->u.io; + size = sizeof(inti->io); + break; + case KVM_S390_MCHK: + target = (void *) &inti->mchk; + source = &uptr->u.mchk; + size = sizeof(inti->mchk); + break; + default: + return -EINVAL; + } + + if (copy_from_user(target, source, size)) + return -EFAULT; + + return 0; +} + +static int enqueue_floating_irq(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int r = 0; + int len = attr->attr; + + if (len % sizeof(struct kvm_s390_irq) != 0) + return -EINVAL; + else if (len > KVM_S390_FLIC_MAX_BUFFER) + return -EINVAL; + + while (len >= sizeof(struct kvm_s390_irq)) { + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + r = copy_irq_from_user(inti, attr->addr); + if (r) { + kfree(inti); + return r; + } + r = __inject_vm(dev->kvm, inti); + if (r) { + kfree(inti); + return r; + } + len -= sizeof(struct kvm_s390_irq); + attr->addr += sizeof(struct kvm_s390_irq); + } + + return r; +} + +static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id) +{ + if (id >= MAX_S390_IO_ADAPTERS) + return NULL; + return kvm->arch.adapters[id]; +} + +static int register_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct s390_io_adapter *adapter; + struct kvm_s390_io_adapter adapter_info; + + if (copy_from_user(&adapter_info, + (void __user *)attr->addr, sizeof(adapter_info))) + return -EFAULT; + + if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) || + (dev->kvm->arch.adapters[adapter_info.id] != NULL)) + return -EINVAL; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + INIT_LIST_HEAD(&adapter->maps); + init_rwsem(&adapter->maps_lock); + atomic_set(&adapter->nr_maps, 0); + adapter->id = adapter_info.id; + adapter->isc = adapter_info.isc; + adapter->maskable = adapter_info.maskable; + adapter->masked = false; + adapter->swap = adapter_info.swap; + dev->kvm->arch.adapters[adapter->id] = adapter; + + return 0; +} + +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked) +{ + int ret; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter || !adapter->maskable) + return -EINVAL; + ret = adapter->masked; + adapter->masked = masked; + return ret; +} + +static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map; + int ret; + + if (!adapter || !addr) + return -EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&map->list); + map->guest_addr = addr; + map->addr = gmap_translate(addr, kvm->arch.gmap); + if (map->addr == -EFAULT) { + ret = -EFAULT; + goto out; + } + ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + if (ret < 0) + goto out; + BUG_ON(ret != 1); + down_write(&adapter->maps_lock); + if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) { + list_add_tail(&map->list, &adapter->maps); + ret = 0; + } else { + put_page(map->page); + ret = -EINVAL; + } + up_write(&adapter->maps_lock); +out: + if (ret) + kfree(map); + return ret; +} + +static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map, *tmp; + int found = 0; + + if (!adapter || !addr) + return -EINVAL; + + down_write(&adapter->maps_lock); + list_for_each_entry_safe(map, tmp, &adapter->maps, list) { + if (map->guest_addr == addr) { + found = 1; + atomic_dec(&adapter->nr_maps); + list_del(&map->list); + put_page(map->page); + kfree(map); + break; + } + } + up_write(&adapter->maps_lock); + + return found ? 0 : -EINVAL; +} + +void kvm_s390_destroy_adapters(struct kvm *kvm) +{ + int i; + struct s390_map_info *map, *tmp; + + for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) { + if (!kvm->arch.adapters[i]) + continue; + list_for_each_entry_safe(map, tmp, + &kvm->arch.adapters[i]->maps, list) { + list_del(&map->list); + put_page(map->page); + kfree(map); + } + kfree(kvm->arch.adapters[i]); + } +} + +static int modify_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_io_adapter_req req; + struct s390_io_adapter *adapter; + int ret; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + adapter = get_io_adapter(dev->kvm, req.id); + if (!adapter) + return -EINVAL; + switch (req.type) { + case KVM_S390_IO_ADAPTER_MASK: + ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask); + if (ret > 0) + ret = 0; + break; + case KVM_S390_IO_ADAPTER_MAP: + ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr); + break; + case KVM_S390_IO_ADAPTER_UNMAP: + ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r = 0; + unsigned int i; + struct kvm_vcpu *vcpu; + + switch (attr->group) { + case KVM_DEV_FLIC_ENQUEUE: + r = enqueue_floating_irq(dev, attr); + break; + case KVM_DEV_FLIC_CLEAR_IRQS: + r = 0; + kvm_s390_clear_float_irqs(dev->kvm); + break; + case KVM_DEV_FLIC_APF_ENABLE: + dev->kvm->arch.gmap->pfault_enabled = 1; + break; + case KVM_DEV_FLIC_APF_DISABLE_WAIT: + dev->kvm->arch.gmap->pfault_enabled = 0; + /* + * Make sure no async faults are in transition when + * clearing the queues. So we don't need to worry + * about late coming workers. + */ + synchronize_srcu(&dev->kvm->srcu); + kvm_for_each_vcpu(i, vcpu, dev->kvm) + kvm_clear_async_pf_completion_queue(vcpu); + break; + case KVM_DEV_FLIC_ADAPTER_REGISTER: + r = register_io_adapter(dev, attr); + break; + case KVM_DEV_FLIC_ADAPTER_MODIFY: + r = modify_io_adapter(dev, attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static int flic_create(struct kvm_device *dev, u32 type) +{ + if (!dev) + return -EINVAL; + if (dev->kvm->arch.flic) + return -EINVAL; + dev->kvm->arch.flic = dev; + return 0; +} + +static void flic_destroy(struct kvm_device *dev) +{ + dev->kvm->arch.flic = NULL; + kfree(dev); +} + +/* s390 floating irq controller (flic) */ +struct kvm_device_ops kvm_flic_ops = { + .name = "kvm-flic", + .get_attr = flic_get_attr, + .set_attr = flic_set_attr, + .create = flic_create, + .destroy = flic_destroy, +}; + +static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) +{ + unsigned long bit; + + bit = bit_nr + (addr % PAGE_SIZE) * 8; + + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; +} + +static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter, + u64 addr) +{ + struct s390_map_info *map; + + if (!adapter) + return NULL; + + list_for_each_entry(map, &adapter->maps, list) { + if (map->guest_addr == addr) + return map; + } + return NULL; +} + +static int adapter_indicators_set(struct kvm *kvm, + struct s390_io_adapter *adapter, + struct kvm_s390_adapter_int *adapter_int) +{ + unsigned long bit; + int summary_set, idx; + struct s390_map_info *info; + void *map; + + info = get_map_info(adapter, adapter_int->ind_addr); + if (!info) + return -1; + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap); + set_bit(bit, map); + idx = srcu_read_lock(&kvm->srcu); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + info = get_map_info(adapter, adapter_int->summary_addr); + if (!info) { + srcu_read_unlock(&kvm->srcu, idx); + return -1; + } + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->summary_offset, + adapter->swap); + summary_set = test_and_set_bit(bit, map); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + srcu_read_unlock(&kvm->srcu, idx); + return summary_set ? 0 : 1; +} + +/* + * < 0 - not injected due to error + * = 0 - coalesced, summary indicator already active + * > 0 - injected interrupt + */ +static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + int ret; + struct s390_io_adapter *adapter; + + /* We're only interested in the 0->1 transition. */ + if (!level) + return 0; + adapter = get_io_adapter(kvm, e->adapter.adapter_id); + if (!adapter) + return -1; + down_read(&adapter->maps_lock); + ret = adapter_indicators_set(kvm, adapter, &e->adapter); + up_read(&adapter->maps_lock); + if ((ret > 0) && !adapter->masked) { + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + ret = kvm_s390_inject_vm(kvm, &s390int); + if (ret == 0) + ret = 1; + } + return ret; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int ret; + + switch (ue->type) { + case KVM_IRQ_ROUTING_S390_ADAPTER: + e->set = set_adapter_int; + e->adapter.summary_addr = ue->u.adapter.summary_addr; + e->adapter.ind_addr = ue->u.adapter.ind_addr; + e->adapter.summary_offset = ue->u.adapter.summary_offset; + e->adapter.ind_offset = ue->u.adapter.ind_offset; + e->adapter.adapter_id = ue->u.adapter.adapter_id; + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + return -EINVAL; +} diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h new file mode 100644 index 00000000000..d98e4159643 --- /dev/null +++ b/arch/s390/kvm/irq.h @@ -0,0 +1,22 @@ +/* + * s390 irqchip routines + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_IRQ_H +#define __KVM_IRQ_H + +#include <linux/kvm_host.h> + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} + +#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c new file mode 100644 index 00000000000..2f3e14fe91a --- /dev/null +++ b/arch/s390/kvm/kvm-s390.c @@ -0,0 +1,1767 @@ +/* + * hosting zSeries kernel virtual machines + * + * Copyright IBM Corp. 2008, 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Heiko Carstens <heiko.carstens@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + * Jason J. Herne <jjherne@us.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/hrtimer.h> +#include <linux/init.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <asm/asm-offsets.h> +#include <asm/lowcore.h> +#include <asm/pgtable.h> +#include <asm/nmi.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/sclp.h> +#include "kvm-s390.h" +#include "gaccess.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" +#include "trace-s390.h" + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_null", VCPU_STAT(exit_null) }, + { "exit_validity", VCPU_STAT(exit_validity) }, + { "exit_stop_request", VCPU_STAT(exit_stop_request) }, + { "exit_external_request", VCPU_STAT(exit_external_request) }, + { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, + { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, + { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, + { "instruction_lctl", VCPU_STAT(instruction_lctl) }, + { "instruction_stctl", VCPU_STAT(instruction_stctl) }, + { "instruction_stctg", VCPU_STAT(instruction_stctg) }, + { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_external_call", VCPU_STAT(deliver_external_call) }, + { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, + { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, + { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, + { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, + { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, + { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, + { "instruction_stidp", VCPU_STAT(instruction_stidp) }, + { "instruction_spx", VCPU_STAT(instruction_spx) }, + { "instruction_stpx", VCPU_STAT(instruction_stpx) }, + { "instruction_stap", VCPU_STAT(instruction_stap) }, + { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, + { "instruction_stsch", VCPU_STAT(instruction_stsch) }, + { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_essa", VCPU_STAT(instruction_essa) }, + { "instruction_stsi", VCPU_STAT(instruction_stsi) }, + { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_tprot", VCPU_STAT(instruction_tprot) }, + { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, + { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, + { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, + { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, + { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, + { "diagnose_44", VCPU_STAT(diagnose_44) }, + { "diagnose_9c", VCPU_STAT(diagnose_9c) }, + { NULL } +}; + +unsigned long *vfacilities; +static struct gmap_notifier gmap_notifier; + +/* test availability of vfacility */ +int test_vfacility(unsigned long nr) +{ + return __test_facility(nr, (void *) vfacilities); +} + +/* Section: not file related */ +int kvm_arch_hardware_enable(void *garbage) +{ + /* every s390 is virtualization enabled ;-) */ + return 0; +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); + +int kvm_arch_hardware_setup(void) +{ + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_ipte_notifier(&gmap_notifier); + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ + gmap_unregister_ipte_notifier(&gmap_notifier); +} + +void kvm_arch_check_processor_compat(void *rtn) +{ +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} + +/* Section: device related */ +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + if (ioctl == KVM_S390_ENABLE_SIE) + return s390_enable_sie(); + return -EINVAL; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_CAP_S390_UCONTROL: +#endif + case KVM_CAP_ASYNC_PF: + case KVM_CAP_SYNC_REGS: + case KVM_CAP_ONE_REG: + case KVM_CAP_ENABLE_CAP: + case KVM_CAP_S390_CSS_SUPPORT: + case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_VM_ATTRIBUTES: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; + case KVM_CAP_S390_COW: + r = MACHINE_HAS_ESOP; + break; + default: + r = 0; + } + return r; +} + +static void kvm_s390_sync_dirty_log(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + gfn_t cur_gfn, last_gfn; + unsigned long address; + struct gmap *gmap = kvm->arch.gmap; + + down_read(&gmap->mm->mmap_sem); + /* Loop over all guest pages */ + last_gfn = memslot->base_gfn + memslot->npages; + for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { + address = gfn_to_hva_memslot(memslot, cur_gfn); + + if (gmap_test_and_clear_dirty(address, gmap)) + mark_page_dirty(kvm, cur_gfn); + } + up_read(&gmap->mm->mmap_sem); +} + +/* Section: vm related */ +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + unsigned long n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + mutex_lock(&kvm->slots_lock); + + r = -EINVAL; + if (log->slot >= KVM_USER_MEM_SLOTS) + goto out; + + memslot = id_to_memslot(kvm->memslots, log->slot); + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + kvm_s390_sync_dirty_log(kvm, memslot); + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* Clear the dirty log */ + if (is_dirty) { + n = kvm_dirty_bitmap_bytes(memslot); + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + mutex_unlock(&kvm->slots_lock); + return r; +} + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_S390_IRQCHIP: + kvm->arch.use_irqchip = 1; + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + unsigned int idx; + switch (attr->attr) { + case KVM_S390_VM_MEM_ENABLE_CMMA: + ret = -EBUSY; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) == 0) { + kvm->arch.use_cmma = 1; + ret = 0; + } + mutex_unlock(&kvm->lock); + break; + case KVM_S390_VM_MEM_CLR_CMMA: + mutex_lock(&kvm->lock); + idx = srcu_read_lock(&kvm->srcu); + page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false); + srcu_read_unlock(&kvm->srcu, idx); + mutex_unlock(&kvm->lock); + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_S390_VM_MEM_CTRL: + ret = kvm_s390_mem_control(kvm, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_S390_VM_MEM_CTRL: + switch (attr->attr) { + case KVM_S390_VM_MEM_ENABLE_CMMA: + case KVM_S390_VM_MEM_CLR_CMMA: + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; + int r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vm(kvm, &s390int); + break; + } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } + case KVM_CREATE_IRQCHIP: { + struct kvm_irq_routing_entry routing; + + r = -EINVAL; + if (kvm->arch.use_irqchip) { + /* Set up dummy routing. */ + memset(&routing, 0, sizeof(routing)); + kvm_set_irq_routing(kvm, &routing, 0, 0); + r = 0; + } + break; + } + case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_s390_vm_set_attr(kvm, &attr); + break; + } + case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_s390_vm_get_attr(kvm, &attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_s390_vm_has_attr(kvm, &attr); + break; + } + default: + r = -ENOTTY; + } + + return r; +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + int rc; + char debug_name[16]; + static unsigned long sca_offset; + + rc = -EINVAL; +#ifdef CONFIG_KVM_S390_UCONTROL + if (type & ~KVM_VM_S390_UCONTROL) + goto out_err; + if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) + goto out_err; +#else + if (type) + goto out_err; +#endif + + rc = s390_enable_sie(); + if (rc) + goto out_err; + + rc = -ENOMEM; + + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + if (!kvm->arch.sca) + goto out_err; + spin_lock(&kvm_lock); + sca_offset = (sca_offset + 16) & 0x7f0; + kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + spin_unlock(&kvm_lock); + + sprintf(debug_name, "kvm-%u", current->pid); + + kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + if (!kvm->arch.dbf) + goto out_nodbf; + + spin_lock_init(&kvm->arch.float_int.lock); + INIT_LIST_HEAD(&kvm->arch.float_int.list); + init_waitqueue_head(&kvm->arch.ipte_wq); + + debug_register_view(kvm->arch.dbf, &debug_sprintf_view); + VM_EVENT(kvm, 3, "%s", "vm created"); + + if (type & KVM_VM_S390_UCONTROL) { + kvm->arch.gmap = NULL; + } else { + kvm->arch.gmap = gmap_alloc(current->mm); + if (!kvm->arch.gmap) + goto out_nogmap; + kvm->arch.gmap->private = kvm; + kvm->arch.gmap->pfault_enabled = 0; + } + + kvm->arch.css_support = 0; + kvm->arch.use_irqchip = 0; + + spin_lock_init(&kvm->arch.start_stop_lock); + + return 0; +out_nogmap: + debug_unregister(kvm->arch.dbf); +out_nodbf: + free_page((unsigned long)(kvm->arch.sca)); +out_err: + return rc; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); + kvm_s390_clear_local_irqs(vcpu); + kvm_clear_async_pf_completion_queue(vcpu); + if (!kvm_is_ucontrol(vcpu->kvm)) { + clear_bit(63 - vcpu->vcpu_id, + (unsigned long *) &vcpu->kvm->arch.sca->mcn); + if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == + (__u64) vcpu->arch.sie_block) + vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + } + smp_mb(); + + if (kvm_is_ucontrol(vcpu->kvm)) + gmap_free(vcpu->arch.gmap); + + if (kvm_s390_cmma_enabled(vcpu->kvm)) + kvm_s390_vcpu_unsetup_cmma(vcpu); + free_page((unsigned long)(vcpu->arch.sie_block)); + + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_arch_vcpu_destroy(vcpu); + + mutex_lock(&kvm->lock); + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + + atomic_set(&kvm->online_vcpus, 0); + mutex_unlock(&kvm->lock); +} + +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvm_free_vcpus(kvm); + free_page((unsigned long)(kvm->arch.sca)); + debug_unregister(kvm->arch.dbf); + if (!kvm_is_ucontrol(kvm)) + gmap_free(kvm->arch.gmap); + kvm_s390_destroy_adapters(kvm); + kvm_s390_clear_float_irqs(kvm); +} + +/* Section: vcpu related */ +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); + if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->arch.gmap = gmap_alloc(current->mm); + if (!vcpu->arch.gmap) + return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; + return 0; + } + + vcpu->arch.gmap = vcpu->kvm->arch.gmap; + vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | + KVM_SYNC_GPRS | + KVM_SYNC_ACRS | + KVM_SYNC_CRS; + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + /* Nothing todo */ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + save_fp_ctl(&vcpu->arch.host_fpregs.fpc); + save_fp_regs(vcpu->arch.host_fpregs.fprs); + save_access_regs(vcpu->arch.host_acrs); + restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + restore_access_regs(vcpu->run->s.regs.acrs); + gmap_enable(vcpu->arch.gmap); + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + gmap_disable(vcpu->arch.gmap); + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); + restore_fp_regs(vcpu->arch.host_fpregs.fprs); + restore_access_regs(vcpu->arch.host_acrs); +} + +static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) +{ + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0UL; + vcpu->arch.sie_block->gpsw.addr = 0UL; + kvm_s390_set_prefix(vcpu, 0); + vcpu->arch.sie_block->cputm = 0UL; + vcpu->arch.sie_block->ckc = 0UL; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); + vcpu->arch.sie_block->gcr[0] = 0xE0UL; + vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; + vcpu->arch.guest_fpregs.fpc = 0; + asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); + kvm_s390_vcpu_stop(vcpu); + kvm_s390_clear_local_irqs(vcpu); +} + +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) +{ + free_page(vcpu->arch.sie_block->cbrlo); + vcpu->arch.sie_block->cbrlo = 0; +} + +int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); + if (!vcpu->arch.sie_block->cbrlo) + return -ENOMEM; + + vcpu->arch.sie_block->ecb2 |= 0x80; + vcpu->arch.sie_block->ecb2 &= ~0x08; + return 0; +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int rc = 0; + + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED | + CPUSTAT_GED); + vcpu->arch.sie_block->ecb = 6; + if (test_vfacility(50) && test_vfacility(73)) + vcpu->arch.sie_block->ecb |= 0x10; + + vcpu->arch.sie_block->ecb2 = 8; + vcpu->arch.sie_block->eca = 0xD1002000U; + if (sclp_has_siif()) + vcpu->arch.sie_block->eca |= 1; + vcpu->arch.sie_block->fac = (int) (long) vfacilities; + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | + ICTL_TPROT; + + if (kvm_s390_cmma_enabled(vcpu->kvm)) { + rc = kvm_s390_vcpu_setup_cmma(vcpu); + if (rc) + return rc; + } + hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, + (unsigned long) vcpu); + vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + get_cpu_id(&vcpu->arch.cpu_id); + vcpu->arch.cpu_id.version = 0xff; + return rc; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + struct sie_page *sie_page; + int rc = -EINVAL; + + if (id >= KVM_MAX_VCPUS) + goto out; + + rc = -ENOMEM; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) + goto out; + + sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); + if (!sie_page) + goto out_free_cpu; + + vcpu->arch.sie_block = &sie_page->sie_block; + vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + + vcpu->arch.sie_block->icpua = id; + if (!kvm_is_ucontrol(kvm)) { + if (!kvm->arch.sca) { + WARN_ON_ONCE(1); + goto out_free_cpu; + } + if (!kvm->arch.sca->cpu[id].sda) + kvm->arch.sca->cpu[id].sda = + (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = + (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + } + + spin_lock_init(&vcpu->arch.local_int.lock); + INIT_LIST_HEAD(&vcpu->arch.local_int.list); + vcpu->arch.local_int.float_int = &kvm->arch.float_int; + vcpu->arch.local_int.wq = &vcpu->wq; + vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + + rc = kvm_vcpu_init(vcpu, kvm, id); + if (rc) + goto out_free_sie_block; + VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + vcpu->arch.sie_block); + trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); + + return vcpu; +out_free_sie_block: + free_page((unsigned long)(vcpu->arch.sie_block)); +out_free_cpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(rc); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_cpu_has_interrupt(vcpu); +} + +void s390_vcpu_block(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +/* + * Kick a guest cpu out of SIE and wait until SIE is not running. + * If the CPU is not running (e.g. waiting as idle) the function will + * return immediately. */ +void exit_sie(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* Kick a guest cpu out of SIE and prevent SIE-reentry */ +void exit_sie_sync(struct kvm_vcpu *vcpu) +{ + s390_vcpu_block(vcpu); + exit_sie(vcpu); +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +{ + int i; + struct kvm *kvm = gmap->private; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* match against both prefix pages */ + if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + exit_sie_sync(vcpu); + } + } +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + +static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_S390_TODPR: + r = put_user(vcpu->arch.sie_block->todpr, + (u32 __user *)reg->addr); + break; + case KVM_REG_S390_EPOCHDIFF: + r = put_user(vcpu->arch.sie_block->epoch, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CPU_TIMER: + r = put_user(vcpu->arch.sie_block->cputm, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CLOCK_COMP: + r = put_user(vcpu->arch.sie_block->ckc, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFTOKEN: + r = put_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = put_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = put_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PP: + r = put_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_GBEA: + r = put_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, + struct kvm_one_reg *reg) +{ + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_S390_TODPR: + r = get_user(vcpu->arch.sie_block->todpr, + (u32 __user *)reg->addr); + break; + case KVM_REG_S390_EPOCHDIFF: + r = get_user(vcpu->arch.sie_block->epoch, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CPU_TIMER: + r = get_user(vcpu->arch.sie_block->cputm, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_CLOCK_COMP: + r = get_user(vcpu->arch.sie_block->ckc, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFTOKEN: + r = get_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = get_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = get_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PP: + r = get_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_GBEA: + r = get_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; + default: + break; + } + + return r; +} + +static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + kvm_s390_vcpu_initial_reset(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + restore_access_regs(vcpu->run->s.regs.acrs); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); + memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + if (test_fp_ctl(fpu->fpc)) + return -EINVAL; + memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); + vcpu->arch.guest_fpregs.fpc = fpu->fpc; + restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); + fpu->fpc = vcpu->arch.guest_fpregs.fpc; + return 0; +} + +static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) +{ + int rc = 0; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) + rc = -EBUSY; + else { + vcpu->run->psw_mask = psw.mask; + vcpu->run->psw_addr = psw.addr; + } + return rc; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; /* not implemented yet */ +} + +#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \ + KVM_GUESTDBG_USE_HW_BP | \ + KVM_GUESTDBG_ENABLE) + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + int rc = 0; + + vcpu->guest_debug = 0; + kvm_s390_clear_bp_data(vcpu); + + if (dbg->control & ~VALID_GUESTDBG_FLAGS) + return -EINVAL; + + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + /* enforce guest PER */ + atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + + if (dbg->control & KVM_GUESTDBG_USE_HW_BP) + rc = kvm_s390_import_bp_data(vcpu, dbg); + } else { + atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + vcpu->arch.guestdbg.last_bp = 0; + } + + if (rc) { + vcpu->guest_debug = 0; + kvm_s390_clear_bp_data(vcpu); + atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags); + } + + return rc; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +bool kvm_s390_cmma_enabled(struct kvm *kvm) +{ + if (!MACHINE_IS_LPAR) + return false; + /* only enable for z10 and later */ + if (!MACHINE_HAS_EDAT1) + return false; + if (!kvm->arch.use_cmma) + return false; + return true; +} + +static bool ibs_enabled(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; +} + +static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) +{ +retry: + s390_vcpu_unblock(vcpu); + /* + * We use MMU_RELOAD just to re-arm the ipte notifier for the + * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * This ensures that the ipte instruction for this request has + * already finished. We might race against a second unmapper that + * wants to set the blocking bit. Lets just retry the request loop. + */ + if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + int rc; + rc = gmap_ipte_notify(vcpu->arch.gmap, + kvm_s390_get_prefix(vcpu), + PAGE_SIZE * 2); + if (rc) + return rc; + goto retry; + } + + if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { + if (!ibs_enabled(vcpu)) { + trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); + atomic_set_mask(CPUSTAT_IBS, + &vcpu->arch.sie_block->cpuflags); + } + goto retry; + } + + if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { + if (ibs_enabled(vcpu)) { + trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0); + atomic_clear_mask(CPUSTAT_IBS, + &vcpu->arch.sie_block->cpuflags); + } + goto retry; + } + + return 0; +} + +/** + * kvm_arch_fault_in_page - fault-in guest page if necessary + * @vcpu: The corresponding virtual cpu + * @gpa: Guest physical address + * @writable: Whether the page should be writable or not + * + * Make sure that a guest page has been faulted-in on the host. + * + * Return: Zero on success, negative error code otherwise. + */ +long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) +{ + struct mm_struct *mm = current->mm; + hva_t hva; + long rc; + + hva = gmap_fault(gpa, vcpu->arch.gmap); + if (IS_ERR_VALUE(hva)) + return (long)hva; + down_read(&mm->mmap_sem); + rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); + up_read(&mm->mmap_sem); + + return rc < 0 ? rc : 0; +} + +static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, + unsigned long token) +{ + struct kvm_s390_interrupt inti; + inti.parm64 = token; + + if (start_token) { + inti.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + } else { + inti.type = KVM_S390_INT_PFAULT_DONE; + WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); + } +} + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); +} + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); +} + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + /* s390 will always inject the page directly */ +} + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +{ + /* + * s390 will always inject the page directly, + * but we still want check_async_completion to cleanup + */ + return true; +} + +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) +{ + hva_t hva; + struct kvm_arch_async_pf arch; + int rc; + + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + return 0; + if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != + vcpu->arch.pfault_compare) + return 0; + if (psw_extint_disabled(vcpu)) + return 0; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + return 0; + if (!vcpu->arch.gmap->pfault_enabled) + return 0; + + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); + hva += current->thread.gmap_addr & ~PAGE_MASK; + if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) + return 0; + + rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); + return rc; +} + +static int vcpu_pre_run(struct kvm_vcpu *vcpu) +{ + int rc, cpuflags; + + /* + * On s390 notifications for arriving pages will be delivered directly + * to the guest but the house keeping for completed pfaults is + * handled outside the worker. + */ + kvm_check_async_pf_completion(vcpu); + + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + + if (need_resched()) + schedule(); + + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); + + if (!kvm_is_ucontrol(vcpu->kvm)) + kvm_s390_deliver_pending_interrupts(vcpu); + + rc = kvm_s390_handle_requests(vcpu); + if (rc) + return rc; + + if (guestdbg_enabled(vcpu)) { + kvm_s390_backup_guest_per_regs(vcpu); + kvm_s390_patch_guest_per_regs(vcpu); + } + + vcpu->arch.sie_block->icptcode = 0; + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); + trace_kvm_s390_sie_enter(vcpu, cpuflags); + + return 0; +} + +static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) +{ + int rc = -1; + + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); + + if (guestdbg_enabled(vcpu)) + kvm_s390_restore_guest_per_regs(vcpu); + + if (exit_reason >= 0) { + rc = 0; + } else if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + vcpu->run->s390_ucontrol.pgm_code = 0x10; + rc = -EREMOTE; + + } else if (current->thread.gmap_pfault) { + trace_kvm_s390_major_guest_pfault(vcpu); + current->thread.gmap_pfault = 0; + if (kvm_arch_setup_async_pf(vcpu)) { + rc = 0; + } else { + gpa_t gpa = current->thread.gmap_addr; + rc = kvm_arch_fault_in_page(vcpu, gpa, 1); + } + } + + if (rc == -1) { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + + if (rc == 0) { + if (kvm_is_ucontrol(vcpu->kvm)) + /* Don't exit for host interrupts. */ + rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; + else + rc = kvm_handle_sie_intercept(vcpu); + } + + return rc; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu) +{ + int rc, exit_reason; + + /* + * We try to hold kvm->srcu during most of vcpu_run (except when run- + * ning the guest), so that memslots (and other stuff) are protected + */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + do { + rc = vcpu_pre_run(vcpu); + if (rc) + break; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + /* + * As PF_VCPU will be used in fault handler, between + * guest_enter and guest_exit should be no uaccess. + */ + preempt_disable(); + kvm_guest_enter(); + preempt_enable(); + exit_reason = sie64a(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs); + kvm_guest_exit(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + rc = vcpu_post_run(vcpu, exit_reason); + } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + return rc; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int rc; + sigset_t sigsaved; + + if (guestdbg_exit_pending(vcpu)) { + kvm_s390_prepare_debug_exit(vcpu); + return 0; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + kvm_s390_vcpu_start(vcpu); + + switch (kvm_run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + case KVM_EXIT_UNKNOWN: + case KVM_EXIT_INTR: + case KVM_EXIT_S390_RESET: + case KVM_EXIT_S390_UCONTROL: + case KVM_EXIT_S390_TSCH: + case KVM_EXIT_DEBUG: + break; + default: + BUG(); + } + + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + + might_fault(); + rc = __vcpu_run(vcpu); + + if (signal_pending(current) && !rc) { + kvm_run->exit_reason = KVM_EXIT_INTR; + rc = -EINTR; + } + + if (guestdbg_exit_pending(vcpu) && !rc) { + kvm_s390_prepare_debug_exit(vcpu); + rc = 0; + } + + if (rc == -EOPNOTSUPP) { + /* intercept cannot be handled in-kernel, prepare kvm-run */ + kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; + kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + rc = 0; + } + + if (rc == -EREMOTE) { + /* intercept was handled, but userspace support is needed + * kvm_run has been prepared by the handler */ + rc = 0; + } + + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); + memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu->stat.exit_userspace++; + return rc; +} + +/* + * store status at address + * we use have two special cases: + * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit + * KVM_S390_STORE_STATUS_PREFIXED: -> prefix + */ +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) +{ + unsigned char archmode = 1; + unsigned int px; + u64 clkcomp; + int rc; + + if (gpa == KVM_S390_STORE_STATUS_NOADDR) { + if (write_guest_abs(vcpu, 163, &archmode, 1)) + return -EFAULT; + gpa = SAVE_AREA_BASE; + } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { + if (write_guest_real(vcpu, 163, &archmode, 1)) + return -EFAULT; + gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); + } + rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), + vcpu->arch.guest_fpregs.fprs, 128); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), + vcpu->run->s.regs.gprs, 128); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), + &vcpu->arch.sie_block->gpsw, 16); + px = kvm_s390_get_prefix(vcpu); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), + &px, 4); + rc |= write_guest_abs(vcpu, + gpa + offsetof(struct save_area, fp_ctrl_reg), + &vcpu->arch.guest_fpregs.fpc, 4); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), + &vcpu->arch.sie_block->todpr, 4); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), + &vcpu->arch.sie_block->cputm, 8); + clkcomp = vcpu->arch.sie_block->ckc >> 8; + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), + &clkcomp, 8); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), + &vcpu->run->s.regs.acrs, 64); + rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), + &vcpu->arch.sie_block->gcr, 128); + return rc ? -EFAULT : 0; +} + +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + /* + * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy + * copying in vcpu load/put. Lets update our copies before we save + * it into the save area + */ + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + + return kvm_s390_store_status_unloaded(vcpu, addr); +} + +static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; +} + +static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) +{ + kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); + kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu); + exit_sie_sync(vcpu); +} + +static void __disable_ibs_on_all_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + __disable_ibs_on_vcpu(vcpu); + } +} + +static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) +{ + kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); + kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu); + exit_sie_sync(vcpu); +} + +void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) +{ + int i, online_vcpus, started_vcpus = 0; + + if (!is_vcpu_stopped(vcpu)) + return; + + trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); + /* Only one cpu at a time may enter/leave the STOPPED state. */ + spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + + for (i = 0; i < online_vcpus; i++) { + if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) + started_vcpus++; + } + + if (started_vcpus == 0) { + /* we're the only active VCPU -> speed it up */ + __enable_ibs_on_vcpu(vcpu); + } else if (started_vcpus == 1) { + /* + * As we are starting a second VCPU, we have to disable + * the IBS facility on all VCPUs to remove potentially + * oustanding ENABLE requests. + */ + __disable_ibs_on_all_vcpus(vcpu->kvm); + } + + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + /* + * Another VCPU might have used IBS while we were offline. + * Let's play safe and flush the VCPU at startup. + */ + vcpu->arch.sie_block->ihcpu = 0xffff; + spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + return; +} + +void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) +{ + int i, online_vcpus, started_vcpus = 0; + struct kvm_vcpu *started_vcpu = NULL; + + if (is_vcpu_stopped(vcpu)) + return; + + trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); + /* Only one cpu at a time may enter/leave the STOPPED state. */ + spin_lock_bh(&vcpu->kvm->arch.start_stop_lock); + online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + __disable_ibs_on_vcpu(vcpu); + + for (i = 0; i < online_vcpus; i++) { + if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) { + started_vcpus++; + started_vcpu = vcpu->kvm->vcpus[i]; + } + } + + if (started_vcpus == 1) { + /* + * As we only have one VCPU left, we want to enable the + * IBS facility for that VCPU to speed it up. + */ + __enable_ibs_on_vcpu(started_vcpu); + } + + spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock); + return; +} + +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_S390_CSS_SUPPORT: + if (!vcpu->kvm->arch.css_support) { + vcpu->kvm->arch.css_support = 1; + trace_kvm_s390_enable_css(vcpu->kvm); + } + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + int idx; + long r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vcpu(vcpu, &s390int); + break; + } + case KVM_S390_STORE_STATUS: + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = kvm_s390_vcpu_store_status(vcpu, arg); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + break; + case KVM_S390_SET_INITIAL_PSW: { + psw_t psw; + + r = -EFAULT; + if (copy_from_user(&psw, argp, sizeof(psw))) + break; + r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); + break; + } + case KVM_S390_INITIAL_RESET: + r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); + break; + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + if (ioctl == KVM_SET_ONE_REG) + r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®); + else + r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®); + break; + } +#ifdef CONFIG_KVM_S390_UCONTROL + case KVM_S390_UCAS_MAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, + ucasmap.vcpu_addr, ucasmap.length); + break; + } + case KVM_S390_UCAS_UNMAP: { + struct kvm_s390_ucas_mapping ucasmap; + + if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { + r = -EFAULT; + break; + } + + if (!kvm_is_ucontrol(vcpu->kvm)) { + r = -EINVAL; + break; + } + + r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, + ucasmap.length); + break; + } +#endif + case KVM_S390_VCPU_FAULT: { + r = gmap_fault(arg, vcpu->arch.gmap); + if (!IS_ERR_VALUE(r)) + r = 0; + break; + } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } + default: + r = -ENOTTY; + } + return r; +} + +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm) +{ +} + +/* Section: memory related */ +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ + + if (mem->userspace_addr & 0xffffful) + return -EINVAL; + + if (mem->memory_size & 0xffffful) + return -EINVAL; + + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) +{ + int rc; + + /* If the basics of the memslot do not change, we do not want + * to update the gmap. Every update causes several unnecessary + * segment translation exceptions. This is usually handled just + * fine by the normal fault handler + gmap, but it will also + * cause faults on the prefix page of running guest CPUs. + */ + if (old->userspace_addr == mem->userspace_addr && + old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && + old->npages * PAGE_SIZE == mem->memory_size) + return; + + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + if (rc) + printk(KERN_WARNING "kvm-s390: failed to commit memory region\n"); + return; +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} + +static int __init kvm_s390_init(void) +{ + int ret; + ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (ret) + return ret; + + /* + * guests can ask for up to 255+1 double words, we need a full page + * to hold the maximum amount of facilities. On the other hand, we + * only set facilities that are known to work in KVM. + */ + vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); + if (!vfacilities) { + kvm_exit(); + return -ENOMEM; + } + memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); + vfacilities[0] &= 0xff82fff3f4fc2000UL; + vfacilities[1] &= 0x005c000000000000UL; + return 0; +} + +static void __exit kvm_s390_exit(void) +{ + free_page((unsigned long) vfacilities); + kvm_exit(); +} + +module_init(kvm_s390_init); +module_exit(kvm_s390_exit); + +/* + * Enable autoloading of the kvm module. + * Note that we add the module alias here instead of virt/kvm/kvm_main.c + * since x86 takes a different approach. + */ +#include <linux/miscdevice.h> +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h new file mode 100644 index 00000000000..a8655ed3161 --- /dev/null +++ b/arch/s390/kvm/kvm-s390.h @@ -0,0 +1,236 @@ +/* + * definition for kvm on s390 + * + * Copyright IBM Corp. 2008, 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + */ + +#ifndef ARCH_S390_KVM_S390_H +#define ARCH_S390_KVM_S390_H + +#include <linux/hrtimer.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> + +typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); + +/* declare vfacilities extern */ +extern unsigned long *vfacilities; + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + +/* Transactional Memory Execution related macros */ +#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) +#define TDB_FORMAT1 1 +#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) + +#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + +#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \ + "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \ + d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\ + d_args); \ +} while (0) + +static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; +} + +static inline int kvm_is_ucontrol(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if (kvm->arch.gmap) + return 0; + return 1; +#else + return 0; +#endif +} + +#define GUEST_PREFIX_SHIFT 13 +static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT; +} + +static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) +{ + vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; + vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); +} + +static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) +{ + u32 base2 = vcpu->arch.sie_block->ipb >> 28; + u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; +} + +static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, + u64 *address1, u64 *address2) +{ + u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; + u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; + u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; + u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff; + + *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; + *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; +} + +static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) +{ + if (r1) + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + if (r2) + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; +} + +static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) +{ + u32 base2 = vcpu->arch.sie_block->ipb >> 28; + u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4); + /* The displacement is a 20bit _SIGNED_ value */ + if (disp2 & 0x80000) + disp2+=0xfff00000; + + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; +} + +static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) +{ + u32 base2 = vcpu->arch.sie_block->ipb >> 28; + u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; +} + +/* Set the condition code in the guest program status word */ +static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) +{ + vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); + vcpu->arch.sie_block->gpsw.mask |= cc << 44; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); +void kvm_s390_tasklet(unsigned long parm); +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); +void kvm_s390_clear_float_irqs(struct kvm *kvm); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid); +void kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti); +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); + +/* implemented in priv.c */ +int is_valid_psw(psw_t *psw); +int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); +int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); +int kvm_s390_handle_01(struct kvm_vcpu *vcpu); +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); +int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); + +/* implemented in sigp.c */ +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); +int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); + +/* implemented in kvm-s390.c */ +long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); +void s390_vcpu_block(struct kvm_vcpu *vcpu); +void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void exit_sie(struct kvm_vcpu *vcpu); +void exit_sie_sync(struct kvm_vcpu *vcpu); +int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); +void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); +/* is cmma enabled */ +bool kvm_s390_cmma_enabled(struct kvm *kvm); +int test_vfacility(unsigned long nr); + +/* implemented in diag.c */ +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); +/* implemented in interrupt.c */ +int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info); + +/** + * kvm_s390_inject_prog_cond - conditionally inject a program check + * @vcpu: virtual cpu + * @rc: original return/error code + * + * This function is supposed to be used after regular guest access functions + * failed, to conditionally inject a program check to a vcpu. The typical + * pattern would look like + * + * rc = write_guest(vcpu, addr, data, len); + * if (rc) + * return kvm_s390_inject_prog_cond(vcpu, rc); + * + * A negative return code from guest access functions implies an internal error + * like e.g. out of memory. In these cases no program check should be injected + * to the guest. + * A positive value implies that an exception happened while accessing a guest's + * memory. In this case all data belonging to the corresponding program check + * has been stored in vcpu->arch.pgm and can be injected with + * kvm_s390_inject_prog_irq(). + * + * Returns: - the original @rc value if @rc was negative (internal error) + * - zero if @rc was already zero + * - zero or error code from injecting if @rc was positive + * (program check injected to @vcpu) + */ +static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc) +{ + if (rc <= 0) + return rc; + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); +} + +/* implemented in interrupt.c */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int psw_extint_disabled(struct kvm_vcpu *vcpu); +void kvm_s390_destroy_adapters(struct kvm *kvm); +int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); + +/* implemented in guestdbg.c */ +void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); +void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu); +void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu); +int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); +void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); +void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); +void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c new file mode 100644 index 00000000000..f89c1cd6775 --- /dev/null +++ b/arch/s390/kvm/priv.c @@ -0,0 +1,1020 @@ +/* + * handling privileged instructions + * + * Copyright IBM Corp. 2008, 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/gfp.h> +#include <linux/errno.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> +#include <asm/facility.h> +#include <asm/current.h> +#include <asm/debug.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> +#include <asm/ptrace.h> +#include <asm/compat.h> +#include "gaccess.h" +#include "kvm-s390.h" +#include "trace.h" + +/* Handle SCK (SET CLOCK) interception */ +static int handle_set_clock(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *cpup; + s64 hostclk, val; + int i, rc; + u64 op2; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + op2 = kvm_s390_get_base_disp_s(vcpu); + if (op2 & 7) /* Operand must be on a doubleword boundary */ + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + rc = read_guest(vcpu, op2, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + if (store_tod_clock(&hostclk)) { + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } + val = (val - hostclk) & ~0x3fUL; + + mutex_lock(&vcpu->kvm->lock); + kvm_for_each_vcpu(i, cpup, vcpu->kvm) + cpup->arch.sie_block->epoch = val; + mutex_unlock(&vcpu->kvm->lock); + + kvm_s390_set_psw_cc(vcpu, 0); + return 0; +} + +static int handle_set_prefix(struct kvm_vcpu *vcpu) +{ + u64 operand2; + u32 address; + int rc; + + vcpu->stat.instruction_spx++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + /* must be word boundary */ + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* get the value */ + rc = read_guest(vcpu, operand2, &address, sizeof(address)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + address &= 0x7fffe000u; + + /* + * Make sure the new value is valid memory. We only need to check the + * first page, since address is 8k aligned and memory pieces are always + * at least 1MB aligned and have at least a size of 1MB. + */ + if (kvm_is_error_gpa(vcpu->kvm, address)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + kvm_s390_set_prefix(vcpu, address); + + VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); + trace_kvm_s390_handle_prefix(vcpu, 1, address); + return 0; +} + +static int handle_store_prefix(struct kvm_vcpu *vcpu) +{ + u64 operand2; + u32 address; + int rc; + + vcpu->stat.instruction_stpx++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + /* must be word boundary */ + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + address = kvm_s390_get_prefix(vcpu); + + /* get the value */ + rc = write_guest(vcpu, operand2, &address, sizeof(address)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); + trace_kvm_s390_handle_prefix(vcpu, 0, address); + return 0; +} + +static int handle_store_cpu_address(struct kvm_vcpu *vcpu) +{ + u16 vcpu_id = vcpu->vcpu_id; + u64 ga; + int rc; + + vcpu->stat.instruction_stap++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_s(vcpu); + + if (ga & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga); + trace_kvm_s390_handle_stap(vcpu, ga); + return 0; +} + +static void __skey_check_enable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) + return; + + s390_enable_skey(); + trace_kvm_s390_skey_related_inst(vcpu); + vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + + +static int handle_skey(struct kvm_vcpu *vcpu) +{ + __skey_check_enable(vcpu); + + vcpu->stat.instruction_storage_key++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return 0; +} + +static int handle_ipte_interlock(struct kvm_vcpu *vcpu) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + + vcpu->stat.instruction_ipte_interlock++; + if (psw_bits(*psw).p) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); + psw->addr = __rewind_psw(*psw, 4); + VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); + return 0; +} + +static int handle_test_block(struct kvm_vcpu *vcpu) +{ + gpa_t addr; + int reg2; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + kvm_s390_get_regs_rre(vcpu, NULL, ®2); + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + if (kvm_s390_check_low_addr_protection(vcpu, addr)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + addr = kvm_s390_real_to_abs(vcpu, addr); + + if (kvm_is_error_gpa(vcpu->kvm, addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + /* + * We don't expect errors on modern systems, and do not care + * about storage keys (yet), so let's just clear the page. + */ + if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE)) + return -EFAULT; + kvm_s390_set_psw_cc(vcpu, 0); + vcpu->run->s.regs.gprs[0] = 0; + return 0; +} + +static int handle_tpi(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_interrupt_info *inti; + unsigned long len; + u32 tpi_data[3]; + int cc, rc; + u64 addr; + + rc = 0; + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + cc = 0; + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); + if (!inti) + goto no_interrupt; + cc = 1; + tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr; + tpi_data[1] = inti->io.io_int_parm; + tpi_data[2] = inti->io.io_int_word; + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + len = sizeof(tpi_data) - 4; + rc = write_guest(vcpu, addr, &tpi_data, len); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + len = sizeof(tpi_data); + if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) + rc = -EFAULT; + } + /* + * If we encounter a problem storing the interruption code, the + * instruction is suppressed from the guest's view: reinject the + * interrupt. + */ + if (!rc) + kfree(inti); + else + kvm_s390_reinject_io_int(vcpu->kvm, inti); +no_interrupt: + /* Set condition code and we're done. */ + if (!rc) + kvm_s390_set_psw_cc(vcpu, cc); + return rc ? -EFAULT : 0; +} + +static int handle_tsch(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_interrupt_info *inti; + + inti = kvm_s390_get_io_int(vcpu->kvm, 0, + vcpu->run->s.regs.gprs[1]); + + /* + * Prepare exit to userspace. + * We indicate whether we dequeued a pending I/O interrupt + * so that userspace can re-inject it if the instruction gets + * a program check. While this may re-order the pending I/O + * interrupts, this is no problem since the priority is kept + * intact. + */ + vcpu->run->exit_reason = KVM_EXIT_S390_TSCH; + vcpu->run->s390_tsch.dequeued = !!inti; + if (inti) { + vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id; + vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr; + vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm; + vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word; + } + vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb; + kfree(inti); + return -EREMOTE; +} + +static int handle_io_inst(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->kvm->arch.css_support) { + /* + * Most I/O instructions will be handled by userspace. + * Exceptions are tpi and the interrupt portion of tsch. + */ + if (vcpu->arch.sie_block->ipa == 0xb236) + return handle_tpi(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb235) + return handle_tsch(vcpu); + /* Handle in userspace. */ + return -EOPNOTSUPP; + } else { + /* + * Set condition code 3 to stop the guest from issuing channel + * I/O instructions. + */ + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } +} + +static int handle_stfl(struct kvm_vcpu *vcpu) +{ + int rc; + + vcpu->stat.instruction_stfl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), + vfacilities, 4); + if (rc) + return rc; + VCPU_EVENT(vcpu, 5, "store facility list value %x", + *(unsigned int *) vfacilities); + trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); + return 0; +} + +static void handle_new_psw(struct kvm_vcpu *vcpu) +{ + /* Check whether the new psw is enabled for machine checks. */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) + kvm_s390_deliver_pending_machine_checks(vcpu); +} + +#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) +#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL +#define PSW_ADDR_24 0x0000000000ffffffUL +#define PSW_ADDR_31 0x000000007fffffffUL + +int is_valid_psw(psw_t *psw) +{ + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + if (psw->addr & 1) + return 0; + return 1; +} + +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) +{ + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; + psw_compat_t new_psw; + u64 addr; + int rc; + + if (gpsw->mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + handle_new_psw(vcpu); + return 0; +} + +static int handle_lpswe(struct kvm_vcpu *vcpu) +{ + psw_t new_psw; + u64 addr; + int rc; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_s(vcpu); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + handle_new_psw(vcpu); + return 0; +} + +static int handle_stidp(struct kvm_vcpu *vcpu) +{ + u64 stidp_data = vcpu->arch.stidp_data; + u64 operand2; + int rc; + + vcpu->stat.instruction_stidp++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); + return 0; +} + +static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) +{ + int cpus = 0; + int n; + + cpus = atomic_read(&vcpu->kvm->online_vcpus); + + /* deal with other level 3 hypervisors */ + if (stsi(mem, 3, 2, 2)) + mem->count = 0; + if (mem->count < 8) + mem->count++; + for (n = mem->count - 1; n > 0 ; n--) + memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + + mem->vm[0].cpus_total = cpus; + mem->vm[0].cpus_configured = cpus; + mem->vm[0].cpus_standby = 0; + mem->vm[0].cpus_reserved = 0; + mem->vm[0].caf = 1000; + memcpy(mem->vm[0].name, "KVMguest", 8); + ASCEBC(mem->vm[0].name, 8); + memcpy(mem->vm[0].cpi, "KVM/Linux ", 16); + ASCEBC(mem->vm[0].cpi, 16); +} + +static int handle_stsi(struct kvm_vcpu *vcpu) +{ + int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; + int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; + u64 operand2; + int rc = 0; + + vcpu->stat.instruction_stsi++; + VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (fc > 3) { + kvm_s390_set_psw_cc(vcpu, 3); + return 0; + } + + if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 + || vcpu->run->s.regs.gprs[1] & 0xffff0000) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (fc == 0) { + vcpu->run->s.regs.gprs[0] = 3 << 28; + kvm_s390_set_psw_cc(vcpu, 0); + return 0; + } + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 0xfff) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { + case 1: /* same handling for 1 and 2 */ + case 2: + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_no_data; + if (stsi((void *) mem, fc, sel1, sel2)) + goto out_no_data; + break; + case 3: + if (sel1 != 2 || sel2 != 2) + goto out_no_data; + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_no_data; + handle_stsi_3_2_2(vcpu, (void *) mem); + break; + } + + rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE); + if (rc) { + rc = kvm_s390_inject_prog_cond(vcpu, rc); + goto out; + } + trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); + free_page(mem); + kvm_s390_set_psw_cc(vcpu, 0); + vcpu->run->s.regs.gprs[0] = 0; + return 0; +out_no_data: + kvm_s390_set_psw_cc(vcpu, 3); +out: + free_page(mem); + return rc; +} + +static const intercept_handler_t b2_handlers[256] = { + [0x02] = handle_stidp, + [0x04] = handle_set_clock, + [0x10] = handle_set_prefix, + [0x11] = handle_store_prefix, + [0x12] = handle_store_cpu_address, + [0x21] = handle_ipte_interlock, + [0x29] = handle_skey, + [0x2a] = handle_skey, + [0x2b] = handle_skey, + [0x2c] = handle_test_block, + [0x30] = handle_io_inst, + [0x31] = handle_io_inst, + [0x32] = handle_io_inst, + [0x33] = handle_io_inst, + [0x34] = handle_io_inst, + [0x35] = handle_io_inst, + [0x36] = handle_io_inst, + [0x37] = handle_io_inst, + [0x38] = handle_io_inst, + [0x39] = handle_io_inst, + [0x3a] = handle_io_inst, + [0x3b] = handle_io_inst, + [0x3c] = handle_io_inst, + [0x50] = handle_ipte_interlock, + [0x5f] = handle_io_inst, + [0x74] = handle_io_inst, + [0x76] = handle_io_inst, + [0x7d] = handle_stsi, + [0xb1] = handle_stfl, + [0xb2] = handle_lpswe, +}; + +int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + /* + * A lot of B2 instructions are priviledged. Here we check for + * the privileged ones, that we can handle in the kernel. + * Anything else goes to userspace. + */ + handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + + return -EOPNOTSUPP; +} + +static int handle_epsw(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + /* This basically extracts the mask half of the psw. */ + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; + vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; + if (reg2) { + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; + vcpu->run->s.regs.gprs[reg2] |= + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; + } + return 0; +} + +#define PFMF_RESERVED 0xfffc0101UL +#define PFMF_SK 0x00020000UL +#define PFMF_CF 0x00010000UL +#define PFMF_UI 0x00008000UL +#define PFMF_FSC 0x00007000UL +#define PFMF_NQ 0x00000800UL +#define PFMF_MR 0x00000400UL +#define PFMF_MC 0x00000200UL +#define PFMF_KEY 0x000000feUL + +static int handle_pfmf(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + unsigned long start, end; + + vcpu->stat.instruction_pfmf++; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + if (!MACHINE_HAS_PFMF) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Only provide non-quiescing support if the host supports it */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* No support for conditional-SSKE */ + if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_protection(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + case 0x00000000: + end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + break; + case 0x00001000: + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + break; + /* We dont support EDAT2 + case 0x00002000: + end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + break;*/ + default: + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + } + while (start < end) { + unsigned long useraddr, abs_addr; + + /* Translate guest address to host address */ + if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0) + abs_addr = kvm_s390_real_to_abs(vcpu, start); + else + abs_addr = start; + useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr)); + if (kvm_is_error_hva(useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (clear_user((void __user *)useraddr, PAGE_SIZE)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { + __skey_check_enable(vcpu); + if (set_guest_storage_key(current->mm, useraddr, + vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + start += PAGE_SIZE; + } + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) + vcpu->run->s.regs.gprs[reg2] = end; + return 0; +} + +static int handle_essa(struct kvm_vcpu *vcpu) +{ + /* entries expected to be 1FF */ + int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; + unsigned long *cbrlo, cbrle; + struct gmap *gmap; + int i; + + VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); + gmap = vcpu->arch.gmap; + vcpu->stat.instruction_essa++; + if (!kvm_s390_cmma_enabled(vcpu->kvm)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Rewind PSW to repeat the ESSA instruction */ + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ + cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); + down_read(&gmap->mm->mmap_sem); + for (i = 0; i < entries; ++i) { + cbrle = cbrlo[i]; + if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE)) + /* invalid entry */ + break; + /* try to free backing */ + __gmap_zap(cbrle, gmap); + } + up_read(&gmap->mm->mmap_sem); + if (i < entries) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + return 0; +} + +static const intercept_handler_t b9_handlers[256] = { + [0x8a] = handle_ipte_interlock, + [0x8d] = handle_epsw, + [0x8e] = handle_ipte_interlock, + [0x8f] = handle_ipte_interlock, + [0xab] = handle_essa, + [0xaf] = handle_pfmf, +}; + +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + /* This is handled just as for the B2 instructions. */ + handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + + return -EOPNOTSUPP; +} + +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u32 val = 0; + int reg, rc; + u64 ga; + + vcpu->stat.instruction_lctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rs(vcpu); + + if (ga & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); + + reg = reg1; + do { + rc = read_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + ga += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 ga; + u32 val; + int reg, rc; + + vcpu->stat.instruction_stctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rs(vcpu); + + if (ga & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); + + reg = reg1; + do { + val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful; + rc = write_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + ga += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 ga, val; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rsy(vcpu); + + if (ga & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); + + do { + rc = read_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + vcpu->arch.sie_block->gcr[reg] = val; + ga += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_stctg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 ga, val; + int reg, rc; + + vcpu->stat.instruction_stctg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + ga = kvm_s390_get_base_disp_rsy(vcpu); + + if (ga & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); + + do { + val = vcpu->arch.sie_block->gcr[reg]; + rc = write_guest(vcpu, ga, &val, sizeof(val)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + ga += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static const intercept_handler_t eb_handlers[256] = { + [0x2f] = handle_lctlg, + [0x25] = handle_stctg, +}; + +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} + +static int handle_tprot(struct kvm_vcpu *vcpu) +{ + u64 address1, address2; + unsigned long hva, gpa; + int ret = 0, cc = 0; + bool writable; + + vcpu->stat.instruction_tprot++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); + + /* we only handle the Linux memory detection case: + * access key == 0 + * everything else goes to userspace. */ + if (address2 & 0xf0) + return -EOPNOTSUPP; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_lock(vcpu); + ret = guest_translate_address(vcpu, address1, &gpa, 1); + if (ret == PGM_PROTECTION) { + /* Write protected? Try again with read-only... */ + cc = 1; + ret = guest_translate_address(vcpu, address1, &gpa, 0); + } + if (ret) { + if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { + ret = kvm_s390_inject_program_int(vcpu, ret); + } else if (ret > 0) { + /* Translation not available */ + kvm_s390_set_psw_cc(vcpu, 3); + ret = 0; + } + goto out_unlock; + } + + hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); + if (kvm_is_error_hva(hva)) { + ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } else { + if (!writable) + cc = 1; /* Write not permitted ==> read-only */ + kvm_s390_set_psw_cc(vcpu, cc); + /* Note: CC2 only occurs for storage keys (not supported yet) */ + } +out_unlock: + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_unlock(vcpu); + return ret; +} + +int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) +{ + /* For e5xx... instructions we only handle TPROT */ + if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) + return handle_tprot(vcpu); + return -EOPNOTSUPP; +} + +static int handle_sckpf(struct kvm_vcpu *vcpu) +{ + u32 value; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) + return kvm_s390_inject_program_int(vcpu, + PGM_SPECIFICATION); + + value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff; + vcpu->arch.sie_block->todpr = value; + + return 0; +} + +static const intercept_handler_t x01_handlers[256] = { + [0x07] = handle_sckpf, +}; + +int kvm_s390_handle_01(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c new file mode 100644 index 00000000000..43079a48cc9 --- /dev/null +++ b/arch/s390/kvm/sigp.c @@ -0,0 +1,475 @@ +/* + * handling interprocessor communication + * + * Copyright IBM Corp. 2008, 2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte <cotte@de.ibm.com> + * Christian Borntraeger <borntraeger@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <asm/sigp.h> +#include "gaccess.h" +#include "kvm-s390.h" +#include "trace.h" + +static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, + u64 *reg) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int cpuflags; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + cpuflags = atomic_read(li->cpuflags); + if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + else { + *reg &= 0xffffffff00000000UL; + if (cpuflags & CPUSTAT_ECALL_PEND) + *reg |= SIGP_STATUS_EXT_CALL_PENDING; + if (cpuflags & CPUSTAT_STOPPED) + *reg |= SIGP_STATUS_STOPPED; + rc = SIGP_CC_STATUS_STORED; + } + + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + return rc; +} + +static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_EMERGENCY, + .parm = vcpu->vcpu_id, + }; + struct kvm_vcpu *dst_vcpu = NULL; + int rc = 0; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + if (!rc) + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; +} + +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, + u16 asn, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; + u16 p_asn, s_asn; + psw_t *psw; + u32 flags; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); + psw = &dst_vcpu->arch.sie_block->gpsw; + p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ + s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ + + /* Deliver the emergency signal? */ + if (!(flags & CPUSTAT_STOPPED) + || (psw->mask & psw_int_mask) != psw_int_mask + || ((flags & CPUSTAT_WAIT) && psw->addr != 0) + || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { + return __sigp_emergency(vcpu, cpu_addr); + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } +} + +static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_EXTERNAL_CALL, + .parm = vcpu->vcpu_id, + }; + struct kvm_vcpu *dst_vcpu = NULL; + int rc; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int); + if (!rc) + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); + + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; +} + +static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) +{ + struct kvm_s390_interrupt_info *inti; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; + + inti = kzalloc(sizeof(*inti), GFP_ATOMIC); + if (!inti) + return -ENOMEM; + inti->type = KVM_S390_SIGP_STOP; + + spin_lock_bh(&li->lock); + if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { + kfree(inti); + if ((action & ACTION_STORE_ON_STOP) != 0) + rc = -ESHUTDOWN; + goto out; + } + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + li->action_bits |= action; + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); +out: + spin_unlock_bh(&li->lock); + + return rc; +} + +static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + rc = __inject_sigp_stop(li, action); + + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + + if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + /* If the CPU has already been stopped, we still have + * to save the status when doing stop-and-store. This + * has to be done after unlocking all spinlocks. */ + rc = kvm_s390_store_status_unloaded(dst_vcpu, + KVM_S390_STORE_STATUS_NOADDR); + } + + return rc; +} + +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +{ + int rc; + unsigned int i; + struct kvm_vcpu *v; + + switch (parameter & 0xff) { + case 0: + rc = SIGP_CC_NOT_OPERATIONAL; + break; + case 1: + case 2: + kvm_for_each_vcpu(i, v, vcpu->kvm) { + v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(v); + } + + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + break; + default: + rc = -EOPNOTSUPP; + } + return rc; +} + +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, + u64 *reg) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + struct kvm_s390_interrupt_info *inti; + int rc; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + /* + * Make sure the new value is valid memory. We only need to check the + * first page, since address is 8k aligned and memory pieces are always + * at least 1MB aligned and have at least a size of 1MB. + */ + address &= 0x7fffe000u; + if (kvm_is_error_gpa(vcpu->kvm, address)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + return SIGP_CC_STATUS_STORED; + } + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return SIGP_CC_BUSY; + + spin_lock_bh(&li->lock); + /* cpu must be in stopped state */ + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + rc = SIGP_CC_STATUS_STORED; + kfree(inti); + goto out_li; + } + + inti->type = KVM_S390_SIGP_SET_PREFIX; + inti->prefix.address = address; + + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); +out_li: + spin_unlock_bh(&li->lock); + return rc; +} + +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, + u32 addr, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + int flags; + int rc; + + if (cpu_id < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + spin_lock_bh(&dst_vcpu->arch.local_int.lock); + flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); + spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + if (!(flags & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } + + addr &= 0x7ffffe00; + rc = kvm_s390_store_status_unloaded(dst_vcpu, addr); + if (rc == -EFAULT) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + rc = SIGP_CC_STATUS_STORED; + } + return rc; +} + +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + u64 *reg) +{ + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + /* running */ + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_NOT_RUNNING; + rc = SIGP_CC_STATUS_STORED; + } + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + +/* Test whether the destination CPU is available and not busy */ +static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_local_interrupt *li; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; + struct kvm_vcpu *dst_vcpu = NULL; + + if (cpu_addr >= KVM_MAX_VCPUS) + return SIGP_CC_NOT_OPERATIONAL; + + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (li->action_bits & ACTION_STOP_ON_STOP) + rc = SIGP_CC_BUSY; + spin_unlock_bh(&li->lock); + + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + u32 parameter; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + u8 order_code; + int rc; + + /* sigp in userspace can exit */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + order_code = kvm_s390_get_base_disp_rs(vcpu); + + if (r1 % 2) + parameter = vcpu->run->s.regs.gprs[r1]; + else + parameter = vcpu->run->s.regs.gprs[r1 + 1]; + + trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter); + switch (order_code) { + case SIGP_SENSE: + vcpu->stat.instruction_sigp_sense++; + rc = __sigp_sense(vcpu, cpu_addr, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_EXTERNAL_CALL: + vcpu->stat.instruction_sigp_external_call++; + rc = __sigp_external_call(vcpu, cpu_addr); + break; + case SIGP_EMERGENCY_SIGNAL: + vcpu->stat.instruction_sigp_emergency++; + rc = __sigp_emergency(vcpu, cpu_addr); + break; + case SIGP_STOP: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP); + break; + case SIGP_STOP_AND_STORE_STATUS: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | + ACTION_STOP_ON_STOP); + break; + case SIGP_STORE_STATUS_AT_ADDRESS: + rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_SET_ARCHITECTURE: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); + break; + case SIGP_SET_PREFIX: + vcpu->stat.instruction_sigp_prefix++; + rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_COND_EMERGENCY_SIGNAL: + rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->run->s.regs.gprs[r1]); + break; + case SIGP_START: + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) + rc = -EOPNOTSUPP; /* Handle START in user space */ + break; + case SIGP_RESTART: + vcpu->stat.instruction_sigp_restart++; + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { + VCPU_EVENT(vcpu, 4, + "sigp restart %x to handle userspace", + cpu_addr); + /* user space must know about restart */ + rc = -EOPNOTSUPP; + } + break; + default: + return -EOPNOTSUPP; + } + + if (rc < 0) + return rc; + + kvm_s390_set_psw_cc(vcpu, rc); + return 0; +} + +/* + * Handle SIGP partial execution interception. + * + * This interception will occur at the source cpu when a source cpu sends an + * external call to a target cpu and the target cpu has the WAIT bit set in + * its cpuflags. Interception will occurr after the interrupt indicator bits at + * the target cpu have been set. All error cases will lead to instruction + * interception, therefore nothing is to be checked or prepared. + */ +int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) +{ + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; + struct kvm_vcpu *dest_vcpu; + u8 order_code = kvm_s390_get_base_disp_rs(vcpu); + + trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); + + if (order_code == SIGP_EXTERNAL_CALL) { + dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + BUG_ON(dest_vcpu == NULL); + + spin_lock_bh(&dest_vcpu->arch.local_int.lock); + if (waitqueue_active(&dest_vcpu->wq)) + wake_up_interruptible(&dest_vcpu->wq); + dest_vcpu->preempted = true; + spin_unlock_bh(&dest_vcpu->arch.local_int.lock); + + kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED); + return 0; + } + + return -EOPNOTSUPP; +} diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h new file mode 100644 index 00000000000..647e9d6a481 --- /dev/null +++ b/arch/s390/kvm/trace-s390.h @@ -0,0 +1,273 @@ +#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVMS390_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm-s390 +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace-s390 + +/* + * Trace point for the creation of the kvm instance. + */ +TRACE_EVENT(kvm_s390_create_vm, + TP_PROTO(unsigned long type), + TP_ARGS(type), + + TP_STRUCT__entry( + __field(unsigned long, type) + ), + + TP_fast_assign( + __entry->type = type; + ), + + TP_printk("create vm%s", + __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "") + ); + +/* + * Trace points for creation and destruction of vpcus. + */ +TRACE_EVENT(kvm_s390_create_vcpu, + TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu, + struct kvm_s390_sie_block *sie_block), + TP_ARGS(id, vcpu, sie_block), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(struct kvm_vcpu *, vcpu) + __field(struct kvm_s390_sie_block *, sie_block) + ), + + TP_fast_assign( + __entry->id = id; + __entry->vcpu = vcpu; + __entry->sie_block = sie_block; + ), + + TP_printk("create cpu %d at %p, sie block at %p", __entry->id, + __entry->vcpu, __entry->sie_block) + ); + +TRACE_EVENT(kvm_s390_destroy_vcpu, + TP_PROTO(unsigned int id), + TP_ARGS(id), + + TP_STRUCT__entry( + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->id = id; + ), + + TP_printk("destroy cpu %d", __entry->id) + ); + +/* + * Trace point for start and stop of vpcus. + */ +TRACE_EVENT(kvm_s390_vcpu_start_stop, + TP_PROTO(unsigned int id, int state), + TP_ARGS(id, state), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(int, state) + ), + + TP_fast_assign( + __entry->id = id; + __entry->state = state; + ), + + TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping", + __entry->id) + ); + +/* + * Trace points for injection of interrupts, either per machine or + * per vcpu. + */ + +#define kvm_s390_int_type \ + {KVM_S390_SIGP_STOP, "sigp stop"}, \ + {KVM_S390_PROGRAM_INT, "program interrupt"}, \ + {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \ + {KVM_S390_RESTART, "sigp restart"}, \ + {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \ + {KVM_S390_INT_SERVICE, "sclp interrupt"}, \ + {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \ + {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"} + +TRACE_EVENT(kvm_s390_inject_vm, + TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who), + TP_ARGS(type, parm, parm64, who), + + TP_STRUCT__entry( + __field(__u32, inttype) + __field(__u32, parm) + __field(__u64, parm64) + __field(int, who) + ), + + TP_fast_assign( + __entry->inttype = type & 0x00000000ffffffff; + __entry->parm = parm; + __entry->parm64 = parm64; + __entry->who = who; + ), + + TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx", + (__entry->who == 1) ? " (from kernel)" : + (__entry->who == 2) ? " (from user)" : "", + __entry->inttype, + __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->parm, __entry->parm64) + ); + +TRACE_EVENT(kvm_s390_inject_vcpu, + TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \ + int who), + TP_ARGS(id, type, parm, parm64, who), + + TP_STRUCT__entry( + __field(int, id) + __field(__u32, inttype) + __field(__u32, parm) + __field(__u64, parm64) + __field(int, who) + ), + + TP_fast_assign( + __entry->id = id; + __entry->inttype = type & 0x00000000ffffffff; + __entry->parm = parm; + __entry->parm64 = parm64; + __entry->who = who; + ), + + TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx", + (__entry->who == 1) ? " (from kernel)" : + (__entry->who == 2) ? " (from user)" : "", + __entry->id, __entry->inttype, + __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->parm, __entry->parm64) + ); + +/* + * Trace point for the actual delivery of interrupts. + */ +TRACE_EVENT(kvm_s390_deliver_interrupt, + TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1), + TP_ARGS(id, type, data0, data1), + + TP_STRUCT__entry( + __field(int, id) + __field(__u32, inttype) + __field(__u64, data0) + __field(__u64, data1) + ), + + TP_fast_assign( + __entry->id = id; + __entry->inttype = type & 0x00000000ffffffff; + __entry->data0 = data0; + __entry->data1 = data1; + ), + + TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ + "data:%08llx %016llx", + __entry->id, __entry->inttype, + __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->data0, __entry->data1) + ); + +/* + * Trace point for resets that may be requested from userspace. + */ +TRACE_EVENT(kvm_s390_request_resets, + TP_PROTO(__u64 resets), + TP_ARGS(resets), + + TP_STRUCT__entry( + __field(__u64, resets) + ), + + TP_fast_assign( + __entry->resets = resets; + ), + + TP_printk("requesting userspace resets %llx", + __entry->resets) + ); + +/* + * Trace point for a vcpu's stop requests. + */ +TRACE_EVENT(kvm_s390_stop_request, + TP_PROTO(unsigned int action_bits), + TP_ARGS(action_bits), + + TP_STRUCT__entry( + __field(unsigned int, action_bits) + ), + + TP_fast_assign( + __entry->action_bits = action_bits; + ), + + TP_printk("stop request, action_bits = %08x", + __entry->action_bits) + ); + + +/* + * Trace point for enabling channel I/O instruction support. + */ +TRACE_EVENT(kvm_s390_enable_css, + TP_PROTO(void *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(void *, kvm) + ), + + TP_fast_assign( + __entry->kvm = kvm; + ), + + TP_printk("enabling channel I/O support (kvm @ %p)\n", + __entry->kvm) + ); + +/* + * Trace point for enabling and disabling interlocking-and-broadcasting + * suppression. + */ +TRACE_EVENT(kvm_s390_enable_disable_ibs, + TP_PROTO(unsigned int id, int state), + TP_ARGS(id, state), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(int, state) + ), + + TP_fast_assign( + __entry->id = id; + __entry->state = state; + ), + + TP_printk("%s ibs on cpu %d", + __entry->state ? "enabling" : "disabling", __entry->id) + ); + + +#endif /* _TRACE_KVMS390_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h new file mode 100644 index 00000000000..916834d7a73 --- /dev/null +++ b/arch/s390/kvm/trace.h @@ -0,0 +1,418 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> +#include <asm/sie.h> +#include <asm/debug.h> +#include <asm/dis.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* + * Helpers for vcpu-specific tracepoints containing the same information + * as s390dbf VCPU_EVENTs. + */ +#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu +#define VCPU_ARGS_COMMON vcpu +#define VCPU_FIELD_COMMON __field(int, id) \ + __field(unsigned long, pswmask) \ + __field(unsigned long, pswaddr) +#define VCPU_ASSIGN_COMMON do { \ + __entry->id = vcpu->vcpu_id; \ + __entry->pswmask = vcpu->arch.sie_block->gpsw.mask; \ + __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr; \ + } while (0); +#define VCPU_TP_PRINTK(p_str, p_args...) \ + TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ + __entry->pswmask, __entry->pswaddr, p_args) + +TRACE_EVENT(kvm_s390_skey_related_inst, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu") + ); + +TRACE_EVENT(kvm_s390_major_guest_pfault, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault") + ); + +TRACE_EVENT(kvm_s390_pfault_init, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token) + ); + +TRACE_EVENT(kvm_s390_pfault_done, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token) + ); + +/* + * Tracepoints for SIE entry and exit. + */ +TRACE_EVENT(kvm_s390_sie_enter, + TP_PROTO(VCPU_PROTO_COMMON, int cpuflags), + TP_ARGS(VCPU_ARGS_COMMON, cpuflags), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, cpuflags) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->cpuflags = cpuflags; + ), + + VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags) + ); + +TRACE_EVENT(kvm_s390_sie_fault, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + + VCPU_TP_PRINTK("%s", "fault in sie instruction") + ); + +TRACE_EVENT(kvm_s390_sie_exit, + TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode), + TP_ARGS(VCPU_ARGS_COMMON, icptcode), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u8, icptcode) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->icptcode = icptcode; + ), + + VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode, + __print_symbolic(__entry->icptcode, + sie_intercept_code)) + ); + +/* + * Trace point for intercepted instructions. + */ +TRACE_EVENT(kvm_s390_intercept_instruction, + TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb), + TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u64, instruction) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->instruction = ((__u64)ipa << 48) | + ((__u64)ipb << 16); + ), + + VCPU_TP_PRINTK("intercepted instruction %016llx (%s)", + __entry->instruction, + __print_symbolic(icpt_insn_decoder(__entry->instruction), + icpt_insn_codes)) + ); + +/* + * Trace point for intercepted program interruptions. + */ +TRACE_EVENT(kvm_s390_intercept_prog, + TP_PROTO(VCPU_PROTO_COMMON, __u16 code), + TP_ARGS(VCPU_ARGS_COMMON, code), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u16, code) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + ), + + VCPU_TP_PRINTK("intercepted program interruption %04x", + __entry->code) + ); + +/* + * Trace point for validity intercepts. + */ +TRACE_EVENT(kvm_s390_intercept_validity, + TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy), + TP_ARGS(VCPU_ARGS_COMMON, viwhy), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u16, viwhy) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->viwhy = viwhy; + ), + + VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy) + ); + +/* + * Trace points for instructions that are of special interest. + */ + +TRACE_EVENT(kvm_s390_handle_sigp, + TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \ + __u32 parameter), + TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u8, order_code) + __field(__u16, cpu_addr) + __field(__u32, parameter) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->order_code = order_code; + __entry->cpu_addr = cpu_addr; + __entry->parameter = parameter; + ), + + VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \ + "parameter %08x", __entry->order_code, + __print_symbolic(__entry->order_code, + sigp_order_codes), + __entry->cpu_addr, __entry->parameter) + ); + +TRACE_EVENT(kvm_s390_handle_sigp_pei, + TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr), + TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u8, order_code) + __field(__u16, cpu_addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->order_code = order_code; + __entry->cpu_addr = cpu_addr; + ), + + VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x", + __entry->order_code, + __print_symbolic(__entry->order_code, + sigp_order_codes), + __entry->cpu_addr) + ); + +TRACE_EVENT(kvm_s390_handle_diag, + TP_PROTO(VCPU_PROTO_COMMON, __u16 code), + TP_ARGS(VCPU_ARGS_COMMON, code), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u16, code) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + ), + + VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code, + __print_symbolic(__entry->code, diagnose_codes)) + ); + +TRACE_EVENT(kvm_s390_handle_lctl, + TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, g) + __field(int, reg1) + __field(int, reg3) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->g = g; + __entry->reg1 = reg1; + __entry->reg3 = reg3; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx", + __entry->g ? "lctlg" : "lctl", + __entry->reg1, __entry->reg3, __entry->addr) + ); + +TRACE_EVENT(kvm_s390_handle_stctl, + TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, g) + __field(int, reg1) + __field(int, reg3) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->g = g; + __entry->reg1 = reg1; + __entry->reg3 = reg3; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx", + __entry->g ? "stctg" : "stctl", + __entry->reg1, __entry->reg3, __entry->addr) + ); + +TRACE_EVENT(kvm_s390_handle_prefix, + TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address), + TP_ARGS(VCPU_ARGS_COMMON, set, address), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, set) + __field(u32, address) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->set = set; + __entry->address = address; + ), + + VCPU_TP_PRINTK("%s prefix to %08x", + __entry->set ? "setting" : "storing", + __entry->address) + ); + +TRACE_EVENT(kvm_s390_handle_stap, + TP_PROTO(VCPU_PROTO_COMMON, u64 address), + TP_ARGS(VCPU_ARGS_COMMON, address), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u64, address) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->address = address; + ), + + VCPU_TP_PRINTK("storing cpu address to %016llx", + __entry->address) + ); + +TRACE_EVENT(kvm_s390_handle_stfl, + TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list), + TP_ARGS(VCPU_ARGS_COMMON, facility_list), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(unsigned int, facility_list) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->facility_list = facility_list; + ), + + VCPU_TP_PRINTK("store facility list value %08x", + __entry->facility_list) + ); + +TRACE_EVENT(kvm_s390_handle_stsi, + TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(int, fc) + __field(int, sel1) + __field(int, sel2) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->fc = fc; + __entry->sel1 = sel1; + __entry->sel2 = sel2; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx", + __entry->fc, __entry->sel1, __entry->sel2, + __entry->addr) + ); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index d9b97b3c597..c6d752e8bf2 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for s390-specific library files.. # -EXTRA_AFLAGS := -traditional - -lib-y += delay.o string.o spinlock.o -lib-y += $(if $(CONFIG_64BIT),uaccess64.o,uaccess.o) +lib-y += delay.o string.o uaccess.o find.o +obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o +obj-$(CONFIG_64BIT) += mem64.o +lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index e96c35bddac..a9f3d0042d5 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -1,23 +1,19 @@ /* - * arch/s390/kernel/delay.c * Precise Delay Loops for S390 * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * - * Derived from "arch/i386/lib/delay.c" - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright IBM Corp. 1999, 2008 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> - -#ifdef CONFIG_SMP -#include <asm/smp.h> -#endif +#include <linux/timex.h> +#include <linux/module.h> +#include <linux/irqflags.h> +#include <linux/interrupt.h> +#include <asm/vtimer.h> +#include <asm/div64.h> void __delay(unsigned long loops) { @@ -28,24 +24,106 @@ void __delay(unsigned long loops) * yield the megahertz number of the cpu. The important function * is udelay and that is done using the tod clock. -- martin. */ - __asm__ __volatile__( - "0: brct %0,0b" - : /* no outputs */ : "r" (loops/2) ); + asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); +} + +static void __udelay_disabled(unsigned long long usecs) +{ + unsigned long cr0, cr6, new; + u64 clock_saved, end; + + end = get_tod_clock() + (usecs << 12); + clock_saved = local_tick_disable(); + __ctl_store(cr0, 0, 0); + __ctl_store(cr6, 6, 6); + new = (cr0 & 0xffff00e0) | 0x00000800; + __ctl_load(new , 0, 0); + new = 0; + __ctl_load(new, 6, 6); + lockdep_off(); + do { + set_clock_comparator(end); + vtime_stop_cpu(); + } while (get_tod_clock_fast() < end); + lockdep_on(); + __ctl_load(cr0, 0, 0); + __ctl_load(cr6, 6, 6); + local_tick_enable(clock_saved); +} + +static void __udelay_enabled(unsigned long long usecs) +{ + u64 clock_saved, end; + + end = get_tod_clock_fast() + (usecs << 12); + do { + clock_saved = 0; + if (end < S390_lowcore.clock_comparator) { + clock_saved = local_tick_disable(); + set_clock_comparator(end); + } + vtime_stop_cpu(); + if (clock_saved) + local_tick_enable(clock_saved); + } while (get_tod_clock_fast() < end); } /* - * Waits for 'usecs' microseconds using the tod clock, giving up the time slice - * of the virtual PU inbetween to avoid congestion. + * Waits for 'usecs' microseconds using the TOD clock comparator. */ -void __udelay(unsigned long usecs) +void __udelay(unsigned long long usecs) { - uint64_t start_cc, end_cc; + unsigned long flags; - if (usecs == 0) - return; - asm volatile ("STCK %0" : "=m" (start_cc)); - do { + preempt_disable(); + local_irq_save(flags); + if (in_irq()) { + __udelay_disabled(usecs); + goto out; + } + if (in_softirq()) { + if (raw_irqs_disabled_flags(flags)) + __udelay_disabled(usecs); + else + __udelay_enabled(usecs); + goto out; + } + if (raw_irqs_disabled_flags(flags)) { + local_bh_disable(); + __udelay_disabled(usecs); + _local_bh_enable(); + goto out; + } + __udelay_enabled(usecs); +out: + local_irq_restore(flags); + preempt_enable(); +} +EXPORT_SYMBOL(__udelay); + +/* + * Simple udelay variant. To be used on startup and reboot + * when the interrupt handler isn't working. + */ +void udelay_simple(unsigned long long usecs) +{ + u64 end; + + end = get_tod_clock_fast() + (usecs << 12); + while (get_tod_clock_fast() < end) cpu_relax(); - asm volatile ("STCK %0" : "=m" (end_cc)); - } while (((end_cc - start_cc)/4096) < usecs); } + +void __ndelay(unsigned long long nsecs) +{ + u64 end; + + nsecs <<= 9; + do_div(nsecs, 125); + end = get_tod_clock_fast() + nsecs; + if (nsecs & ~0xfffUL) + __udelay(nsecs >> 12); + while (get_tod_clock_fast() < end) + barrier(); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c new file mode 100644 index 00000000000..261152f8324 --- /dev/null +++ b/arch/s390/lib/div64.c @@ -0,0 +1,147 @@ +/* + * __div64_32 implementation for 31 bit. + * + * Copyright IBM Corp. 2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#include <linux/types.h> +#include <linux/module.h> + +#ifdef CONFIG_MARCH_G5 + +/* + * Function to divide an unsigned 64 bit integer by an unsigned + * 31 bit integer using signed 64/32 bit division. + */ +static uint32_t __div64_31(uint64_t *n, uint32_t base) +{ + register uint32_t reg2 asm("2"); + register uint32_t reg3 asm("3"); + uint32_t *words = (uint32_t *) n; + uint32_t tmp; + + /* Special case base==1, remainder = 0, quotient = n */ + if (base == 1) + return 0; + /* + * Special case base==0 will cause a fixed point divide exception + * on the dr instruction and may not happen anyway. For the + * following calculation we can assume base > 1. The first + * signed 64 / 32 bit division with an upper half of 0 will + * give the correct upper half of the 64 bit quotient. + */ + reg2 = 0UL; + reg3 = words[0]; + asm volatile( + " dr %0,%2\n" + : "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" ); + words[0] = reg3; + reg3 = words[1]; + /* + * To get the lower half of the 64 bit quotient and the 32 bit + * remainder we have to use a little trick. Since we only have + * a signed division the quotient can get too big. To avoid this + * the 64 bit dividend is halved, then the signed division will + * work. Afterwards the quotient and the remainder are doubled. + * If the last bit of the dividend has been one the remainder + * is increased by one then checked against the base. If the + * remainder has overflown subtract base and increase the + * quotient. Simple, no ? + */ + asm volatile( + " nr %2,%1\n" + " srdl %0,1\n" + " dr %0,%3\n" + " alr %0,%0\n" + " alr %1,%1\n" + " alr %0,%2\n" + " clr %0,%3\n" + " jl 0f\n" + " slr %0,%3\n" + " ahi %1,1\n" + "0:\n" + : "+d" (reg2), "+d" (reg3), "=d" (tmp) + : "d" (base), "2" (1UL) : "cc" ); + words[1] = reg3; + return reg2; +} + +/* + * Function to divide an unsigned 64 bit integer by an unsigned + * 32 bit integer using the unsigned 64/31 bit division. + */ +uint32_t __div64_32(uint64_t *n, uint32_t base) +{ + uint32_t r; + + /* + * If the most significant bit of base is set, divide n by + * (base/2). That allows to use 64/31 bit division and gives a + * good approximation of the result: n = (base/2)*q + r. The + * result needs to be corrected with two simple transformations. + * If base is already < 2^31-1 __div64_31 can be used directly. + */ + r = __div64_31(n, ((signed) base < 0) ? (base/2) : base); + if ((signed) base < 0) { + uint64_t q = *n; + /* + * First transformation: + * n = (base/2)*q + r + * = ((base/2)*2)*(q/2) + ((q&1) ? (base/2) : 0) + r + * Since r < (base/2), r + (base/2) < base. + * With q1 = (q/2) and r1 = r + ((q&1) ? (base/2) : 0) + * n = ((base/2)*2)*q1 + r1 with r1 < base. + */ + if (q & 1) + r += base/2; + q >>= 1; + /* + * Second transformation. ((base/2)*2) could have lost the + * last bit. + * n = ((base/2)*2)*q1 + r1 + * = base*q1 - ((base&1) ? q1 : 0) + r1 + */ + if (base & 1) { + int64_t rx = r - q; + /* + * base is >= 2^31. The worst case for the while + * loop is n=2^64-1 base=2^31+1. That gives a + * maximum for q=(2^64-1)/2^31 = 0x1ffffffff. Since + * base >= 2^31 the loop is finished after a maximum + * of three iterations. + */ + while (rx < 0) { + rx += base; + q--; + } + r = rx; + } + *n = q; + } + return r; +} + +#else /* MARCH_G5 */ + +uint32_t __div64_32(uint64_t *n, uint32_t base) +{ + register uint32_t reg2 asm("2"); + register uint32_t reg3 asm("3"); + uint32_t *words = (uint32_t *) n; + + reg2 = 0UL; + reg3 = words[0]; + asm volatile( + " dlr %0,%2\n" + : "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" ); + words[0] = reg3; + reg3 = words[1]; + asm volatile( + " dlr %0,%2\n" + : "+d" (reg2), "+d" (reg3) : "d" (base) : "cc" ); + words[1] = reg3; + return reg2; +} + +#endif /* MARCH_G5 */ diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c new file mode 100644 index 00000000000..922003c1b90 --- /dev/null +++ b/arch/s390/lib/find.c @@ -0,0 +1,77 @@ +/* + * MSB0 numbered special bitops handling. + * + * On s390x the bits are numbered: + * |0..............63|64............127|128...........191|192...........255| + * and on s390: + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| + * + * The reason for this bit numbering is the fact that the hardware sets bits + * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap + * from the 'wrong end'. + */ + +#include <linux/compiler.h> +#include <linux/bitops.h> +#include <linux/export.h> + +unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG - 1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = (*p) & (~0UL << (BITS_PER_LONG - size)); + if (!tmp) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + (__fls(tmp) ^ (BITS_PER_LONG - 1)); +} +EXPORT_SYMBOL(find_first_bit_inv); + +unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + (offset / BITS_PER_LONG); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL >> offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; +found_first: + tmp &= (~0UL << (BITS_PER_LONG - size)); + if (!tmp) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + (__fls(tmp) ^ (BITS_PER_LONG - 1)); +} +EXPORT_SYMBOL(find_next_bit_inv); diff --git a/arch/s390/lib/mem32.S b/arch/s390/lib/mem32.S new file mode 100644 index 00000000000..14ca9244b61 --- /dev/null +++ b/arch/s390/lib/mem32.S @@ -0,0 +1,92 @@ +/* + * String handling functions. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +/* + * memset implementation + * + * This code corresponds to the C construct below. We do distinguish + * between clearing (c == 0) and setting a memory array (c != 0) simply + * because nearly all memset invocations in the kernel clear memory and + * the xc instruction is preferred in such cases. + * + * void *memset(void *s, int c, size_t n) + * { + * if (likely(c == 0)) + * return __builtin_memset(s, 0, n); + * return __builtin_memset(s, c, n); + * } + */ +ENTRY(memset) + basr %r5,%r0 +.Lmemset_base: + ltr %r4,%r4 + bzr %r14 + ltr %r3,%r3 + jnz .Lmemset_fill + ahi %r4,-1 + lr %r3,%r4 + srl %r3,8 + ltr %r3,%r3 + lr %r1,%r2 + je .Lmemset_clear_rest +.Lmemset_clear_loop: + xc 0(256,%r1),0(%r1) + la %r1,256(%r1) + brct %r3,.Lmemset_clear_loop +.Lmemset_clear_rest: + ex %r4,.Lmemset_xc-.Lmemset_base(%r5) + br %r14 +.Lmemset_fill: + stc %r3,0(%r2) + chi %r4,1 + lr %r1,%r2 + ber %r14 + ahi %r4,-2 + lr %r3,%r4 + srl %r3,8 + ltr %r3,%r3 + je .Lmemset_fill_rest +.Lmemset_fill_loop: + mvc 1(256,%r1),0(%r1) + la %r1,256(%r1) + brct %r3,.Lmemset_fill_loop +.Lmemset_fill_rest: + ex %r4,.Lmemset_mvc-.Lmemset_base(%r5) + br %r14 +.Lmemset_xc: + xc 0(1,%r1),0(%r1) +.Lmemset_mvc: + mvc 1(1,%r1),0(%r1) + +/* + * memcpy implementation + * + * void *memcpy(void *dest, const void *src, size_t n) + */ +ENTRY(memcpy) + basr %r5,%r0 +.Lmemcpy_base: + ltr %r4,%r4 + bzr %r14 + ahi %r4,-1 + lr %r0,%r4 + srl %r0,8 + ltr %r0,%r0 + lr %r1,%r2 + jnz .Lmemcpy_loop +.Lmemcpy_rest: + ex %r4,.Lmemcpy_mvc-.Lmemcpy_base(%r5) + br %r14 +.Lmemcpy_loop: + mvc 0(256,%r1),0(%r3) + la %r1,256(%r1) + la %r3,256(%r3) + brct %r0,.Lmemcpy_loop + j .Lmemcpy_rest +.Lmemcpy_mvc: + mvc 0(1,%r1),0(%r3) diff --git a/arch/s390/lib/mem64.S b/arch/s390/lib/mem64.S new file mode 100644 index 00000000000..c6d553e85ab --- /dev/null +++ b/arch/s390/lib/mem64.S @@ -0,0 +1,88 @@ +/* + * String handling functions. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +/* + * memset implementation + * + * This code corresponds to the C construct below. We do distinguish + * between clearing (c == 0) and setting a memory array (c != 0) simply + * because nearly all memset invocations in the kernel clear memory and + * the xc instruction is preferred in such cases. + * + * void *memset(void *s, int c, size_t n) + * { + * if (likely(c == 0)) + * return __builtin_memset(s, 0, n); + * return __builtin_memset(s, c, n); + * } + */ +ENTRY(memset) + ltgr %r4,%r4 + bzr %r14 + ltgr %r3,%r3 + jnz .Lmemset_fill + aghi %r4,-1 + srlg %r3,%r4,8 + ltgr %r3,%r3 + lgr %r1,%r2 + jz .Lmemset_clear_rest +.Lmemset_clear_loop: + xc 0(256,%r1),0(%r1) + la %r1,256(%r1) + brctg %r3,.Lmemset_clear_loop +.Lmemset_clear_rest: + larl %r3,.Lmemset_xc + ex %r4,0(%r3) + br %r14 +.Lmemset_fill: + stc %r3,0(%r2) + cghi %r4,1 + lgr %r1,%r2 + ber %r14 + aghi %r4,-2 + srlg %r3,%r4,8 + ltgr %r3,%r3 + jz .Lmemset_fill_rest +.Lmemset_fill_loop: + mvc 1(256,%r1),0(%r1) + la %r1,256(%r1) + brctg %r3,.Lmemset_fill_loop +.Lmemset_fill_rest: + larl %r3,.Lmemset_mvc + ex %r4,0(%r3) + br %r14 +.Lmemset_xc: + xc 0(1,%r1),0(%r1) +.Lmemset_mvc: + mvc 1(1,%r1),0(%r1) + +/* + * memcpy implementation + * + * void *memcpy(void *dest, const void *src, size_t n) + */ +ENTRY(memcpy) + ltgr %r4,%r4 + bzr %r14 + aghi %r4,-1 + srlg %r5,%r4,8 + ltgr %r5,%r5 + lgr %r1,%r2 + jnz .Lmemcpy_loop +.Lmemcpy_rest: + larl %r5,.Lmemcpy_mvc + ex %r4,0(%r5) + br %r14 +.Lmemcpy_loop: + mvc 0(256,%r1),0(%r3) + la %r1,256(%r1) + la %r3,256(%r3) + brctg %r5,.Lmemcpy_loop + j .Lmemcpy_rest +.Lmemcpy_mvc: + mvc 0(1,%r1),0(%r3) diff --git a/arch/s390/lib/qrnnd.S b/arch/s390/lib/qrnnd.S new file mode 100644 index 00000000000..d321329130e --- /dev/null +++ b/arch/s390/lib/qrnnd.S @@ -0,0 +1,78 @@ +# S/390 __udiv_qrnnd + +#include <linux/linkage.h> + +# r2 : &__r +# r3 : upper half of 64 bit word n +# r4 : lower half of 64 bit word n +# r5 : divisor d +# the reminder r of the division is to be stored to &__r and +# the quotient q is to be returned + + .text +ENTRY(__udiv_qrnnd) + st %r2,24(%r15) # store pointer to reminder for later + lr %r0,%r3 # reload n + lr %r1,%r4 + ltr %r2,%r5 # reload and test divisor + jp 5f + # divisor >= 0x80000000 + srdl %r0,2 # n/4 + srl %r2,1 # d/2 + slr %r1,%r2 # special case if last bit of d is set + brc 3,0f # (n/4) div (n/2) can overflow by 1 + ahi %r0,-1 # trick: subtract n/2, then divide +0: dr %r0,%r2 # signed division + ahi %r1,1 # trick part 2: add 1 to the quotient + # now (n >> 2) = (d >> 1) * %r1 + %r0 + lhi %r3,1 + nr %r3,%r1 # test last bit of q + jz 1f + alr %r0,%r2 # add (d>>1) to r +1: srl %r1,1 # q >>= 1 + # now (n >> 2) = (d&-2) * %r1 + %r0 + lhi %r3,1 + nr %r3,%r5 # test last bit of d + jz 2f + slr %r0,%r1 # r -= q + brc 3,2f # borrow ? + alr %r0,%r5 # r += d + ahi %r1,-1 +2: # now (n >> 2) = d * %r1 + %r0 + alr %r1,%r1 # q <<= 1 + alr %r0,%r0 # r <<= 1 + brc 12,3f # overflow on r ? + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 +3: lhi %r3,2 + nr %r3,%r4 # test next to last bit of n + jz 4f + ahi %r0,1 # r += 1 +4: clr %r0,%r5 # r >= d ? + jl 6f + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 + # now (n >> 1) = d * %r1 + %r0 + j 6f +5: # divisor < 0x80000000 + srdl %r0,1 + dr %r0,%r2 # signed division + # now (n >> 1) = d * %r1 + %r0 +6: alr %r1,%r1 # q <<= 1 + alr %r0,%r0 # r <<= 1 + brc 12,7f # overflow on r ? + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 +7: lhi %r3,1 + nr %r3,%r4 # isolate last bit of n + alr %r0,%r3 # r += (n & 1) + clr %r0,%r5 # r >= d ? + jl 8f + slr %r0,%r5 # r -= d + ahi %r1,1 # q += 1 +8: # now n = d * %r1 + %r0 + l %r2,24(%r15) + st %r0,0(%r2) + lr %r2,%r1 + br %r14 + .end __udiv_qrnnd diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 68d79c50208..5b0e445bc3f 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -1,9 +1,7 @@ /* - * arch/s390/lib/spinlock.c * Out of line spinlock code. * - * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2004, 2006 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -11,9 +9,9 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/init.h> +#include <linux/smp.h> #include <asm/io.h> -atomic_t spin_retry_counter; int spin_retry = 1000; /** @@ -26,106 +24,210 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); -static inline void -_diag44(void) +void arch_spin_lock_wait(arch_spinlock_t *lp) { -#ifdef CONFIG_64BIT - if (MACHINE_HAS_DIAG44) -#endif - asm volatile("diag 0,0,0x44"); + unsigned int cpu = SPINLOCK_LOCKVAL; + unsigned int owner; + int count; + + while (1) { + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return; + continue; + } + /* Check if the lock owner is running. */ + if (!smp_vcpu_scheduled(~owner)) { + smp_yield_cpu(~owner); + continue; + } + /* Loop for a while on the lock value. */ + count = spin_retry; + do { + owner = ACCESS_ONCE(lp->lock); + } while (owner && count-- > 0); + if (!owner) + continue; + /* + * For multiple layers of hypervisors, e.g. z/VM + LPAR + * yield the CPU if the lock is still unavailable. + */ + if (!MACHINE_IS_LPAR) + smp_yield_cpu(~owner); + } } +EXPORT_SYMBOL(arch_spin_lock_wait); -void -_raw_spin_lock_wait(raw_spinlock_t *lp, unsigned int pc) +void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { - int count = spin_retry; + unsigned int cpu = SPINLOCK_LOCKVAL; + unsigned int owner; + int count; + local_irq_restore(flags); while (1) { - if (count-- <= 0) { - _diag44(); - count = spin_retry; + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + local_irq_disable(); + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return; + local_irq_restore(flags); } - atomic_inc(&spin_retry_counter); - if (_raw_compare_and_swap(&lp->lock, 0, pc) == 0) - return; + /* Check if the lock owner is running. */ + if (!smp_vcpu_scheduled(~owner)) { + smp_yield_cpu(~owner); + continue; + } + /* Loop for a while on the lock value. */ + count = spin_retry; + do { + owner = ACCESS_ONCE(lp->lock); + } while (owner && count-- > 0); + if (!owner) + continue; + /* + * For multiple layers of hypervisors, e.g. z/VM + LPAR + * yield the CPU if the lock is still unavailable. + */ + if (!MACHINE_IS_LPAR) + smp_yield_cpu(~owner); } } -EXPORT_SYMBOL(_raw_spin_lock_wait); +EXPORT_SYMBOL(arch_spin_lock_wait_flags); -int -_raw_spin_trylock_retry(raw_spinlock_t *lp, unsigned int pc) +void arch_spin_relax(arch_spinlock_t *lp) { - int count = spin_retry; + unsigned int cpu = lp->lock; + if (cpu != 0) { + if (MACHINE_IS_VM || MACHINE_IS_KVM || + !smp_vcpu_scheduled(~cpu)) + smp_yield_cpu(~cpu); + } +} +EXPORT_SYMBOL(arch_spin_relax); - while (count-- > 0) { - atomic_inc(&spin_retry_counter); - if (_raw_compare_and_swap(&lp->lock, 0, pc) == 0) +int arch_spin_trylock_retry(arch_spinlock_t *lp) +{ + int count; + + for (count = spin_retry; count > 0; count--) + if (arch_spin_trylock_once(lp)) return 1; - } return 0; } -EXPORT_SYMBOL(_raw_spin_trylock_retry); +EXPORT_SYMBOL(arch_spin_trylock_retry); -void -_raw_read_lock_wait(raw_rwlock_t *rw) +void _raw_read_lock_wait(arch_rwlock_t *rw) { unsigned int old; int count = spin_retry; while (1) { if (count-- <= 0) { - _diag44(); + smp_yield(); count = spin_retry; } - atomic_inc(&spin_retry_counter); - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) + continue; + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return; } } EXPORT_SYMBOL(_raw_read_lock_wait); -int -_raw_read_trylock_retry(raw_rwlock_t *rw) +void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) +{ + unsigned int old; + int count = spin_retry; + + local_irq_restore(flags); + while (1) { + if (count-- <= 0) { + smp_yield(); + count = spin_retry; + } + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) + continue; + local_irq_disable(); + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) + return; + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(_raw_read_lock_wait_flags); + +int _raw_read_trylock_retry(arch_rwlock_t *rw) { unsigned int old; int count = spin_retry; while (count-- > 0) { - atomic_inc(&spin_retry_counter); - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) + continue; + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return 1; } return 0; } EXPORT_SYMBOL(_raw_read_trylock_retry); -void -_raw_write_lock_wait(raw_rwlock_t *rw) +void _raw_write_lock_wait(arch_rwlock_t *rw) { + unsigned int old; int count = spin_retry; while (1) { if (count-- <= 0) { - _diag44(); + smp_yield(); count = spin_retry; } - atomic_inc(&spin_retry_counter); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + old = ACCESS_ONCE(rw->lock); + if (old) + continue; + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return; } } EXPORT_SYMBOL(_raw_write_lock_wait); -int -_raw_write_trylock_retry(raw_rwlock_t *rw) +void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { + unsigned int old; + int count = spin_retry; + + local_irq_restore(flags); + while (1) { + if (count-- <= 0) { + smp_yield(); + count = spin_retry; + } + old = ACCESS_ONCE(rw->lock); + if (old) + continue; + local_irq_disable(); + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) + return; + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(_raw_write_lock_wait_flags); + +int _raw_write_trylock_retry(arch_rwlock_t *rw) +{ + unsigned int old; int count = spin_retry; while (count-- > 0) { - atomic_inc(&spin_retry_counter); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + old = ACCESS_ONCE(rw->lock); + if (old) + continue; + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return 1; } return 0; diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 8240cc77e06..b647d5ff0ad 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -1,9 +1,8 @@ /* - * arch/s390/lib/string.c * Optimized string functions * * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2004 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -99,7 +98,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) if (size) { size_t len = (ret >= size) ? size-1 : ret; dest[len] = '\0'; - __builtin_memcpy(dest, src, len); + memcpy(dest, src, len); } return ret; } @@ -117,8 +116,8 @@ EXPORT_SYMBOL(strlcpy); char *strncpy(char *dest, const char *src, size_t n) { size_t len = __strnend(src, n) - src; - __builtin_memset(dest + len, 0, n - len); - __builtin_memcpy(dest, src, len); + memset(dest + len, 0, n - len); + memcpy(dest, src, len); return dest; } EXPORT_SYMBOL(strncpy); @@ -164,7 +163,7 @@ size_t strlcat(char *dest, const char *src, size_t n) if (len >= n) len = n - 1; dest[len] = '\0'; - __builtin_memcpy(dest, src, len); + memcpy(dest, src, len); } return res; } @@ -187,7 +186,7 @@ char *strncat(char *dest, const char *src, size_t n) char *p = __strend(dest); p[len] = '\0'; - __builtin_memcpy(p, src, len); + memcpy(p, src, len); return dest; } EXPORT_SYMBOL(strncat); @@ -233,7 +232,7 @@ char * strrchr(const char * s, int c) if (s[len] == (char) c) return (char *) s + len; } while (--len > 0); - return 0; + return NULL; } EXPORT_SYMBOL(strrchr); @@ -267,7 +266,7 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; s1++; } - return 0; + return NULL; } EXPORT_SYMBOL(strstr); @@ -341,41 +340,3 @@ void *memscan(void *s, int c, size_t n) return (void *) ret; } EXPORT_SYMBOL(memscan); - -/** - * memcpy - Copy one area of memory to another - * @dest: Where to copy to - * @src: Where to copy from - * @n: The size of the area. - * - * returns a pointer to @dest - */ -void *memcpy(void *dest, const void *src, size_t n) -{ - return __builtin_memcpy(dest, src, n); -} -EXPORT_SYMBOL(memcpy); - -/** - * memset - Fill a region of memory with the given value - * @s: Pointer to the start of the area. - * @c: The byte to fill the area with - * @n: The size of the area. - * - * returns a pointer to @s - */ -void *memset(void *s, int c, size_t n) -{ - char *xs; - - if (c == 0) - return __builtin_memset(s, 0, n); - - xs = (char *) s; - if (n > 0) - do { - *xs++ = c; - } while (--n > 0); - return s; -} -EXPORT_SYMBOL(memset); diff --git a/arch/s390/lib/uaccess.S b/arch/s390/lib/uaccess.S deleted file mode 100644 index 88fc94fe648..00000000000 --- a/arch/s390/lib/uaccess.S +++ /dev/null @@ -1,210 +0,0 @@ -/* - * arch/s390/lib/uaccess.S - * __copy_{from|to}_user functions. - * - * s390 - * Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * These functions have standard call interface - */ - -#include <linux/errno.h> -#include <asm/lowcore.h> -#include <asm/asm-offsets.h> - - .text - .align 4 - .globl __copy_from_user_asm - # %r2 = to, %r3 = n, %r4 = from -__copy_from_user_asm: - slr %r0,%r0 -0: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1f - slr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - ahi %r3,-256 -2: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1b -3: slr %r2,%r2 - br %r14 -4: lhi %r0,-4096 - lr %r5,%r4 - slr %r5,%r0 - nr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slr %r5,%r4 # %r5 = #bytes to next user page boundary - clr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcp 0(%r5,%r2),0(%r4),%r0 - slr %r3,%r5 -6: lr %r2,%r3 - br %r14 - .section __ex_table,"a" - .long 0b,4b - .long 2b,4b - .long 5b,6b - .previous - - .align 4 - .text - .globl __copy_to_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_to_user_asm: - slr %r0,%r0 -0: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1f - slr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - ahi %r3,-256 -2: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1b -3: slr %r2,%r2 - br %r14 -4: lhi %r0,-4096 - lr %r5,%r4 - slr %r5,%r0 - nr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slr %r5,%r4 # %r5 = #bytes to next user page boundary - clr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcs 0(%r5,%r4),0(%r2),%r0 - slr %r3,%r5 -6: lr %r2,%r3 - br %r14 - .section __ex_table,"a" - .long 0b,4b - .long 2b,4b - .long 5b,6b - .previous - - .align 4 - .text - .globl __copy_in_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_in_user_asm: - sacf 256 - bras 1,1f - mvc 0(1,%r4),0(%r2) -0: mvc 0(256,%r4),0(%r2) - la %r2,256(%r2) - la %r4,256(%r4) -1: ahi %r3,-256 - jnm 0b -2: ex %r3,0(%r1) - sacf 0 - slr %r2,%r2 - br 14 -3: mvc 0(1,%r4),0(%r2) - la %r2,1(%r2) - la %r4,1(%r4) - ahi %r3,-1 - jnm 3b -4: lr %r2,%r3 - sacf 0 - br %r14 - .section __ex_table,"a" - .long 0b,3b - .long 2b,3b - .long 3b,4b - .previous - - .align 4 - .text - .globl __clear_user_asm - # %r2 = to, %r3 = n -__clear_user_asm: - bras %r5,0f - .long empty_zero_page -0: l %r5,0(%r5) - slr %r0,%r0 -1: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2f - slr %r2,%r2 - br %r14 -2: la %r2,256(%r2) - ahi %r3,-256 -3: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2b -4: slr %r2,%r2 - br %r14 -5: lhi %r0,-4096 - lr %r4,%r2 - slr %r4,%r0 - nr %r4,%r0 # %r4 = (%r2 + 4096) & -4096 - slr %r4,%r2 # %r4 = #bytes to next user page boundary - clr %r3,%r4 # clear crosses next page boundary ? - jnh 7f # no, the current page faulted - # clear with the reduced length which is < 256 -6: mvcs 0(%r4,%r2),0(%r5),%r0 - slr %r3,%r4 -7: lr %r2,%r3 - br %r14 - .section __ex_table,"a" - .long 1b,5b - .long 3b,5b - .long 6b,7b - .previous - - .align 4 - .text - .globl __strncpy_from_user_asm - # %r2 = count, %r3 = dst, %r4 = src -__strncpy_from_user_asm: - lhi %r0,0 - lr %r1,%r4 - la %r4,0(%r4) # clear high order bit from %r4 - la %r2,0(%r2,%r4) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - lr %r1,%r2 - jh 1f # \0 found in string ? - ahi %r1,1 # include \0 in copy -1: slr %r1,%r4 # %r1 = copy length (without \0) - slr %r2,%r4 # %r2 = return length (including \0) -2: mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3f - br %r14 -3: la %r3,256(%r3) - la %r4,256(%r4) - ahi %r1,-256 - mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3b - br %r14 -4: sacf 0 - lhi %r2,-EFAULT - br %r14 - .section __ex_table,"a" - .long 0b,4b - .previous - - .align 4 - .text - .globl __strnlen_user_asm - # %r2 = count, %r3 = src -__strnlen_user_asm: - lhi %r0,0 - lr %r1,%r3 - la %r3,0(%r3) # clear high order bit from %r4 - la %r2,0(%r2,%r3) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - jh 1f # \0 found in string ? - ahi %r2,1 # strnlen_user result includes the \0 -1: slr %r2,%r3 - br %r14 -2: sacf 0 - lhi %r2,-EFAULT - br %r14 - .section __ex_table,"a" - .long 0b,2b - .previous diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c new file mode 100644 index 00000000000..53dd5d7a0c9 --- /dev/null +++ b/arch/s390/lib/uaccess.c @@ -0,0 +1,406 @@ +/* + * Standard user space access functions based on mvcp/mvcs and doing + * interesting things in the secondary space mode. + * + * Copyright IBM Corp. 2006,2014 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerald Schaefer (gerald.schaefer@de.ibm.com) + */ + +#include <linux/jump_label.h> +#include <linux/uaccess.h> +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <asm/mmu_context.h> +#include <asm/facility.h> + +#ifndef CONFIG_64BIT +#define AHI "ahi" +#define ALR "alr" +#define CLR "clr" +#define LHI "lhi" +#define SLR "slr" +#else +#define AHI "aghi" +#define ALR "algr" +#define CLR "clgr" +#define LHI "lghi" +#define SLR "slgr" +#endif + +static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE; + +static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x81UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "9: jz 7f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 4f\n" + "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + "10:"SLR" %0,%4\n" + " "ALR" %2,%4\n" + "4:"LHI" %4,-1\n" + " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ + " bras %3,6f\n" /* memset loop */ + " xc 0(1,%2),0(%2)\n" + "5: xc 0(256,%2),0(%2)\n" + " la %2,256(%2)\n" + "6:"AHI" %4,-256\n" + " jnm 5b\n" + " ex %4,0(%3)\n" + " j 8f\n" + "7:"SLR" %0,%0\n" + "8:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, + unsigned long size) +{ + unsigned long tmp1, tmp2; + + load_kernel_asce(); + tmp1 = -256UL; + asm volatile( + " sacf 0\n" + "0: mvcp 0(%0,%2),0(%1),%3\n" + "10:jz 8f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcp 0(%0,%2),0(%1),%3\n" + "11:jnz 1b\n" + " j 8f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "4: mvcp 0(%4,%2),0(%1),%3\n" + "12:"SLR" %0,%4\n" + " "ALR" %2,%4\n" + "5:"LHI" %4,-1\n" + " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */ + " bras %3,7f\n" /* memset loop */ + " xc 0(1,%2),0(%2)\n" + "6: xc 0(256,%2),0(%2)\n" + " la %2,256(%2)\n" + "7:"AHI" %4,-256\n" + " jnm 6b\n" + " ex %4,0(%3)\n" + " j 9f\n" + "8:"SLR" %0,%0\n" + "9: sacf 768\n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) + EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (static_key_false(&have_mvcos)) + return copy_from_user_mvcos(to, from, n); + return copy_from_user_mvcp(to, from, n); +} +EXPORT_SYMBOL(__copy_from_user); + +static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + "6: jz 4f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */ + " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + "7:"SLR" %0,%4\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, + unsigned long size) +{ + unsigned long tmp1, tmp2; + + load_kernel_asce(); + tmp1 = -256UL; + asm volatile( + " sacf 0\n" + "0: mvcs 0(%0,%1),0(%2),%3\n" + "7: jz 5f\n" + "1:"ALR" %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "2: mvcs 0(%0,%1),0(%2),%3\n" + "8: jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ + " "LHI" %3,-4096\n" + " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ + " "SLR" %4,%1\n" + " "CLR" %0,%4\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "4: mvcs 0(%4,%1),0(%2),%3\n" + "9:"SLR" %0,%4\n" + " j 6f\n" + "5:"SLR" %0,%0\n" + "6: sacf 768\n" + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (static_key_false(&have_mvcos)) + return copy_to_user_mvcos(to, from, n); + return copy_to_user_mvcs(to, from, n); +} +EXPORT_SYMBOL(__copy_to_user); + +static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x810081UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + /* FIXME: copy with reduced length. */ + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + " jz 2f\n" + "1:"ALR" %0,%3\n" + " "SLR" %1,%3\n" + " "SLR" %2,%3\n" + " j 0b\n" + "2:"SLR" %0,%0\n" + "3: \n" + EX_TABLE(0b,3b) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, + unsigned long size) +{ + unsigned long tmp1; + + load_kernel_asce(); + asm volatile( + " sacf 256\n" + " "AHI" %0,-1\n" + " jo 5f\n" + " bras %3,3f\n" + "0:"AHI" %0,257\n" + "1: mvc 0(1,%1),0(%2)\n" + " la %1,1(%1)\n" + " la %2,1(%2)\n" + " "AHI" %0,-1\n" + " jnz 1b\n" + " j 5f\n" + "2: mvc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,1b-0b(%3)\n" + "5: "SLR" %0,%0\n" + "6: sacf 768\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) + : : "cc", "memory"); + return size; +} + +unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n) +{ + if (static_key_false(&have_mvcos)) + return copy_in_user_mvcos(to, from, n); + return copy_in_user_mvc(to, from, n); +} +EXPORT_SYMBOL(__copy_in_user); + +static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) +{ + register unsigned long reg0 asm("0") = 0x810000UL; + unsigned long tmp1, tmp2; + + tmp1 = -4096UL; + asm volatile( + "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + " jz 4f\n" + "1:"ALR" %0,%2\n" + " "SLR" %1,%2\n" + " j 0b\n" + "2: la %3,4095(%1)\n"/* %4 = to + 4095 */ + " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */ + " "SLR" %3,%1\n" + " "CLR" %0,%3\n" /* copy crosses next page boundary? */ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + " "SLR" %0,%3\n" + " j 5f\n" + "4:"SLR" %0,%0\n" + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) + : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : "a" (empty_zero_page), "d" (reg0) : "cc", "memory"); + return size; +} + +static inline unsigned long clear_user_xc(void __user *to, unsigned long size) +{ + unsigned long tmp1, tmp2; + + load_kernel_asce(); + asm volatile( + " sacf 256\n" + " "AHI" %0,-1\n" + " jo 5f\n" + " bras %3,3f\n" + " xc 0(1,%1),0(%1)\n" + "0:"AHI" %0,257\n" + " la %2,255(%1)\n" /* %2 = ptr + 255 */ + " srl %2,12\n" + " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ + " "SLR" %2,%1\n" + " "CLR" %0,%2\n" /* clear crosses next page boundary? */ + " jnh 5f\n" + " "AHI" %2,-1\n" + "1: ex %2,0(%3)\n" + " "AHI" %2,1\n" + " "SLR" %0,%2\n" + " j 5f\n" + "2: xc 0(256,%1),0(%1)\n" + " la %1,256(%1)\n" + "3:"AHI" %0,-256\n" + " jnm 2b\n" + "4: ex %0,0(%3)\n" + "5: "SLR" %0,%0\n" + "6: sacf 768\n" + EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) + : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) + : : "cc", "memory"); + return size; +} + +unsigned long __clear_user(void __user *to, unsigned long size) +{ + if (static_key_false(&have_mvcos)) + return clear_user_mvcos(to, size); + return clear_user_xc(to, size); +} +EXPORT_SYMBOL(__clear_user); + +static inline unsigned long strnlen_user_srst(const char __user *src, + unsigned long size) +{ + register unsigned long reg0 asm("0") = 0; + unsigned long tmp1, tmp2; + + asm volatile( + " la %2,0(%1)\n" + " la %3,0(%0,%1)\n" + " "SLR" %0,%0\n" + " sacf 256\n" + "0: srst %3,%2\n" + " jo 0b\n" + " la %0,1(%3)\n" /* strnlen_user results includes \0 */ + " "SLR" %0,%1\n" + "1: sacf 768\n" + EX_TABLE(0b,1b) + : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2) + : "d" (reg0) : "cc", "memory"); + return size; +} + +unsigned long __strnlen_user(const char __user *src, unsigned long size) +{ + if (unlikely(!size)) + return 0; + load_kernel_asce(); + return strnlen_user_srst(src, size); +} +EXPORT_SYMBOL(__strnlen_user); + +long __strncpy_from_user(char *dst, const char __user *src, long size) +{ + size_t done, len, offset, len_str; + + if (unlikely(size <= 0)) + return 0; + done = 0; + do { + offset = (size_t)src & ~PAGE_MASK; + len = min(size - done, PAGE_SIZE - offset); + if (copy_from_user(dst, src, len)) + return -EFAULT; + len_str = strnlen(dst, len); + done += len_str; + src += len_str; + dst += len_str; + } while ((len_str == len) && (done < size)); + return done; +} +EXPORT_SYMBOL(__strncpy_from_user); + +/* + * The "old" uaccess variant without mvcos can be enforced with the + * uaccess_primary kernel parameter. This is mainly for debugging purposes. + */ +static int uaccess_primary __initdata; + +static int __init parse_uaccess_pt(char *__unused) +{ + uaccess_primary = 1; + return 0; +} +early_param("uaccess_primary", parse_uaccess_pt); + +static int __init uaccess_init(void) +{ + if (IS_ENABLED(CONFIG_64BIT) && !uaccess_primary && test_facility(27)) + static_key_slow_inc(&have_mvcos); + return 0; +} +early_initcall(uaccess_init); diff --git a/arch/s390/lib/uaccess64.S b/arch/s390/lib/uaccess64.S deleted file mode 100644 index 50219786fc7..00000000000 --- a/arch/s390/lib/uaccess64.S +++ /dev/null @@ -1,206 +0,0 @@ -/* - * arch/s390x/lib/uaccess.S - * __copy_{from|to}_user functions. - * - * s390 - * Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * These functions have standard call interface - */ - -#include <linux/errno.h> -#include <asm/lowcore.h> -#include <asm/asm-offsets.h> - - .text - .align 4 - .globl __copy_from_user_asm - # %r2 = to, %r3 = n, %r4 = from -__copy_from_user_asm: - slgr %r0,%r0 -0: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1f - slgr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - aghi %r3,-256 -2: mvcp 0(%r3,%r2),0(%r4),%r0 - jnz 1b -3: slgr %r2,%r2 - br %r14 -4: lghi %r0,-4096 - lgr %r5,%r4 - slgr %r5,%r0 - ngr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slgr %r5,%r4 # %r5 = #bytes to next user page boundary - clgr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcp 0(%r5,%r2),0(%r4),%r0 - slgr %r3,%r5 -6: lgr %r2,%r3 - br %r14 - .section __ex_table,"a" - .quad 0b,4b - .quad 2b,4b - .quad 5b,6b - .previous - - .align 4 - .text - .globl __copy_to_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_to_user_asm: - slgr %r0,%r0 -0: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1f - slgr %r2,%r2 - br %r14 -1: la %r2,256(%r2) - la %r4,256(%r4) - aghi %r3,-256 -2: mvcs 0(%r3,%r4),0(%r2),%r0 - jnz 1b -3: slgr %r2,%r2 - br %r14 -4: lghi %r0,-4096 - lgr %r5,%r4 - slgr %r5,%r0 - ngr %r5,%r0 # %r5 = (%r4 + 4096) & -4096 - slgr %r5,%r4 # %r5 = #bytes to next user page boundary - clgr %r3,%r5 # copy crosses next page boundary ? - jnh 6f # no, the current page faulted - # move with the reduced length which is < 256 -5: mvcs 0(%r5,%r4),0(%r2),%r0 - slgr %r3,%r5 -6: lgr %r2,%r3 - br %r14 - .section __ex_table,"a" - .quad 0b,4b - .quad 2b,4b - .quad 5b,6b - .previous - - .align 4 - .text - .globl __copy_in_user_asm - # %r2 = from, %r3 = n, %r4 = to -__copy_in_user_asm: - sacf 256 - bras 1,1f - mvc 0(1,%r4),0(%r2) -0: mvc 0(256,%r4),0(%r2) - la %r2,256(%r2) - la %r4,256(%r4) -1: aghi %r3,-256 - jnm 0b -2: ex %r3,0(%r1) - sacf 0 - slgr %r2,%r2 - br 14 -3: mvc 0(1,%r4),0(%r2) - la %r2,1(%r2) - la %r4,1(%r4) - aghi %r3,-1 - jnm 3b -4: lgr %r2,%r3 - sacf 0 - br %r14 - .section __ex_table,"a" - .quad 0b,3b - .quad 2b,3b - .quad 3b,4b - .previous - - .align 4 - .text - .globl __clear_user_asm - # %r2 = to, %r3 = n -__clear_user_asm: - slgr %r0,%r0 - larl %r5,empty_zero_page -1: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2f - slgr %r2,%r2 - br %r14 -2: la %r2,256(%r2) - aghi %r3,-256 -3: mvcs 0(%r3,%r2),0(%r5),%r0 - jnz 2b -4: slgr %r2,%r2 - br %r14 -5: lghi %r0,-4096 - lgr %r4,%r2 - slgr %r4,%r0 - ngr %r4,%r0 # %r4 = (%r2 + 4096) & -4096 - slgr %r4,%r2 # %r4 = #bytes to next user page boundary - clgr %r3,%r4 # clear crosses next page boundary ? - jnh 7f # no, the current page faulted - # clear with the reduced length which is < 256 -6: mvcs 0(%r4,%r2),0(%r5),%r0 - slgr %r3,%r4 -7: lgr %r2,%r3 - br %r14 - .section __ex_table,"a" - .quad 1b,5b - .quad 3b,5b - .quad 6b,7b - .previous - - .align 4 - .text - .globl __strncpy_from_user_asm - # %r2 = count, %r3 = dst, %r4 = src -__strncpy_from_user_asm: - lghi %r0,0 - lgr %r1,%r4 - la %r2,0(%r2,%r4) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - lgr %r1,%r2 - jh 1f # \0 found in string ? - aghi %r1,1 # include \0 in copy -1: slgr %r1,%r4 # %r1 = copy length (without \0) - slgr %r2,%r4 # %r2 = return length (including \0) -2: mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3f - br %r14 -3: la %r3,256(%r3) - la %r4,256(%r4) - aghi %r1,-256 - mvcp 0(%r1,%r3),0(%r4),%r0 - jnz 3b - br %r14 -4: sacf 0 - lghi %r2,-EFAULT - br %r14 - .section __ex_table,"a" - .quad 0b,4b - .previous - - .align 4 - .text - .globl __strnlen_user_asm - # %r2 = count, %r3 = src -__strnlen_user_asm: - lghi %r0,0 - lgr %r1,%r3 - la %r2,0(%r2,%r3) # %r2 points to first byte after string - sacf 256 -0: srst %r2,%r1 - jo 0b - sacf 0 - jh 1f # \0 found in string ? - aghi %r2,1 # strnlen_user result includes the \0 -1: slgr %r2,%r3 - br %r14 -2: sacf 0 - lghi %r2,-EFAULT - br %r14 - .section __ex_table,"a" - .quad 0b,2b - .previous diff --git a/arch/s390/lib/ucmpdi2.c b/arch/s390/lib/ucmpdi2.c new file mode 100644 index 00000000000..3e05ff53258 --- /dev/null +++ b/arch/s390/lib/ucmpdi2.c @@ -0,0 +1,26 @@ +#include <linux/module.h> + +union ull_union { + unsigned long long ull; + struct { + unsigned int high; + unsigned int low; + } ui; +}; + +int __ucmpdi2(unsigned long long a, unsigned long long b) +{ + union ull_union au = {.ull = a}; + union ull_union bu = {.ull = b}; + + if (au.ui.high < bu.ui.high) + return 0; + else if (au.ui.high > bu.ui.high) + return 2; + if (au.ui.low < bu.ui.low) + return 0; + else if (au.ui.low > bu.ui.low) + return 2; + return 1; +} +EXPORT_SYMBOL(__ucmpdi2); diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile index c10df144f2a..51d399549f6 100644 --- a/arch/s390/math-emu/Makefile +++ b/arch/s390/math-emu/Makefile @@ -2,7 +2,6 @@ # Makefile for the FPU instruction emulation. # -obj-$(CONFIG_MATHEMU) := math.o qrnnd.o +obj-$(CONFIG_MATHEMU) := math.o -EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w -EXTRA_AFLAGS := -traditional +ccflags-y := -I$(src) -Iinclude/math-emu -w diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c index 648df714033..a6ba0d72433 100644 --- a/arch/s390/math-emu/math.c +++ b/arch/s390/math-emu/math.c @@ -1,27 +1,26 @@ /* - * arch/s390/math-emu/math.c - * * S390 version - * Copyright (C) 1999-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2001 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * * 'math.c' emulates IEEE instructions on a S390 processor * that does not have the IEEE fpu (all processors before G5). */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/mm.h> #include <asm/uaccess.h> #include <asm/lowcore.h> -#include "sfp-util.h" +#include <asm/sfp-util.h> #include <math-emu/soft-fp.h> #include <math-emu/single.h> #include <math-emu/double.h> #include <math-emu/quad.h> +#define FPC_VALID_MASK 0xF8F8FF03 + /* * I miss a macro to round a floating point number to the * nearest integer in the same floating point format. @@ -1565,52 +1564,52 @@ static int emu_tceb (struct pt_regs *regs, int rx, long val) { } static inline void emu_load_regd(int reg) { - if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ return; - asm volatile ( /* load reg from fp_regs.fprs[reg] */ - " bras 1,0f\n" - " ld 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" (reg<<4),"a" (¤t->thread.fp_regs.fprs[reg].d) - : "1" ); + asm volatile( /* load reg from fp_regs.fprs[reg] */ + " bras 1,0f\n" + " ld 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4),"a" (¤t->thread.fp_regs.fprs[reg].d) + : "1"); } static inline void emu_load_rege(int reg) { - if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ return; - asm volatile ( /* load reg from fp_regs.fprs[reg] */ - " bras 1,0f\n" - " le 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].f) - : "1" ); + asm volatile( /* load reg from fp_regs.fprs[reg] */ + " bras 1,0f\n" + " le 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].f) + : "1"); } static inline void emu_store_regd(int reg) { - if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ return; - asm volatile ( /* store reg to fp_regs.fprs[reg] */ - " bras 1,0f\n" - " std 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].d) - : "1" ); + asm volatile( /* store reg to fp_regs.fprs[reg] */ + " bras 1,0f\n" + " std 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].d) + : "1"); } static inline void emu_store_rege(int reg) { - if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ + if ((reg&9) != 0) /* test if reg in {0,2,4,6} */ return; - asm volatile ( /* store reg to fp_regs.fprs[reg] */ - " bras 1,0f\n" - " ste 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].f) - : "1" ); + asm volatile( /* store reg to fp_regs.fprs[reg] */ + " bras 1,0f\n" + " ste 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (reg<<4), "a" (¤t->thread.fp_regs.fprs[reg].f) + : "1"); } int math_emu_b3(__u8 *opcode, struct pt_regs * regs) { @@ -2089,24 +2088,23 @@ int math_emu_ldr(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ - __asm__ __volatile ( /* load rx from fp_regs.fprs[ry] */ - " bras 1,0f\n" - " ld 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" (opc & 0xf0), - "a" (&fp_regs->fprs[opc & 0xf].d) - : "1" ); + /* we got an exception therefore ry can't be in {0,2,4,6} */ + asm volatile( /* load rx from fp_regs.fprs[ry] */ + " bras 1,0f\n" + " ld 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].d) + : "1"); } else if ((opc & 0x9) == 0) { /* test if ry in {0,2,4,6} */ - __asm__ __volatile ( /* store ry to fp_regs.fprs[rx] */ - " bras 1,0f\n" - " std 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" ((opc & 0xf) << 4), - "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d) - : "1" ); + asm volatile ( /* store ry to fp_regs.fprs[rx] */ + " bras 1,0f\n" + " std 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" ((opc & 0xf) << 4), + "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d) + : "1"); } else /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */ fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf]; return 0; @@ -2120,24 +2118,23 @@ int math_emu_ler(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ - __asm__ __volatile ( /* load rx from fp_regs.fprs[ry] */ - " bras 1,0f\n" - " le 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" (opc & 0xf0), - "a" (&fp_regs->fprs[opc & 0xf].f) - : "1" ); + /* we got an exception therefore ry can't be in {0,2,4,6} */ + asm volatile( /* load rx from fp_regs.fprs[ry] */ + " bras 1,0f\n" + " le 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].f) + : "1"); } else if ((opc & 0x9) == 0) { /* test if ry in {0,2,4,6} */ - __asm__ __volatile ( /* store ry to fp_regs.fprs[rx] */ - " bras 1,0f\n" - " ste 0,0(%1)\n" - "0: ex %0,0(1)" - : /* no output */ - : "a" ((opc & 0xf) << 4), - "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f) - : "1" ); + asm volatile( /* store ry to fp_regs.fprs[rx] */ + " bras 1,0f\n" + " ste 0,0(%1)\n" + "0: ex %0,0(1)" + : /* no output */ + : "a" ((opc & 0xf) << 4), + "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f) + : "1"); } else /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */ fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf]; return 0; diff --git a/arch/s390/math-emu/qrnnd.S b/arch/s390/math-emu/qrnnd.S deleted file mode 100644 index b01c2b648e2..00000000000 --- a/arch/s390/math-emu/qrnnd.S +++ /dev/null @@ -1,77 +0,0 @@ -# S/390 __udiv_qrnnd - -# r2 : &__r -# r3 : upper half of 64 bit word n -# r4 : lower half of 64 bit word n -# r5 : divisor d -# the reminder r of the division is to be stored to &__r and -# the quotient q is to be returned - - .text - .globl __udiv_qrnnd -__udiv_qrnnd: - st %r2,24(%r15) # store pointer to reminder for later - lr %r0,%r3 # reload n - lr %r1,%r4 - ltr %r2,%r5 # reload and test divisor - jp 5f - # divisor >= 0x80000000 - srdl %r0,2 # n/4 - srl %r2,1 # d/2 - slr %r1,%r2 # special case if last bit of d is set - brc 3,0f # (n/4) div (n/2) can overflow by 1 - ahi %r0,-1 # trick: subtract n/2, then divide -0: dr %r0,%r2 # signed division - ahi %r1,1 # trick part 2: add 1 to the quotient - # now (n >> 2) = (d >> 1) * %r1 + %r0 - lhi %r3,1 - nr %r3,%r1 # test last bit of q - jz 1f - alr %r0,%r2 # add (d>>1) to r -1: srl %r1,1 # q >>= 1 - # now (n >> 2) = (d&-2) * %r1 + %r0 - lhi %r3,1 - nr %r3,%r5 # test last bit of d - jz 2f - slr %r0,%r1 # r -= q - brc 3,2f # borrow ? - alr %r0,%r5 # r += d - ahi %r1,-1 -2: # now (n >> 2) = d * %r1 + %r0 - alr %r1,%r1 # q <<= 1 - alr %r0,%r0 # r <<= 1 - brc 12,3f # overflow on r ? - slr %r0,%r5 # r -= d - ahi %r1,1 # q += 1 -3: lhi %r3,2 - nr %r3,%r4 # test next to last bit of n - jz 4f - ahi %r0,1 # r += 1 -4: clr %r0,%r5 # r >= d ? - jl 6f - slr %r0,%r5 # r -= d - ahi %r1,1 # q += 1 - # now (n >> 1) = d * %r1 + %r0 - j 6f -5: # divisor < 0x80000000 - srdl %r0,1 - dr %r0,%r2 # signed division - # now (n >> 1) = d * %r1 + %r0 -6: alr %r1,%r1 # q <<= 1 - alr %r0,%r0 # r <<= 1 - brc 12,7f # overflow on r ? - slr %r0,%r5 # r -= d - ahi %r1,1 # q += 1 -7: lhi %r3,1 - nr %r3,%r4 # isolate last bit of n - alr %r0,%r3 # r += (n & 1) - clr %r0,%r5 # r >= d ? - jl 8f - slr %r0,%r5 # r -= d - ahi %r1,1 # q += 1 -8: # now n = d * %r1 + %r0 - l %r2,24(%r15) - st %r0,0(%r2) - lr %r2,%r1 - br %r14 - .end __udiv_qrnnd diff --git a/arch/s390/math-emu/sfp-util.h b/arch/s390/math-emu/sfp-util.h deleted file mode 100644 index ab556b600f7..00000000000 --- a/arch/s390/math-emu/sfp-util.h +++ /dev/null @@ -1,63 +0,0 @@ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <asm/byteorder.h> - -#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - __asm__ (" alr %1,%3\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0: alr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc" ); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - __asm__ (" slr %1,%3\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0: slr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc" ); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */ -#define umul_ppmm(wh, wl, u, v) ({ \ - unsigned int __wh = u; \ - unsigned int __wl = v; \ - __asm__ (" ltr 1,%0\n" \ - " mr 0,%1\n" \ - " jnm 0f\n" \ - " alr 0,%1\n" \ - "0: ltr %1,%1\n" \ - " jnm 1f\n" \ - " alr 0,%0\n" \ - "1: lr %0,0\n" \ - " lr %1,1\n" \ - : "+d" (__wh), "+d" (__wl) \ - : : "0", "1", "cc" ); \ - wh = __wh; \ - wl = __wl; \ -}) - -#define udiv_qrnnd(q, r, n1, n0, d) \ - do { unsigned long __r; \ - (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ - (r) = __r; \ - } while (0) -extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long, - unsigned long , unsigned long); - -#define UDIV_NEEDS_NORMALIZATION 0 - -#define abort() return 0 - -#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index aa9a42b6e62..839592ca265 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o ioremap.o extmem.o mmap.o -obj-$(CONFIG_CMM) += cmm.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o +obj-$(CONFIG_CMM) += cmm.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 2d5cb138575..79ddd580d60 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -1,26 +1,32 @@ /* - * arch/s390/mm/cmm.c + * Collaborative memory management interface. * - * S390 version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * Copyright IBM Corp 2003, 2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, * - * Collaborative memory management interface. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/gfp.h> #include <linux/sched.h> #include <linux/sysctl.h> #include <linux/ctype.h> +#include <linux/swap.h> +#include <linux/kthread.h> +#include <linux/oom.h> +#include <linux/suspend.h> +#include <linux/uaccess.h> #include <asm/pgalloc.h> -#include <asm/uaccess.h> +#include <asm/diag.h> -static char *sender = "VMRMSVM"; +#ifdef CONFIG_CMM_IUCV +static char *cmm_default_sender = "VMRMSVM"; +#endif +static char *sender; module_param(sender, charp, 0400); MODULE_PARM_DESC(sender, "Guest name that may send SMSG messages (default VMRMSVM)"); @@ -35,106 +41,118 @@ struct cmm_page_array { unsigned long pages[CMM_NR_PAGES]; }; -static long cmm_pages = 0; -static long cmm_timed_pages = 0; -static volatile long cmm_pages_target = 0; -static volatile long cmm_timed_pages_target = 0; -static long cmm_timeout_pages = 0; -static long cmm_timeout_seconds = 0; +static long cmm_pages; +static long cmm_timed_pages; +static volatile long cmm_pages_target; +static volatile long cmm_timed_pages_target; +static long cmm_timeout_pages; +static long cmm_timeout_seconds; +static int cmm_suspended; -static struct cmm_page_array *cmm_page_list = 0; -static struct cmm_page_array *cmm_timed_page_list = 0; +static struct cmm_page_array *cmm_page_list; +static struct cmm_page_array *cmm_timed_page_list; +static DEFINE_SPINLOCK(cmm_lock); -static unsigned long cmm_thread_active = 0; -static struct work_struct cmm_thread_starter; -static wait_queue_head_t cmm_thread_wait; -static struct timer_list cmm_timer; +static struct task_struct *cmm_thread_ptr; +static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait); +static DEFINE_TIMER(cmm_timer, NULL, 0, 0); static void cmm_timer_fn(unsigned long); static void cmm_set_timer(void); -static long -cmm_strtoul(const char *cp, char **endp) +static long cmm_alloc_pages(long nr, long *counter, + struct cmm_page_array **list) { - unsigned int base = 10; - - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) { - base = 16; - cp++; - } - } - return simple_strtoul(cp, endp, base); -} + struct cmm_page_array *pa, *npa; + unsigned long addr; -static long -cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) -{ - struct cmm_page_array *pa; - unsigned long page; - - pa = *list; - while (pages) { - page = __get_free_page(GFP_NOIO); - if (!page) + while (nr) { + addr = __get_free_page(GFP_NOIO); + if (!addr) break; + spin_lock(&cmm_lock); + pa = *list; if (!pa || pa->index >= CMM_NR_PAGES) { /* Need a new page for the page list. */ - pa = (struct cmm_page_array *) + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *) __get_free_page(GFP_NOIO); - if (!pa) { - free_page(page); + if (!npa) { + free_page(addr); break; } - pa->next = *list; - pa->index = 0; - *list = pa; + spin_lock(&cmm_lock); + pa = *list; + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + *list = pa; + } else + free_page((unsigned long) npa); } - diag10(page); - pa->pages[pa->index++] = page; + diag10_range(addr >> PAGE_SHIFT, 1); + pa->pages[pa->index++] = addr; (*counter)++; - pages--; + spin_unlock(&cmm_lock); + nr--; } - return pages; + return nr; } -static void -cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) +static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list) { struct cmm_page_array *pa; - unsigned long page; + unsigned long addr; + spin_lock(&cmm_lock); pa = *list; - while (pages) { + while (nr) { if (!pa || pa->index <= 0) break; - page = pa->pages[--pa->index]; + addr = pa->pages[--pa->index]; if (pa->index == 0) { pa = pa->next; free_page((unsigned long) *list); *list = pa; } - free_page(page); + free_page(addr); (*counter)--; - pages--; + nr--; } + spin_unlock(&cmm_lock); + return nr; } -static int -cmm_thread(void *dummy) +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = 256; + + nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list); + if (nr > 0) + nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + *freed += 256 - nr; + return NOTIFY_OK; +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify, +}; + +static int cmm_thread(void *dummy) { int rc; - daemonize("cmmthread"); while (1) { rc = wait_event_interruptible(cmm_thread_wait, - (cmm_pages != cmm_pages_target || - cmm_timed_pages != cmm_timed_pages_target)); - if (rc == -ERESTARTSYS) { - /* Got kill signal. End thread. */ - clear_bit(0, &cmm_thread_active); + (!cmm_suspended && (cmm_pages != cmm_pages_target || + cmm_timed_pages != cmm_timed_pages_target)) || + kthread_should_stop()); + if (kthread_should_stop() || rc == -ERESTARTSYS) { cmm_pages_target = cmm_pages; cmm_timed_pages_target = cmm_timed_pages; break; @@ -151,7 +169,7 @@ cmm_thread(void *dummy) cmm_timed_pages_target = cmm_timed_pages; } else if (cmm_timed_pages_target < cmm_timed_pages) { cmm_free_pages(1, &cmm_timed_pages, - &cmm_timed_page_list); + &cmm_timed_page_list); } if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer)) cmm_set_timer(); @@ -159,22 +177,12 @@ cmm_thread(void *dummy) return 0; } -static void -cmm_start_thread(void) +static void cmm_kick_thread(void) { - kernel_thread(cmm_thread, 0, 0); -} - -static void -cmm_kick_thread(void) -{ - if (!test_and_set_bit(0, &cmm_thread_active)) - schedule_work(&cmm_thread_starter); wake_up(&cmm_thread_wait); } -static void -cmm_set_timer(void) +static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) @@ -191,79 +199,66 @@ cmm_set_timer(void) add_timer(&cmm_timer); } -static void -cmm_timer_fn(unsigned long ignored) +static void cmm_timer_fn(unsigned long ignored) { - long pages; + long nr; - pages = cmm_timed_pages_target - cmm_timeout_pages; - if (pages < 0) + nr = cmm_timed_pages_target - cmm_timeout_pages; + if (nr < 0) cmm_timed_pages_target = 0; else - cmm_timed_pages_target = pages; + cmm_timed_pages_target = nr; cmm_kick_thread(); cmm_set_timer(); } -void -cmm_set_pages(long pages) +static void cmm_set_pages(long nr) { - cmm_pages_target = pages; + cmm_pages_target = nr; cmm_kick_thread(); } -long -cmm_get_pages(void) +static long cmm_get_pages(void) { return cmm_pages; } -void -cmm_add_timed_pages(long pages) +static void cmm_add_timed_pages(long nr) { - cmm_timed_pages_target += pages; + cmm_timed_pages_target += nr; cmm_kick_thread(); } -long -cmm_get_timed_pages(void) +static long cmm_get_timed_pages(void) { return cmm_timed_pages; } -void -cmm_set_timeout(long pages, long seconds) +static void cmm_set_timeout(long nr, long seconds) { - cmm_timeout_pages = pages; + cmm_timeout_pages = nr; cmm_timeout_seconds = seconds; cmm_set_timer(); } -static inline int -cmm_skip_blanks(char *cp, char **endp) +static int cmm_skip_blanks(char *cp, char **endp) { char *str; - for (str = cp; *str == ' ' || *str == '\t'; str++); + for (str = cp; *str == ' ' || *str == '\t'; str++) + ; *endp = str; return str != cp; } -#ifdef CONFIG_CMM_PROC -/* These will someday get removed. */ -#define VM_CMM_PAGES 1111 -#define VM_CMM_TIMED_PAGES 1112 -#define VM_CMM_TIMEOUT 1113 - static struct ctl_table cmm_table[]; -static int -cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, - void *buffer, size_t *lenp, loff_t *ppos) +static int cmm_pages_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; - long pages; - int len; + unsigned int len; + long nr; if (!*lenp || (*ppos && !write)) { *lenp = 0; @@ -277,17 +272,17 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = cmm_strtoul(p, &p); + nr = simple_strtoul(p, &p, 0); if (ctl == &cmm_table[0]) - cmm_set_pages(pages); + cmm_set_pages(nr); else - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else { if (ctl == &cmm_table[0]) - pages = cmm_get_pages(); + nr = cmm_get_pages(); else - pages = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", pages); + nr = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", nr); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -298,13 +293,12 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, return 0; } -static int -cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, - void *buffer, size_t *lenp, loff_t *ppos) +static int cmm_timeout_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; - long pages, seconds; - int len; + long nr, seconds; + unsigned int len; if (!*lenp || (*ppos && !write)) { *lenp = 0; @@ -318,10 +312,10 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, return -EFAULT; buf[sizeof(buf) - 1] = '\0'; cmm_skip_blanks(buf, &p); - pages = cmm_strtoul(p, &p); + nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); - seconds = cmm_strtoul(p, &p); - cmm_set_timeout(pages, seconds); + seconds = simple_strtoul(p, &p, 0); + cmm_set_timeout(nr, seconds); } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -337,44 +331,38 @@ cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, static struct ctl_table cmm_table[] = { { - .ctl_name = VM_CMM_PAGES, .procname = "cmm_pages", - .mode = 0600, - .proc_handler = &cmm_pages_handler, + .mode = 0644, + .proc_handler = cmm_pages_handler, }, { - .ctl_name = VM_CMM_TIMED_PAGES, .procname = "cmm_timed_pages", - .mode = 0600, - .proc_handler = &cmm_pages_handler, + .mode = 0644, + .proc_handler = cmm_pages_handler, }, { - .ctl_name = VM_CMM_TIMEOUT, .procname = "cmm_timeout", - .mode = 0600, - .proc_handler = &cmm_timeout_handler, + .mode = 0644, + .proc_handler = cmm_timeout_handler, }, - { .ctl_name = 0 } + { } }; static struct ctl_table cmm_dir_table[] = { { - .ctl_name = CTL_VM, .procname = "vm", .maxlen = 0, .mode = 0555, .child = cmm_table, }, - { .ctl_name = 0 } + { } }; -#endif #ifdef CONFIG_CMM_IUCV #define SMSG_PREFIX "CMM" -static void -cmm_smsg_target(char *from, char *msg) +static void cmm_smsg_target(const char *from, char *msg) { - long pages, seconds; + long nr, seconds; if (strlen(sender) > 0 && strcmp(from, sender) != 0) return; @@ -383,68 +371,125 @@ cmm_smsg_target(char *from, char *msg) if (strncmp(msg, "SHRINK", 6) == 0) { if (!cmm_skip_blanks(msg + 6, &msg)) return; - pages = cmm_strtoul(msg, &msg); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_pages(pages); + cmm_set_pages(nr); } else if (strncmp(msg, "RELEASE", 7) == 0) { if (!cmm_skip_blanks(msg + 7, &msg)) return; - pages = cmm_strtoul(msg, &msg); + nr = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_add_timed_pages(pages); + cmm_add_timed_pages(nr); } else if (strncmp(msg, "REUSE", 5) == 0) { if (!cmm_skip_blanks(msg + 5, &msg)) return; - pages = cmm_strtoul(msg, &msg); + nr = simple_strtoul(msg, &msg, 0); if (!cmm_skip_blanks(msg, &msg)) return; - seconds = cmm_strtoul(msg, &msg); + seconds = simple_strtoul(msg, &msg, 0); cmm_skip_blanks(msg, &msg); if (*msg == '\0') - cmm_set_timeout(pages, seconds); + cmm_set_timeout(nr, seconds); } } #endif -struct ctl_table_header *cmm_sysctl_header; +static struct ctl_table_header *cmm_sysctl_header; + +static int cmm_suspend(void) +{ + cmm_suspended = 1; + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); + return 0; +} + +static int cmm_resume(void) +{ + cmm_suspended = 0; + cmm_kick_thread(); + return 0; +} -static int -cmm_init (void) +static int cmm_power_event(struct notifier_block *this, + unsigned long event, void *ptr) { -#ifdef CONFIG_CMM_PROC - cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1); + switch (event) { + case PM_POST_HIBERNATION: + return cmm_resume(); + case PM_HIBERNATION_PREPARE: + return cmm_suspend(); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block cmm_power_notifier = { + .notifier_call = cmm_power_event, +}; + +static int __init cmm_init(void) +{ + int rc = -ENOMEM; + + cmm_sysctl_header = register_sysctl_table(cmm_dir_table); + if (!cmm_sysctl_header) + goto out_sysctl; +#ifdef CONFIG_CMM_IUCV + /* convert sender to uppercase characters */ + if (sender) { + int len = strlen(sender); + while (len--) + sender[len] = toupper(sender[len]); + } else { + sender = cmm_default_sender; + } + + rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); + if (rc < 0) + goto out_smsg; #endif + rc = register_oom_notifier(&cmm_oom_nb); + if (rc < 0) + goto out_oom_notify; + rc = register_pm_notifier(&cmm_power_notifier); + if (rc) + goto out_pm; + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (!IS_ERR(cmm_thread_ptr)) + return 0; + + rc = PTR_ERR(cmm_thread_ptr); + unregister_pm_notifier(&cmm_power_notifier); +out_pm: + unregister_oom_notifier(&cmm_oom_nb); +out_oom_notify: #ifdef CONFIG_CMM_IUCV - smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +out_smsg: #endif - INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, 0); - init_waitqueue_head(&cmm_thread_wait); - init_timer(&cmm_timer); - return 0; + unregister_sysctl_table(cmm_sysctl_header); +out_sysctl: + del_timer_sync(&cmm_timer); + return rc; } +module_init(cmm_init); -static void -cmm_exit(void) +static void __exit cmm_exit(void) { - cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); - cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); -#ifdef CONFIG_CMM_PROC unregister_sysctl_table(cmm_sysctl_header); -#endif #ifdef CONFIG_CMM_IUCV smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); #endif + unregister_pm_notifier(&cmm_power_notifier); + unregister_oom_notifier(&cmm_oom_nb); + kthread_stop(cmm_thread_ptr); + del_timer_sync(&cmm_timer); + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } - -module_init(cmm_init); module_exit(cmm_exit); -EXPORT_SYMBOL(cmm_set_pages); -EXPORT_SYMBOL(cmm_get_pages); -EXPORT_SYMBOL(cmm_add_timed_pages); -EXPORT_SYMBOL(cmm_get_timed_pages); -EXPORT_SYMBOL(cmm_set_timeout); - MODULE_LICENSE("GPL"); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c new file mode 100644 index 00000000000..46d517c3c76 --- /dev/null +++ b/arch/s390/mm/dump_pagetables.c @@ -0,0 +1,246 @@ +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <asm/sections.h> +#include <asm/pgtable.h> + +static unsigned long max_addr; + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +enum address_markers_idx { + IDENTITY_NR = 0, + KERNEL_START_NR, + KERNEL_END_NR, + VMEMMAP_NR, + VMALLOC_NR, +#ifdef CONFIG_64BIT + MODULES_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + [IDENTITY_NR] = {0, "Identity Mapping"}, + [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"}, + [VMEMMAP_NR] = {0, "vmemmap Area"}, + [VMALLOC_NR] = {0, "vmalloc Area"}, +#ifdef CONFIG_64BIT + [MODULES_NR] = {0, "Modules Area"}, +#endif + { -1, NULL } +}; + +struct pg_state { + int level; + unsigned int current_prot; + unsigned long start_address; + unsigned long current_address; + const struct addr_marker *marker; +}; + +static void print_prot(struct seq_file *m, unsigned int pr, int level) +{ + static const char * const level_name[] = + { "ASCE", "PGD", "PUD", "PMD", "PTE" }; + + seq_printf(m, "%s ", level_name[level]); + if (pr & _PAGE_INVALID) { + seq_printf(m, "I\n"); + return; + } + seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); + seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); + seq_putc(m, '\n'); +} + +static void note_page(struct seq_file *m, struct pg_state *st, + unsigned int new_prot, int level) +{ + static const char units[] = "KMGTPE"; + int width = sizeof(unsigned long) * 2; + const char *unit = units; + unsigned int prot, cur; + unsigned long delta; + + /* + * If we have a "break" in the series, we need to flush the state + * that we have now. "break" is either changing perms, levels or + * address space marker. + */ + prot = new_prot; + cur = st->current_prot; + + if (!st->level) { + /* First entry */ + st->current_prot = new_prot; + st->level = level; + st->marker = address_markers; + seq_printf(m, "---[ %s ]---\n", st->marker->name); + } else if (prot != cur || level != st->level || + st->current_address >= st->marker[1].start_address) { + /* Print the actual finished series */ + seq_printf(m, "0x%0*lx-0x%0*lx", + width, st->start_address, + width, st->current_address); + delta = (st->current_address - st->start_address) >> 10; + while (!(delta & 0x3ff) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(m, "%9lu%c ", delta, *unit); + print_prot(m, st->current_prot, st->level); + if (st->current_address >= st->marker[1].start_address) { + st->marker++; + seq_printf(m, "---[ %s ]---\n", st->marker->name); + } + st->start_address = st->current_address; + st->current_prot = new_prot; + st->level = level; + } +} + +/* + * The actual page table walker functions. In order to keep the + * implementation of print_prot() short, we only check and pass + * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, + * segment or page table entry is invalid or read-only. + * After all it's just a hint that the current level being walked + * contains an invalid or read-only entry. + */ +static void walk_pte_level(struct seq_file *m, struct pg_state *st, + pmd_t *pmd, unsigned long addr) +{ + unsigned int prot; + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { + st->current_address = addr; + pte = pte_offset_kernel(pmd, addr); + prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); + note_page(m, st, prot, 4); + addr += PAGE_SIZE; + } +} + +#ifdef CONFIG_64BIT +#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) +#else +#define _PMD_PROT_MASK 0 +#endif + +static void walk_pmd_level(struct seq_file *m, struct pg_state *st, + pud_t *pud, unsigned long addr) +{ + unsigned int prot; + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { + st->current_address = addr; + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + if (pmd_large(*pmd)) { + prot = pmd_val(*pmd) & _PMD_PROT_MASK; + note_page(m, st, prot, 3); + } else + walk_pte_level(m, st, pmd, addr); + } else + note_page(m, st, _PAGE_INVALID, 3); + addr += PMD_SIZE; + } +} + +#ifdef CONFIG_64BIT +#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO) +#else +#define _PUD_PROT_MASK 0 +#endif + +static void walk_pud_level(struct seq_file *m, struct pg_state *st, + pgd_t *pgd, unsigned long addr) +{ + unsigned int prot; + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { + st->current_address = addr; + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) + if (pud_large(*pud)) { + prot = pud_val(*pud) & _PUD_PROT_MASK; + note_page(m, st, prot, 2); + } else + walk_pmd_level(m, st, pud, addr); + else + note_page(m, st, _PAGE_INVALID, 2); + addr += PUD_SIZE; + } +} + +static void walk_pgd_level(struct seq_file *m) +{ + unsigned long addr = 0; + struct pg_state st; + pgd_t *pgd; + int i; + + memset(&st, 0, sizeof(st)); + for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { + st.current_address = addr; + pgd = pgd_offset_k(addr); + if (!pgd_none(*pgd)) + walk_pud_level(m, &st, pgd, addr); + else + note_page(m, &st, _PAGE_INVALID, 1); + addr += PGDIR_SIZE; + } + /* Flush out the last page */ + st.current_address = max_addr; + note_page(m, &st, 0, 0); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + walk_pgd_level(m); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pt_dump_init(void) +{ + /* + * Figure out the maximum virtual address being accessible with the + * kernel ASCE. We need this to keep the page table walker functions + * from accessing non-existent entries. + */ +#ifdef CONFIG_32BIT + max_addr = 1UL << 31; +#else + max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; + max_addr = 1UL << (max_addr * 11 + 31); + address_markers[MODULES_NR].start_address = MODULES_VADDR; +#endif + address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; + address_markers[VMALLOC_NR].start_address = VMALLOC_START; + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); + return 0; +} +device_initcall(pt_dump_init); diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 00000000000..4d1ee88864e --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,81 @@ +#include <linux/module.h> +#include <linux/sort.h> +#include <asm/uaccess.h> + +/* + * Search one exception table for an entry corresponding to the + * given instruction address, and return the address of the entry, + * or NULL if none is found. + * We use a binary search, and thus we assume that the table is + * already sorted. + */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + const struct exception_table_entry *mid; + unsigned long addr; + + while (first <= last) { + mid = ((last - first) >> 1) + first; + addr = extable_insn(mid); + if (addr < value) + first = mid + 1; + else if (addr > value) + last = mid - 1; + else + return mid; + } + return NULL; +} + +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + * + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* This compare is only valid after normalization. */ + return x->insn - y->insn; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + struct exception_table_entry *p; + int i; + + /* Normalize entries to being relative to the start of the section */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn += i; + sort(start, finish - start, sizeof(*start), cmp_ex, NULL); + /* Denormalize all entries */ + for (p = start, i = 0; p < finish; p++, i += 8) + p->insn -= i; +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /* Trim the beginning */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /* Trim the end */ + while (m->num_exentries && + within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index a9566bcab68..519bba716cc 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -1,12 +1,14 @@ /* - * File...........: arch/s390/mm/extmem.c * Author(s)......: Carsten Otte <cotte@de.ibm.com> * Rob M van der Heij <rvdheij@nl.ibm.com> * Steven Shultz <shultzss@us.ibm.com> * Bugreports.to..: <Linux390@de.ibm.com> - * (C) IBM Corporation 2002-2004 + * Copyright IBM Corp. 2002, 2004 */ +#define KMSG_COMPONENT "extmem" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/kernel.h> #include <linux/string.h> #include <linux/spinlock.h> @@ -14,25 +16,15 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/bootmem.h> +#include <linux/ctype.h> +#include <linux/ioport.h> #include <asm/page.h> +#include <asm/pgtable.h> #include <asm/ebcdic.h> #include <asm/errno.h> #include <asm/extmem.h> #include <asm/cpcmd.h> -#include <linux/ctype.h> - -#define DCSS_DEBUG /* Debug messages on/off */ - -#define DCSS_NAME "extmem" -#ifdef DCSS_DEBUG -#define PRINT_DEBUG(x...) printk(KERN_DEBUG DCSS_NAME " debug:" x) -#else -#define PRINT_DEBUG(x...) do {} while (0) -#endif -#define PRINT_INFO(x...) printk(KERN_INFO DCSS_NAME " info:" x) -#define PRINT_WARN(x...) printk(KERN_WARNING DCSS_NAME " warning:" x) -#define PRINT_ERR(x...) printk(KERN_ERR DCSS_NAME " error:" x) - +#include <asm/setup.h> #define DCSS_LOADSHR 0x00 #define DCSS_LOADNSR 0x04 @@ -40,20 +32,40 @@ #define DCSS_FINDSEG 0x0c #define DCSS_LOADNOLY 0x10 #define DCSS_SEGEXT 0x18 +#define DCSS_LOADSHRX 0x20 +#define DCSS_LOADNSRX 0x24 +#define DCSS_FINDSEGX 0x2c +#define DCSS_SEGEXTX 0x38 #define DCSS_FINDSEGA 0x0c struct qrange { - unsigned int start; // 3byte start address, 1 byte type - unsigned int end; // 3byte end address, 1 byte reserved + unsigned long start; /* last byte type */ + unsigned long end; /* last byte reserved */ }; struct qout64 { + unsigned long segstart; + unsigned long segend; + int segcnt; + int segrcnt; + struct qrange range[6]; +}; + +#ifdef CONFIG_64BIT +struct qrange_old { + unsigned int start; /* last byte type */ + unsigned int end; /* last byte reserved */ +}; + +/* output area format for the Diag x'64' old subcode x'18' */ +struct qout64_old { int segstart; int segend; int segcnt; int segrcnt; - struct qrange range[6]; + struct qrange_old range[6]; }; +#endif struct qin64 { char qopcode; @@ -68,6 +80,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; + char res_name[15]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -75,22 +88,68 @@ struct dcss_segment { unsigned int vm_segtype; struct qrange range[6]; int segcnt; + struct resource *res; }; -static DEFINE_SPINLOCK(dcss_lock); -static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); +static DEFINE_MUTEX(dcss_lock); +static LIST_HEAD(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; +static int loadshr_scode, loadnsr_scode, findseg_scode; +static int segext_scode, purgeseg_scode; +static int scode_set; -extern struct { - unsigned long addr, size, type; -} memory_chunk[MEMORY_CHUNKS]; +/* set correct Diag x'64' subcodes. */ +static int +dcss_set_subcodes(void) +{ +#ifdef CONFIG_64BIT + char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA); + unsigned long rx, ry; + int rc; + + if (name == NULL) + return -ENOMEM; + + rx = (unsigned long) name; + ry = DCSS_FINDSEGX; + + strcpy(name, "dummy"); + asm volatile( + " diag %0,%1,0x64\n" + "0: ipm %2\n" + " srl %2,28\n" + " j 2f\n" + "1: la %2,3\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + + kfree(name); + /* Diag x'64' new subcodes are supported, set to new subcodes */ + if (rc != 3) { + loadshr_scode = DCSS_LOADSHRX; + loadnsr_scode = DCSS_LOADNSRX; + purgeseg_scode = DCSS_PURGESEG; + findseg_scode = DCSS_FINDSEGX; + segext_scode = DCSS_SEGEXTX; + return 0; + } +#endif + /* Diag x'64' new subcodes are not supported, set to old subcodes */ + loadshr_scode = DCSS_LOADNOLY; + loadnsr_scode = DCSS_LOADNSR; + purgeseg_scode = DCSS_PURGESEG; + findseg_scode = DCSS_FINDSEG; + segext_scode = DCSS_SEGEXT; + return 0; +} /* * Create the 8 bytes, ebcdic VM segment name from * an ascii name. */ -static void inline +static void dcss_mkname(char *name, char *dcss_name) { int i; @@ -117,7 +176,7 @@ segment_by_name (char *name) struct list_head *l; struct dcss_segment *tmp, *retval = NULL; - assert_spin_locked(&dcss_lock); + BUG_ON(!mutex_is_locked(&dcss_lock)); dcss_mkname (name, dcss_name); list_for_each (l, &dcss_list) { tmp = list_entry (l, struct dcss_segment, list); @@ -134,25 +193,45 @@ segment_by_name (char *name) * Perform a function on a dcss segment. */ static inline int -dcss_diag (__u8 func, void *parameter, +dcss_diag(int *func, void *parameter, unsigned long *ret1, unsigned long *ret2) { unsigned long rx, ry; int rc; + if (scode_set == 0) { + rc = dcss_set_subcodes(); + if (rc < 0) + return rc; + scode_set = 1; + } rx = (unsigned long) parameter; - ry = (unsigned long) func; - __asm__ __volatile__( + ry = (unsigned long) *func; + #ifdef CONFIG_64BIT - " sam31\n" // switch to 31 bit - " diag %0,%1,0x64\n" - " sam64\n" // switch back to 64 bit + /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */ + if (*func > DCSS_SEGEXT) + asm volatile( + " diag %0,%1,0x64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */ + else + asm volatile( + " sam31\n" + " diag %0,%1,0x64\n" + " sam64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); #else - " diag %0,%1,0x64\n" + asm volatile( + " diag %0,%1,0x64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); #endif - " ipm %2\n" - " srl %2,28\n" - : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc" ); *ret1 = rx; *ret2 = ry; return rc; @@ -172,12 +251,13 @@ dcss_diag_translate_rc (int vm_rc) { static int query_segment_type (struct dcss_segment *seg) { - struct qin64 *qin = kmalloc (sizeof(struct qin64), GFP_DMA); - struct qout64 *qout = kmalloc (sizeof(struct qout64), GFP_DMA); - - int diag_cc, rc, i; unsigned long dummy, vmrc; + int diag_cc, rc, i; + struct qout64 *qout; + struct qin64 *qin; + qin = kmalloc(sizeof(*qin), GFP_KERNEL | GFP_DMA); + qout = kmalloc(sizeof(*qout), GFP_KERNEL | GFP_DMA); if ((qin == NULL) || (qout == NULL)) { rc = -ENOMEM; goto out_free; @@ -189,15 +269,47 @@ query_segment_type (struct dcss_segment *seg) qin->qoutlen = sizeof(struct qout64); memcpy (qin->qname, seg->dcss_name, 8); - diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc); + diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc); + if (diag_cc < 0) { + rc = diag_cc; + goto out_free; + } if (diag_cc > 1) { + pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } +#ifdef CONFIG_64BIT + /* Only old format of output area of Diagnose x'64' is supported, + copy data for the new format. */ + if (segext_scode == DCSS_SEGEXT) { + struct qout64_old *qout_old; + qout_old = kzalloc(sizeof(*qout_old), GFP_KERNEL | GFP_DMA); + if (qout_old == NULL) { + rc = -ENOMEM; + goto out_free; + } + memcpy(qout_old, qout, sizeof(struct qout64_old)); + qout->segstart = (unsigned long) qout_old->segstart; + qout->segend = (unsigned long) qout_old->segend; + qout->segcnt = qout_old->segcnt; + qout->segrcnt = qout_old->segrcnt; + + if (qout->segcnt > 6) + qout->segrcnt = 6; + for (i = 0; i < qout->segrcnt; i++) { + qout->range[i].start = + (unsigned long) qout_old->range[i].start; + qout->range[i].end = + (unsigned long) qout_old->range[i].end; + } + kfree(qout_old); + } +#endif if (qout->segcnt > 6) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out_free; } @@ -212,11 +324,11 @@ query_segment_type (struct dcss_segment *seg) for (i=0; i<qout->segcnt; i++) { if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) && ((qout->range[i].start & 0xff) != SEG_TYPE_EN)) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out_free; } if (start != qout->range[i].start >> PAGE_SHIFT) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto out_free; } start = (qout->range[i].end >> PAGE_SHIFT) + 1; @@ -240,25 +352,29 @@ query_segment_type (struct dcss_segment *seg) } /* - * check if the given segment collides with guest storage. - * returns 1 if this is the case, 0 if no collision was found + * get info about a segment + * possible return values: + * -ENOSYS : we are not running on VM + * -EIO : could not perform query diagnose + * -ENOENT : no such segment + * -EOPNOTSUPP: multi-part segment cannot be used with linux + * -ENOMEM : out of memory + * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h */ -static int -segment_overlaps_storage(struct dcss_segment *seg) +int +segment_type (char* name) { - int i; + int rc; + struct dcss_segment seg; - for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type != 0) - continue; - if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) - continue; - if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20) - < (seg->start_addr >> 20)) - continue; - return 1; - } - return 0; + if (!MACHINE_IS_VM) + return -ENOSYS; + + dcss_mkname(name, seg.dcss_name); + rc = query_segment_type (&seg); + if (rc < 0) + return rc; + return seg.vm_segtype; } /* @@ -271,7 +387,7 @@ segment_overlaps_others (struct dcss_segment *seg) struct list_head *l; struct dcss_segment *tmp; - assert_spin_locked(&dcss_lock); + BUG_ON(!mutex_is_locked(&dcss_lock)); list_for_each(l, &dcss_list) { tmp = list_entry(l, struct dcss_segment, list); if ((tmp->start_addr >> 20) > (seg->end >> 20)) @@ -286,55 +402,17 @@ segment_overlaps_others (struct dcss_segment *seg) } /* - * check if segment exceeds the kernel mapping range (detected or set via mem=) - * returns 1 if this is the case, 0 if segment fits into the range - */ -static inline int -segment_exceeds_range (struct dcss_segment *seg) -{ - int seg_last_pfn = (seg->end) >> PAGE_SHIFT; - if (seg_last_pfn > max_pfn) - return 1; - return 0; -} - -/* - * get info about a segment - * possible return values: - * -ENOSYS : we are not running on VM - * -EIO : could not perform query diagnose - * -ENOENT : no such segment - * -ENOTSUPP: multi-part segment cannot be used with linux - * -ENOSPC : segment cannot be used (overlaps with storage) - * -ENOMEM : out of memory - * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h - */ -int -segment_type (char* name) -{ - int rc; - struct dcss_segment seg; - - if (!MACHINE_IS_VM) - return -ENOSYS; - - dcss_mkname(name, seg.dcss_name); - rc = query_segment_type (&seg); - if (rc < 0) - return rc; - return seg.vm_segtype; -} - -/* * real segment loading function, called from segment_load */ static int __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end) { - struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment), - GFP_DMA); - int dcss_command, rc, diag_cc; + unsigned long start_addr, end_addr, dummy; + struct dcss_segment *seg; + int rc, diag_cc; + start_addr = end_addr = 0; + seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); if (seg == NULL) { rc = -ENOMEM; goto out; @@ -343,56 +421,84 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = query_segment_type (seg); if (rc < 0) goto out_free; - if (segment_exceeds_range(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - exceeds" - " kernel mapping range\n",name); - rc = -ERANGE; - goto out_free; + + if (loadshr_scode == DCSS_LOADSHRX) { + if (segment_overlaps_others(seg)) { + rc = -EBUSY; + goto out_free; + } } - if (segment_overlaps_storage(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " storage\n",name); - rc = -ENOSPC; + + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + + if (rc) goto out_free; + + seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL); + if (seg->res == NULL) { + rc = -ENOMEM; + goto out_shared; } - if (segment_overlaps_others(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " other segments\n",name); + seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + seg->res->start = seg->start_addr; + seg->res->end = seg->end; + memcpy(&seg->res_name, seg->dcss_name, 8); + EBCASC(seg->res_name, 8); + seg->res_name[8] = '\0'; + strncat(seg->res_name, " (DCSS)", 7); + seg->res->name = seg->res_name; + rc = seg->vm_segtype; + if (rc == SEG_TYPE_SC || + ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared)) + seg->res->flags |= IORESOURCE_READONLY; + if (request_resource(&iomem_resource, seg->res)) { rc = -EBUSY; - goto out_free; + kfree(seg->res); + goto out_shared; } + if (do_nonshared) - dcss_command = DCSS_LOADNSR; + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); else - dcss_command = DCSS_LOADNOLY; - - diag_cc = dcss_diag(dcss_command, seg->dcss_name, - &seg->start_addr, &seg->end); + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); + rc = diag_cc; + goto out_resource; + } if (diag_cc > 1) { - PRINT_WARN ("segment_load: could not load segment %s - " - "diag returned error (%ld)\n",name,seg->end); - rc = dcss_diag_translate_rc (seg->end); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &seg->start_addr, &seg->end); - goto out_free; + pr_warning("Loading DCSS %s failed with rc=%ld\n", name, + end_addr); + rc = dcss_diag_translate_rc(end_addr); + dcss_diag(&purgeseg_scode, seg->dcss_name, + &dummy, &dummy); + goto out_resource; } + seg->start_addr = start_addr; + seg->end = end_addr; seg->do_nonshared = do_nonshared; atomic_set(&seg->ref_count, 1); list_add(&seg->list, &dcss_list); - rc = seg->vm_segtype; *addr = seg->start_addr; *end = seg->end; if (do_nonshared) - PRINT_INFO ("segment_load: loaded segment %s range %p .. %p " - "type %s in non-shared mode\n", name, - (void*)seg->start_addr, (void*)seg->end, - segtype_string[seg->vm_segtype]); - else - PRINT_INFO ("segment_load: loaded segment %s range %p .. %p " - "type %s in shared mode\n", name, - (void*)seg->start_addr, (void*)seg->end, - segtype_string[seg->vm_segtype]); + pr_info("DCSS %s of range %p to %p and type %s loaded as " + "exclusive-writable\n", name, (void*) seg->start_addr, + (void*) seg->end, segtype_string[seg->vm_segtype]); + else { + pr_info("DCSS %s of range %p to %p and type %s loaded in " + "shared access mode\n", name, (void*) seg->start_addr, + (void*) seg->end, segtype_string[seg->vm_segtype]); + } goto out; + out_resource: + release_resource(seg->res); + kfree(seg->res); + out_shared: + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -410,7 +516,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long * -ENOSYS : we are not running on VM * -EIO : could not perform query or load diagnose * -ENOENT : no such segment - * -ENOTSUPP: multi-part segment cannot be used with linux + * -EOPNOTSUPP: multi-part segment cannot be used with linux * -ENOSPC : segment cannot be used (overlaps with storage) * -EBUSY : segment can temporarily not be used (overlaps with dcss) * -ERANGE : segment cannot be used (exceeds kernel mapping range) @@ -428,7 +534,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, if (!MACHINE_IS_VM) return -ENOSYS; - spin_lock (&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) rc = __segment_load (name, do_nonshared, addr, end); @@ -443,7 +549,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, rc = -EPERM; } } - spin_unlock (&dcss_lock); + mutex_unlock(&dcss_lock); return rc; } @@ -457,57 +563,83 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, * -ENOENT : no such segment (segment gone!) * -EAGAIN : segment is in use by other exploiters, try later * -EINVAL : no segment with the given name is currently loaded - name invalid + * -EBUSY : segment can temporarily not be used (overlaps with dcss) * 0 : operation succeeded */ int segment_modify_shared (char *name, int do_nonshared) { struct dcss_segment *seg; - unsigned long dummy; - int dcss_command, rc, diag_cc; + unsigned long start_addr, end_addr, dummy; + int rc, diag_cc; - spin_lock (&dcss_lock); + start_addr = end_addr = 0; + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { rc = -EINVAL; goto out_unlock; } if (do_nonshared == seg->do_nonshared) { - PRINT_INFO ("segment_modify_shared: not reloading segment %s" - " - already in requested mode\n",name); + pr_info("DCSS %s is already in the requested access " + "mode\n", name); rc = 0; goto out_unlock; } if (atomic_read (&seg->ref_count) != 1) { - PRINT_WARN ("segment_modify_shared: not reloading segment %s - " - "segment is in use by other driver(s)\n",name); + pr_warning("DCSS %s is in use and cannot be reloaded\n", + name); rc = -EAGAIN; goto out_unlock; } - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &dummy, &dummy); + release_resource(seg->res); + if (do_nonshared) + seg->res->flags &= ~IORESOURCE_READONLY; + else + if (seg->vm_segtype == SEG_TYPE_SR || + seg->vm_segtype == SEG_TYPE_ER) + seg->res->flags |= IORESOURCE_READONLY; + + if (request_resource(&iomem_resource, seg->res)) { + pr_warning("DCSS %s overlaps with used memory resources " + "and cannot be reloaded\n", name); + rc = -EBUSY; + kfree(seg->res); + goto out_del_mem; + } + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); if (do_nonshared) - dcss_command = DCSS_LOADNSR; + diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, + &start_addr, &end_addr); else - dcss_command = DCSS_LOADNOLY; - diag_cc = dcss_diag(dcss_command, seg->dcss_name, - &seg->start_addr, &seg->end); + diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name, + &start_addr, &end_addr); + if (diag_cc < 0) { + rc = diag_cc; + goto out_del_res; + } if (diag_cc > 1) { - PRINT_WARN ("segment_modify_shared: could not reload segment %s" - " - diag returned error (%ld)\n",name,seg->end); - rc = dcss_diag_translate_rc (seg->end); - goto out_del; + pr_warning("Reloading DCSS %s failed with rc=%ld\n", name, + end_addr); + rc = dcss_diag_translate_rc(end_addr); + goto out_del_res; } + seg->start_addr = start_addr; + seg->end = end_addr; seg->do_nonshared = do_nonshared; rc = 0; goto out_unlock; - out_del: + out_del_res: + release_resource(seg->res); + kfree(seg->res); + out_del_mem: + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &dummy, &dummy); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); kfree(seg); out_unlock: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); return rc; } @@ -525,21 +657,22 @@ segment_unload(char *name) if (!MACHINE_IS_VM) return; - spin_lock(&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { - PRINT_ERR ("could not find segment %s in segment_unload, " - "please report to linux390@de.ibm.com\n",name); + pr_err("Unloading unknown DCSS %s failed\n", name); goto out_unlock; } - if (atomic_dec_return(&seg->ref_count) == 0) { - list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &dummy, &dummy); - kfree(seg); - } + if (atomic_dec_return(&seg->ref_count) != 0) + goto out_unlock; + release_resource(seg->res); + kfree(seg->res); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + list_del(&seg->list); + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + kfree(seg); out_unlock: - spin_unlock(&dcss_lock); + mutex_unlock(&dcss_lock); } /* @@ -549,36 +682,91 @@ void segment_save(char *name) { struct dcss_segment *seg; - int startpfn = 0; - int endpfn = 0; char cmd1[160]; char cmd2[80]; - int i; + int i, response; if (!MACHINE_IS_VM) return; - spin_lock(&dcss_lock); + mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { - PRINT_ERR ("could not find segment %s in segment_save, please report to linux390@de.ibm.com\n",name); - return; + pr_err("Saving unknown DCSS %s failed\n", name); + goto out; } - startpfn = seg->start_addr >> PAGE_SHIFT; - endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; i<seg->segcnt; i++) { - sprintf(cmd1+strlen(cmd1), " %X-%X %s", + sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", seg->range[i].start >> PAGE_SHIFT, seg->range[i].end >> PAGE_SHIFT, segtype_string[seg->range[i].start & 0xff]); } sprintf(cmd2, "SAVESEG %s", name); - cpcmd(cmd1, NULL, 0, NULL); - cpcmd(cmd2, NULL, 0, NULL); - spin_unlock(&dcss_lock); + response = 0; + cpcmd(cmd1, NULL, 0, &response); + if (response) { + pr_err("Saving a DCSS failed with DEFSEG response code " + "%i\n", response); + goto out; + } + cpcmd(cmd2, NULL, 0, &response); + if (response) { + pr_err("Saving a DCSS failed with SAVESEG response code " + "%i\n", response); + goto out; + } +out: + mutex_unlock(&dcss_lock); +} + +/* + * print appropriate error message for segment_load()/segment_type() + * return code + */ +void segment_warning(int rc, char *seg_name) +{ + switch (rc) { + case -ENOENT: + pr_err("DCSS %s cannot be loaded or queried\n", seg_name); + break; + case -ENOSYS: + pr_err("DCSS %s cannot be loaded or queried without " + "z/VM\n", seg_name); + break; + case -EIO: + pr_err("Loading or querying DCSS %s resulted in a " + "hardware error\n", seg_name); + break; + case -EOPNOTSUPP: + pr_err("DCSS %s has multiple page ranges and cannot be " + "loaded or queried\n", seg_name); + break; + case -ENOSPC: + pr_err("DCSS %s overlaps with used storage and cannot " + "be loaded\n", seg_name); + break; + case -EBUSY: + pr_err("%s needs used memory resources and cannot be " + "loaded or queried\n", seg_name); + break; + case -EPERM: + pr_err("DCSS %s is already loaded in a different access " + "mode\n", seg_name); + break; + case -ENOMEM: + pr_err("There is not enough memory to load or query " + "DCSS %s\n", seg_name); + break; + case -ERANGE: + pr_err("DCSS %s exceeds the kernel mapping range (%lu) " + "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS); + break; + default: + break; + } } EXPORT_SYMBOL(segment_load); @@ -586,3 +774,4 @@ EXPORT_SYMBOL(segment_unload); EXPORT_SYMBOL(segment_save); EXPORT_SYMBOL(segment_type); EXPORT_SYMBOL(segment_modify_shared); +EXPORT_SYMBOL(segment_warning); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 81ade401b07..3f3b35403d0 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -1,8 +1,6 @@ /* - * arch/s390/mm/fault.c - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Hartmut Penner (hp@de.ibm.com) * Ulrich Weigand (uweigand@de.ibm.com) * @@ -10,7 +8,8 @@ * Copyright (C) 1995 Linus Torvalds */ -#include <linux/config.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -20,41 +19,69 @@ #include <linux/ptrace.h> #include <linux/mman.h> #include <linux/mm.h> +#include <linux/compat.h> #include <linux/smp.h> -#include <linux/smp_lock.h> +#include <linux/kdebug.h> #include <linux/init.h> #include <linux/console.h> #include <linux/module.h> #include <linux/hardirq.h> - -#include <asm/system.h> -#include <asm/uaccess.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/hugetlb.h> +#include <asm/asm-offsets.h> #include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/facility.h> +#include "../kernel/entry.h" #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 -#define __FIXUP_MASK 0x7fffffff #define __SUBCODE_MASK 0x0200 #define __PF_RES_FIELD 0ULL #else /* CONFIG_64BIT */ #define __FAIL_ADDR_MASK -4096L -#define __FIXUP_MASK ~0L #define __SUBCODE_MASK 0x0600 #define __PF_RES_FIELD 0x8000000000000000ULL #endif /* CONFIG_64BIT */ -#ifdef CONFIG_SYSCTL -extern int sysctl_userprocess_debug; +#define VM_FAULT_BADCONTEXT 0x010000 +#define VM_FAULT_BADMAP 0x020000 +#define VM_FAULT_BADACCESS 0x040000 +#define VM_FAULT_SIGNAL 0x080000 +#define VM_FAULT_PFAULT 0x100000 + +static unsigned long store_indication __read_mostly; + +#ifdef CONFIG_64BIT +static int __init fault_init(void) +{ + if (test_facility(75)) + store_indication = 0xc00; + return 0; +} +early_initcall(fault_init); #endif -extern void die(const char *,struct pt_regs *,long); +static inline int notify_page_fault(struct pt_regs *regs) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (kprobes_built_in() && !user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } + return ret; +} -extern spinlock_t timerlist_lock; /* * Unlock any spinlocks which will prevent us from getting the - * message out (timerlist_lock is acquired through the - * console unblank code) + * message out. */ void bust_spinlocks(int yes) { @@ -76,287 +103,482 @@ void bust_spinlocks(int yes) } /* - * Check which address space is addressed by the access - * register in S390_lowcore.exc_access_id. - * Returns 1 for user space and 0 for kernel space. + * Returns the address space associated with the fault. + * Returns 0 for kernel space and 1 for user space. */ -static int __check_access_register(struct pt_regs *regs, int error_code) +static inline int user_space_fault(struct pt_regs *regs) { - int areg = S390_lowcore.exc_access_id; + unsigned long trans_exc_code; - if (areg == 0) - /* Access via access register 0 -> kernel address */ + /* + * The lowest two bits of the translation exception + * identification indicate which paging table was used. + */ + trans_exc_code = regs->int_parm_long & 3; + if (trans_exc_code == 3) /* home space -> kernel */ return 0; - save_access_regs(current->thread.acrs); - if (regs && areg < NUM_ACRS && current->thread.acrs[areg] <= 1) - /* - * access register contains 0 -> kernel address, - * access register contains 1 -> user space address - */ - return current->thread.acrs[areg]; - - /* Something unhealthy was done with the access registers... */ - die("page fault via unknown access register", regs, error_code); - do_exit(SIGKILL); + if (user_mode(regs)) + return 1; + if (trans_exc_code == 2) /* secondary space -> set_fs */ + return current->thread.mm_segment.ar4; + if (current->flags & PF_VCPU) + return 1; return 0; } -/* - * Check which address space the address belongs to. - * Returns 1 for user space and 0 for kernel space. - */ -static inline int check_user_space(struct pt_regs *regs, int error_code) +static int bad_address(void *p) { - /* - * The lowest two bits of S390_lowcore.trans_exc_code indicate - * which paging table was used: - * 0: Primary Segment Table Descriptor - * 1: STD determined via access register - * 2: Secondary Segment Table Descriptor - * 3: Home Segment Table Descriptor - */ - int descriptor = S390_lowcore.trans_exc_code & 3; - if (unlikely(descriptor == 1)) - return __check_access_register(regs, error_code); - if (descriptor == 2) - return current->thread.mm_segment.ar4; - return descriptor != 0; + unsigned long dummy; + + return probe_kernel_address((unsigned long *)p, dummy); +} + +#ifdef CONFIG_64BIT +static void dump_pagetable(unsigned long asce, unsigned long address) +{ + unsigned long *table = __va(asce & PAGE_MASK); + + pr_alert("AS:%016lx ", asce); + switch (asce & _ASCE_TYPE_MASK) { + case _ASCE_TYPE_REGION1: + table = table + ((address >> 53) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R1:%016lx ", *table); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION2: + table = table + ((address >> 42) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R2:%016lx ", *table); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_REGION3: + table = table + ((address >> 31) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("R3:%016lx ", *table); + if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ + case _ASCE_TYPE_SEGMENT: + table = table + ((address >> 20) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont(KERN_CONT "S:%016lx ", *table); + if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) + goto out; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + } + table = table + ((address >> 12) & 0xff); + if (bad_address(table)) + goto bad; + pr_cont("P:%016lx ", *table); +out: + pr_cont("\n"); + return; +bad: + pr_cont("BAD\n"); +} + +#else /* CONFIG_64BIT */ + +static void dump_pagetable(unsigned long asce, unsigned long address) +{ + unsigned long *table = __va(asce & PAGE_MASK); + + pr_alert("AS:%08lx ", asce); + table = table + ((address >> 20) & 0x7ff); + if (bad_address(table)) + goto bad; + pr_cont("S:%08lx ", *table); + if (*table & _SEGMENT_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + if (bad_address(table)) + goto bad; + pr_cont("P:%08lx ", *table); +out: + pr_cont("\n"); + return; +bad: + pr_cont("BAD\n"); +} + +#endif /* CONFIG_64BIT */ + +static void dump_fault_info(struct pt_regs *regs) +{ + unsigned long asce; + + pr_alert("Fault in "); + switch (regs->int_parm_long & 3) { + case 3: + pr_cont("home space "); + break; + case 2: + pr_cont("secondary space "); + break; + case 1: + pr_cont("access register "); + break; + case 0: + pr_cont("primary space "); + break; + } + pr_cont("mode while using "); + if (!user_space_fault(regs)) { + asce = S390_lowcore.kernel_asce; + pr_cont("kernel "); + } +#ifdef CONFIG_PGSTE + else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { + struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + asce = gmap->asce; + pr_cont("gmap "); + } +#endif + else { + asce = S390_lowcore.user_asce; + pr_cont("user "); + } + pr_cont("ASCE.\n"); + dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); +} + +static inline void report_user_fault(struct pt_regs *regs, long signr) +{ + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) + return; + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk(KERN_ALERT "User process fault: interruption code 0x%X ", + regs->int_code); + print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); + printk(KERN_CONT "\n"); + printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + dump_fault_info(regs); + show_regs(regs); } /* * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. */ -static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, - int si_code, unsigned long address) +static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; -#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) -#if defined(CONFIG_SYSCTL) - if (sysctl_userprocess_debug) -#endif - { - printk("User process fault: interruption code 0x%lX\n", - error_code); - printk("failing address: %lX\n", address); - show_regs(regs); - } -#endif + report_user_fault(regs, SIGSEGV); si.si_signo = SIGSEGV; si.si_code = si_code; - si.si_addr = (void *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGSEGV, &si, current); } +static noinline void do_no_context(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + unsigned long address; + + /* Are we prepared to handle this kernel fault? */ + fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + if (fixup) { + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + address = regs->int_parm_long & __FAIL_ADDR_MASK; + if (!user_space_fault(regs)) + printk(KERN_ALERT "Unable to handle kernel pointer dereference" + " in virtual kernel address space\n"); + else + printk(KERN_ALERT "Unable to handle kernel paging request" + " in virtual user address space\n"); + printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); + dump_fault_info(regs); + die(regs, "Oops"); + do_exit(SIGKILL); +} + +static noinline void do_low_address(struct pt_regs *regs) +{ + /* Low-address protection hit in kernel mode means + NULL pointer write access in kernel mode. */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + /* Low-address protection hit in user mode 'cannot happen'. */ + die (regs, "Low-address protection"); + do_exit(SIGKILL); + } + + do_no_context(regs); +} + +static noinline void do_sigbus(struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct siginfo si; + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRERR; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); + force_sig_info(SIGBUS, &si, tsk); +} + +static noinline void do_fault_error(struct pt_regs *regs, int fault) +{ + int si_code; + + switch (fault) { + case VM_FAULT_BADACCESS: + case VM_FAULT_BADMAP: + /* Bad memory access. Check if it is kernel or user space. */ + if (user_mode(regs)) { + /* User mode accesses just cause a SIGSEGV */ + si_code = (fault == VM_FAULT_BADMAP) ? + SEGV_MAPERR : SEGV_ACCERR; + do_sigsegv(regs, si_code); + return; + } + case VM_FAULT_BADCONTEXT: + case VM_FAULT_PFAULT: + do_no_context(regs); + break; + case VM_FAULT_SIGNAL: + if (!user_mode(regs)) + do_no_context(regs); + break; + default: /* fault & VM_FAULT_ERROR */ + if (fault & VM_FAULT_OOM) { + if (!user_mode(regs)) + do_no_context(regs); + else + pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGBUS) { + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + do_no_context(regs); + else + do_sigbus(regs); + } else + BUG(); + break; + } +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * - * error_code: + * interruption code (int_code): * 04 Protection -> Write-Protection (suprression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline void -do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) +static inline int do_exception(struct pt_regs *regs, int access) { - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long address; - int user_address; - const struct exception_table_entry *fixup; - int si_code = SEGV_MAPERR; - - tsk = current; - mm = tsk->mm; - - /* - * Check for low-address protection. This needs to be treated - * as a special case because the translation exception code - * field is not guaranteed to contain valid data in this case. +#ifdef CONFIG_PGSTE + struct gmap *gmap; +#endif + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct *vma; + unsigned long trans_exc_code; + unsigned long address; + unsigned int flags; + int fault; + + tsk = current; + /* + * The instruction that caused the program check has + * been nullified. Don't signal single step via SIGTRAP. */ - if (is_protection && !(S390_lowcore.trans_exc_code & 4)) { - - /* Low-address protection hit in kernel mode means - NULL pointer write access in kernel mode. */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) { - address = 0; - user_address = 0; - goto no_context; - } + clear_pt_regs_flag(regs, PIF_PER_TRAP); - /* Low-address protection hit in user mode 'cannot happen'. */ - die ("Low-address protection", regs, error_code); - do_exit(SIGKILL); - } + if (notify_page_fault(regs)) + return 0; - /* - * get the failing address - * more specific the segment and page table portion of - * the address - */ - address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; - user_address = check_user_space(regs, error_code); + mm = tsk->mm; + trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ - if (user_address == 0 || in_atomic() || !mm) - goto no_context; + fault = VM_FAULT_BADCONTEXT; + if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) + goto out; + + address = trans_exc_code & __FAIL_ADDR_MASK; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + flags |= FAULT_FLAG_WRITE; + down_read(&mm->mmap_sem); + +#ifdef CONFIG_PGSTE + gmap = (struct gmap *) + ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + if (gmap) { + address = __gmap_fault(address, gmap); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + if (gmap->pfault_enabled) + flags |= FAULT_FLAG_RETRY_NOWAIT; + } +#endif + +retry: + fault = VM_FAULT_BADMAP; + vma = find_vma(mm, address); + if (!vma) + goto out_up; + + if (unlikely(vma->vm_start > address)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_up; + if (expand_stack(vma, address)) + goto out_up; + } /* - * When we get here, the fault happened in the current - * task's user address space, so we can switch on the - * interrupts again and then search the VMAs + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. */ - local_irq_enable(); - - down_read(&mm->mmap_sem); - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - si_code = SEGV_ACCERR; - if (!is_protection) { - /* page not present, check vm flags */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; - } else { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } + fault = VM_FAULT_BADACCESS; + if (unlikely(!(vma->vm_flags & access))) + goto out_up; -survive: + if (is_vm_hugetlb_page(vma)) + address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, is_protection)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: - BUG(); + fault = handle_mm_fault(mm, vma, address, flags); + /* No reason to continue if interrupted by SIGKILL. */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { + fault = VM_FAULT_SIGNAL; + goto out; } + if (unlikely(fault & VM_FAULT_ERROR)) + goto out_up; - up_read(&mm->mmap_sem); /* - * The instruction that caused the program check will - * be repeated. Don't signal single step via SIGTRAP. + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. */ - clear_tsk_thread_flag(current, TIF_SINGLE_STEP); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - - /* User mode accesses just cause a SIGSEGV */ - if (regs->psw.mask & PSW_MASK_PSTATE) { - tsk->thread.prot_addr = address; - tsk->thread.trap_no = error_code; - do_sigsegv(regs, error_code, si_code, address); - return; + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { +#ifdef CONFIG_PGSTE + if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { + /* FAULT_FLAG_RETRY_NOWAIT has been set, + * mmap_sem has not been released */ + current->thread.gmap_pfault = 1; + fault = VM_FAULT_PFAULT; + goto out_up; + } +#endif + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~(FAULT_FLAG_ALLOW_RETRY | + FAULT_FLAG_RETRY_NOWAIT); + flags |= FAULT_FLAG_TRIED; + down_read(&mm->mmap_sem); + goto retry; + } } - -no_context: - /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); - if (fixup) { - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; - return; - } - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - if (user_address == 0) - printk(KERN_ALERT "Unable to handle kernel pointer dereference" - " at virtual kernel address %p\n", (void *)address); - else - printk(KERN_ALERT "Unable to handle kernel paging request" - " at virtual user address %p\n", (void *)address); - - die("Oops", regs, error_code); - do_exit(SIGKILL); - - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. -*/ -out_of_memory: + fault = 0; +out_up: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { - yield(); - goto survive; - } - printk("VM: killing process %s\n", tsk->comm); - if (regs->psw.mask & PSW_MASK_PSTATE) - do_exit(SIGKILL); - goto no_context; +out: + return fault; +} -do_sigbus: - up_read(&mm->mmap_sem); +void __kprobes do_protection_exception(struct pt_regs *regs) +{ + unsigned long trans_exc_code; + int fault; + trans_exc_code = regs->int_parm_long; /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. + * Protection exceptions are suppressing, decrement psw address. + * The exception to this rule are aborted transactions, for these + * the PSW already points to the correct location. */ - tsk->thread.prot_addr = address; - tsk->thread.trap_no = error_code; - force_sig(SIGBUS, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - goto no_context; + if (!(regs->int_code & 0x200)) + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + /* + * Check for low-address protection. This needs to be treated + * as a special case because the translation exception code + * field is not guaranteed to contain valid data in this case. + */ + if (unlikely(!(trans_exc_code & 4))) { + do_low_address(regs); + return; + } + fault = do_exception(regs, VM_WRITE); + if (unlikely(fault)) + do_fault_error(regs, fault); } -void do_protection_exception(struct pt_regs *regs, unsigned long error_code) +void __kprobes do_dat_exception(struct pt_regs *regs) { - regs->psw.addr -= (error_code >> 16); - do_exception(regs, 4, 1); -} + int access, fault; -void do_dat_exception(struct pt_regs *regs, unsigned long error_code) -{ - do_exception(regs, error_code & 0xff, 0); + access = VM_READ | VM_EXEC | VM_WRITE; + fault = do_exception(regs, access); + if (unlikely(fault)) + do_fault_error(regs, fault); } #ifdef CONFIG_PFAULT /* * 'pfault' pseudo page faults routines. */ -static int pfault_disable = 0; +static int pfault_disable; static int __init nopfault(char *str) { @@ -366,71 +588,69 @@ static int __init nopfault(char *str) __setup("nopfault", nopfault); -typedef struct { - __u16 refdiagc; - __u16 reffcode; - __u16 refdwlen; - __u16 refversn; - __u64 refgaddr; - __u64 refselmk; - __u64 refcmpmk; - __u64 reserved; -} __attribute__ ((packed)) pfault_refbk_t; +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); int pfault_init(void) { - pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, - __PF_RES_FIELD }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_CURRENT_PID, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD }; int rc; if (pfault_disable) return -1; - __asm__ __volatile__( - " diag %1,%0,0x258\n" - "0: j 2f\n" - "1: la %0,8\n" + asm volatile( + " diag %1,%0,0x258\n" + "0: j 2f\n" + "1: la %0,8\n" "2:\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" -#ifndef CONFIG_64BIT - " .long 0b,1b\n" -#else /* CONFIG_64BIT */ - " .quad 0b,1b\n" -#endif /* CONFIG_64BIT */ - ".previous" - : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc" ); - __ctl_set_bit(0, 9); + EX_TABLE(0b,1b) + : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); return rc; } void pfault_fini(void) { - pfault_refbk_t refbk = - { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, + }; if (pfault_disable) return; - __ctl_clear_bit(0,9); - __asm__ __volatile__( - " diag %0,0,0x258\n" + asm volatile( + " diag %0,0,0x258\n" "0:\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" -#ifndef CONFIG_64BIT - " .long 0b,0b\n" -#else /* CONFIG_64BIT */ - " .quad 0b,0b\n" -#endif /* CONFIG_64BIT */ - ".previous" - : : "a" (&refbk), "m" (refbk) : "cc" ); + EX_TABLE(0b,0b) + : : "a" (&refbk), "m" (refbk) : "cc"); } -asmlinkage void -pfault_interrupt(struct pt_regs *regs, __u16 error_code) +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + +static void pfault_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; + pid_t pid; /* * Get the external interruption subcode & pfault @@ -438,43 +658,118 @@ pfault_interrupt(struct pt_regs *regs, __u16 error_code) * in the 'cpu address' field associated with the * external interrupt. */ - subcode = S390_lowcore.cpu_addr; + subcode = ext_code.subcode; if ((subcode & 0xff00) != __SUBCODE_MASK) return; - - /* - * Get the token (= address of the task structure of the affected task). - */ - tsk = *(struct task_struct **) __LC_PFAULT_INTPARM; - + inc_irq_stat(IRQEXT_PFL); + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ - if (xchg(&tsk->thread.pfault_wait, -1) != 0) { + if (tsk->thread.pfault_wait == 1) { /* Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping * and can't produce new pfaults. */ tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); wake_up_process(tsk); put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } } else { /* signal bit not set -> a real page is missing. */ - get_task_struct(tsk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (xchg(&tsk->thread.pfault_wait, 1) != 0) { + if (WARN_ON_ONCE(tsk != current)) + goto out; + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial - * interrupt (swapped in a -1 for pfault_wait). Set - * pfault_wait back to zero and exit. This can be - * done safely because tsk is running in kernel - * mode and can't produce new pfaults. */ + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ tsk->thread.pfault_wait = 0; - set_task_state(tsk, TASK_RUNNING); - put_task_struct(tsk); - } else + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); + } } +out: + spin_unlock(&pfault_lock); + put_task_struct(tsk); } -#endif +static int pfault_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DEAD: + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + put_task_struct(tsk); + } + spin_unlock_irq(&pfault_lock); + break; + default: + break; + } + return NOTIFY_OK; +} + +static int __init pfault_irq_init(void) +{ + int rc; + + rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); + hotcpu_notifier(pfault_cpu_notify, 0); + return 0; + +out_pfault: + unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); +out_extint: + pfault_disable = 1; + return rc; +} +early_initcall(pfault_irq_init); + +#endif /* CONFIG_PFAULT */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c new file mode 100644 index 00000000000..639fce46400 --- /dev/null +++ b/arch/s390/mm/gup.c @@ -0,0 +1,246 @@ +/* + * Lockless get_user_pages_fast for s390 + * + * Copyright IBM Corp. 2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/vmstat.h> +#include <linux/pagemap.h> +#include <linux/rwsem.h> +#include <asm/pgtable.h> + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + pte_t *ptep, pte; + struct page *page; + + mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + + ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); + do { + pte = *ptep; + barrier(); + if ((pte_val(pte) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + struct page *head, *page, *tail; + int refs; + + result = write ? 0 : _SEGMENT_ENTRY_PROTECT; + mask = result | _SEGMENT_ENTRY_INVALID; + if ((pmd_val(pmd) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + + +static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + pmdp = (pmd_t *) pudp; +#ifdef CONFIG_64BIT + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmdp = (pmd_t *) pud_deref(pud); + pmdp += pmd_index(addr); +#endif + do { + pmd = *pmdp; + barrier(); + next = pmd_addr_end(addr, end); + /* + * The pmd_trans_splitting() check below explains why + * pmdp_splitting_flush() has to serialize with + * smp_call_function() against our disabled IRQs, to stop + * this gup-fast code from running while we set the + * splitting bit in the pmd. Returning zero will take + * the slow path that will call wait_split_huge_page() + * if the pmd is still in splitting state. + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + return 0; + if (unlikely(pmd_large(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp, pud; + + pudp = (pud_t *) pgdp; +#ifdef CONFIG_64BIT + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) pgd_deref(pgd); + pudp += pud_index(addr); +#endif + do { + pud = *pudp; + barrier(); + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if ((end <= start) || (end > TASK_SIZE)) + return 0; + /* + * local_irq_save() doesn't prevent pagetable teardown, but does + * prevent the pagetables from being freed on s390. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + int nr, ret; + + start &= PAGE_MASK; + nr = __get_user_pages_fast(start, nr_pages, write, pages); + if (nr == nr_pages) + return nr; + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + nr_pages - nr, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + /* Have to be a bit careful with return values */ + if (nr > 0) + ret = (ret < 0) ? nr : ret + nr; + return ret; +} diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c new file mode 100644 index 00000000000..0ff66a7e29b --- /dev/null +++ b/arch/s390/mm/hugetlbpage.c @@ -0,0 +1,235 @@ +/* + * IBM System z Huge TLB Page Support for Kernel. + * + * Copyright IBM Corp. 2007 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/hugetlb.h> + +static inline pmd_t __pte_to_pmd(pte_t pte) +{ + int none, young, prot; + pmd_t pmd; + + /* + * Convert encoding pte bits pmd bits + * .IR...wrdytp ..R...I...y. + * empty .10...000000 -> ..0...1...0. + * prot-none, clean, old .11...000001 -> ..0...1...1. + * prot-none, clean, young .11...000101 -> ..1...1...1. + * prot-none, dirty, old .10...001001 -> ..0...1...1. + * prot-none, dirty, young .10...001101 -> ..1...1...1. + * read-only, clean, old .11...010001 -> ..1...1...0. + * read-only, clean, young .01...010101 -> ..1...0...1. + * read-only, dirty, old .11...011001 -> ..1...1...0. + * read-only, dirty, young .01...011101 -> ..1...0...1. + * read-write, clean, old .11...110001 -> ..0...1...0. + * read-write, clean, young .01...110101 -> ..0...0...1. + * read-write, dirty, old .10...111001 -> ..0...1...0. + * read-write, dirty, young .00...111101 -> ..0...0...1. + * Huge ptes are dirty by definition, a clean pte is made dirty + * by the conversion. + */ + if (pte_present(pte)) { + pmd_val(pmd) = pte_val(pte) & PAGE_MASK; + if (pte_val(pte) & _PAGE_INVALID) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + none = (pte_val(pte) & _PAGE_PRESENT) && + !(pte_val(pte) & _PAGE_READ) && + !(pte_val(pte) & _PAGE_WRITE); + prot = (pte_val(pte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_WRITE); + young = pte_val(pte) & _PAGE_YOUNG; + if (none || young) + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (prot || (none && young)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else + pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; + return pmd; +} + +static inline pte_t __pmd_to_pte(pmd_t pmd) +{ + pte_t pte; + + /* + * Convert encoding pmd bits pte bits + * ..R...I...y. .IR...wrdytp + * empty ..0...1...0. -> .10...000000 + * prot-none, old ..0...1...1. -> .10...001001 + * prot-none, young ..1...1...1. -> .10...001101 + * read-only, old ..1...1...0. -> .11...011001 + * read-only, young ..1...0...1. -> .01...011101 + * read-write, old ..0...1...0. -> .10...111001 + * read-write, young ..0...0...1. -> .00...111101 + * Huge ptes are dirty by definition + */ + if (pmd_present(pmd)) { + pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | + (pmd_val(pmd) & PAGE_MASK); + if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) + pte_val(pte) |= _PAGE_INVALID; + if (pmd_prot_none(pmd)) { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_YOUNG; + } else { + pte_val(pte) |= _PAGE_READ; + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_WRITE; + if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) + pte_val(pte) |= _PAGE_YOUNG; + } + } else + pte_val(pte) = _PAGE_INVALID; + return pte; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + pmd_t pmd; + + pmd = __pte_to_pmd(pte); + if (!MACHINE_HAS_HPAGE) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= pte_page(pte)[1].index; + } else + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + *(pmd_t *) ptep = pmd; +} + +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long origin; + pmd_t pmd; + + pmd = *(pmd_t *) ptep; + if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { + origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= *(unsigned long *) origin; + } + return __pmd_to_pte(pmd); +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + pte_t pte = huge_ptep_get(ptep); + + pmdp_flush_direct(mm, addr, pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + return pte; +} + +int arch_prepare_hugepage(struct page *page) +{ + unsigned long addr = page_to_phys(page); + pte_t pte; + pte_t *ptep; + int i; + + if (MACHINE_HAS_HPAGE) + return 0; + + ptep = (pte_t *) pte_alloc_one(&init_mm, addr); + if (!ptep) + return -ENOMEM; + + pte_val(pte) = addr; + for (i = 0; i < PTRS_PER_PTE; i++) { + set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); + pte_val(pte) += PAGE_SIZE; + } + page[1].index = (unsigned long) ptep; + return 0; +} + +void arch_release_hugepage(struct page *page) +{ + pte_t *ptep; + + if (MACHINE_HAS_HPAGE) + return; + + ptep = (pte_t *) page[1].index; + if (!ptep) + return; + clear_table((unsigned long *) ptep, _PAGE_INVALID, + PTRS_PER_PTE * sizeof(pte_t)); + page_table_free(&init_mm, (unsigned long *) ptep); + page[1].index = 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + pudp = pud_alloc(mm, pgdp, addr); + if (pudp) + pmdp = pmd_alloc(mm, pudp, addr); + return (pte_t *) pmdp; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + if (pgd_present(*pgdp)) { + pudp = pud_offset(pgdp, addr); + if (pud_present(*pudp)) + pmdp = pmd_offset(pudp, addr); + } + return (pte_t *) pmdp; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + if (!MACHINE_HAS_HPAGE) + return 0; + + return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmdp, int write) +{ + struct page *page; + + if (!MACHINE_HAS_HPAGE) + return NULL; + + page = pmd_page(*pmdp); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index df953383724..0c1073ed1e8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -1,15 +1,12 @@ /* - * arch/s390/mm/init.c - * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Hartmut Penner (hp@de.ibm.com) * * Derived from "arch/i386/mm/init.c" * Copyright (C) 1995 Linus Torvalds */ -#include <linux/config.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -24,9 +21,13 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/bootmem.h> - +#include <linux/memory.h> +#include <linux/pfn.h> +#include <linux/poison.h> +#include <linux/initrd.h> +#include <linux/export.h> +#include <linux/gfp.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -34,282 +35,197 @@ #include <asm/lowcore.h> #include <asm/tlb.h> #include <asm/tlbflush.h> - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +#include <asm/sections.h> +#include <asm/ctl_reg.h> +#include <asm/sclp.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); -char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); -void diag10(unsigned long addr) -{ - if (addr >= 0x7ff00000) - return; -#ifdef CONFIG_64BIT - asm volatile ( - " sam31\n" - " diag %0,%0,0x10\n" - "0: sam64\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 0b, 0b\n" - ".previous\n" - : : "a" (addr)); -#else - asm volatile ( - " diag %0,%0,0x10\n" - "0:\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b, 0b\n" - ".previous\n" - : : "a" (addr)); -#endif -} +unsigned long empty_zero_page, zero_page_mask; +EXPORT_SYMBOL(empty_zero_page); -void show_mem(void) +static void __init setup_zero_pages(void) { - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; + struct cpuid cpu_id; + unsigned int order; + struct page *page; + int i; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: /* g5 */ + case 0x2064: /* z900 */ + case 0x2066: /* z900 */ + case 0x2084: /* z990 */ + case 0x2086: /* z990 */ + case 0x2094: /* z9-109 */ + case 0x2096: /* z9-109 */ + order = 0; + break; + case 0x2097: /* z10 */ + case 0x2098: /* z10 */ + case 0x2817: /* z196 */ + case 0x2818: /* z196 */ + order = 2; + break; + case 0x2827: /* zEC12 */ + case 0x2828: /* zEC12 */ + default: + order = 5; + break; + } + /* Limit number of empty zero pages for small memory sizes */ + if (order > 2 && totalram_pages <= 16384) + order = 2; + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Out of memory in setup_zero_pages"); + + page = virt_to_page((void *) empty_zero_page); + split_page(page, order); + for (i = 1 << order; i > 0; i--) { + mark_page_reserved(page); + page++; + } - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - i = max_mapnr; - while (i-- > 0) { - total++; - if (PageReserved(mem_map+i)) - reserved++; - else if (PageSwapCache(mem_map+i)) - cached++; - else if (page_count(mem_map+i)) - shared += page_count(mem_map+i) - 1; - } - printk("%d pages of RAM\n",total); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } -/* References to section boundaries */ - -extern unsigned long _text; -extern unsigned long _etext; -extern unsigned long _edata; -extern unsigned long __bss_start; -extern unsigned long _end; - -extern unsigned long __init_begin; -extern unsigned long __init_end; - -extern unsigned long __initdata zholes_size[]; /* * paging_init() sets up the page tables */ - -#ifndef CONFIG_64BIT -void __init paging_init(void) -{ - pgd_t * pg_dir; - pte_t * pg_table; - pte_t pte; - int i; - unsigned long tmp; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; - static const int ssm_mask = 0x04000000L; - - /* unmap whole virtual address space */ - - pg_dir = swapper_pg_dir; - - for (i=0;i<KERNEL_PGD_PTRS;i++) - pmd_clear((pmd_t*)pg_dir++); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - while (pfn < max_low_pfn) { - /* - * pg_table is physical at this point - */ - pg_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - - pg_dir->pgd0 = (_PAGE_TABLE | __pa(pg_table)); - pg_dir->pgd1 = (_PAGE_TABLE | (__pa(pg_table)+1024)); - pg_dir->pgd2 = (_PAGE_TABLE | (__pa(pg_table)+2048)); - pg_dir->pgd3 = (_PAGE_TABLE | (__pa(pg_table)+3072)); - pg_dir++; - - for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_clear(&init_mm, 0, &pte); - set_pte(pg_table, pte); - pfn++; - } - } - - S390_lowcore.kernel_asce = pgdir_k; - - /* enable virtual mapping in kernel mode */ - __asm__ __volatile__(" LCTL 1,1,%0\n" - " LCTL 7,7,%0\n" - " LCTL 13,13,%0\n" - " SSM %1" - : : "m" (pgdir_k), "m" (ssm_mask)); - - local_flush_tlb(); - - { - unsigned long zones_size[MAX_NR_ZONES]; - - memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_DMA] = max_low_pfn; - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, - zholes_size); - } - return; -} - -#else /* CONFIG_64BIT */ void __init paging_init(void) { - pgd_t * pg_dir; - pmd_t * pm_dir; - pte_t * pt_dir; - pte_t pte; - int i,j,k; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | - _KERN_REGION_TABLE; - static const int ssm_mask = 0x04000000L; - - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long dma_pfn, high_pfn; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + unsigned long pgd_type, asce_bits; - memset(zones_size, 0, sizeof(zones_size)); - dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; - high_pfn = max_low_pfn; - - if (dma_pfn > high_pfn) - zones_size[ZONE_DMA] = high_pfn; - else { - zones_size[ZONE_DMA] = dma_pfn; - zones_size[ZONE_NORMAL] = high_pfn - dma_pfn; + init_mm.pgd = swapper_pg_dir; +#ifdef CONFIG_64BIT + if (VMALLOC_END > (1UL << 42)) { + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION2_ENTRY_EMPTY; + } else { + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION3_ENTRY_EMPTY; } - - /* Initialize mem_map[]. */ - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) { - - if (pfn >= max_low_pfn) { - pgd_clear(pg_dir); - continue; - } - - pm_dir = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE*4); - pgd_populate(&init_mm, pg_dir, pm_dir); - - for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) { - if (pfn >= max_low_pfn) { - pmd_clear(pm_dir); - continue; - } - - pt_dir = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); - - for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) { - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) { - pte_clear(&init_mm, 0, &pte); - continue; - } - set_pte(pt_dir, pte); - pfn++; - } - } - } - - S390_lowcore.kernel_asce = pgdir_k; +#else + asce_bits = _ASCE_TABLE_LENGTH; + pgd_type = _SEGMENT_ENTRY_EMPTY; +#endif + S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + clear_table((unsigned long *) init_mm.pgd, pgd_type, + sizeof(unsigned long)*2048); + vmem_map_init(); /* enable virtual mapping in kernel mode */ - __asm__ __volatile__("lctlg 1,1,%0\n\t" - "lctlg 7,7,%0\n\t" - "lctlg 13,13,%0\n\t" - "ssm %1" - : :"m" (pgdir_k), "m" (ssm_mask)); - - local_flush_tlb(); - - return; + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.kernel_asce, 13, 13); + arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); + + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + free_area_init_nodes(max_zone_pfns); } -#endif /* CONFIG_64BIT */ void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, mm_cpumask(&init_mm)); + atomic_set(&init_mm.context.attach_count, 1); - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); + /* Setup guest page hinting */ + cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reservedpages = 0; + free_all_bootmem(); + setup_zero_pages(); /* Setup zeroed pages. */ - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); + printk("Write protected kernel read-only data: %#lx - %#lx\n", + (unsigned long)&_stext, + PFN_ALIGN((unsigned long)&_eshared) - 1); } void free_initmem(void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk ("Freeing unused kernel memory: %ldk freed\n", - ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) +{ + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); +} +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size) +{ + unsigned long zone_start_pfn, zone_end_pfn, nr_pages; + unsigned long start_pfn = PFN_DOWN(start); + unsigned long size_pages = PFN_DOWN(size); + struct zone *zone; + int rc; + + rc = vmem_add_mapping(start, size); + if (rc) + return rc; + for_each_zone(zone) { + if (zone_idx(zone) != ZONE_MOVABLE) { + /* Add range within existing zone limits */ + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; + } else { + /* Add remaining range to ZONE_MOVABLE */ + zone_start_pfn = start_pfn; + zone_end_pfn = start_pfn + size_pages; + } + if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) + continue; + nr_pages = (start_pfn + size_pages > zone_end_pfn) ? + zone_end_pfn - start_pfn : size_pages; + rc = __add_pages(nid, zone, start_pfn, nr_pages); + if (rc) + break; + start_pfn += nr_pages; + size_pages -= nr_pages; + if (!size_pages) + break; + } + if (rc) + vmem_remove_mapping(start, size); + return rc; +} + +unsigned long memory_block_size_bytes(void) +{ + /* + * Make sure the memory block size is always greater + * or equal than the memory increment size. + */ + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) { - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } + /* + * There is no hardware or firmware interface which could trigger a + * hot memory remove on s390. So there is nothing that needs to be + * implemented. + */ + return -EBUSY; } #endif +#endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c deleted file mode 100644 index 0f6e9ecbefe..00000000000 --- a/arch/s390/mm/ioremap.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * arch/s390/mm/ioremap.c - * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Hartmut Penner (hp@de.ibm.com) - * - * Derived from "arch/i386/mm/extable.c" - * (C) Copyright 1995 1996 Linus Torvalds - * - * Re-map IO memory to kernel address space so that we can access it. - * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - */ - -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <asm/io.h> -#include <asm/pgalloc.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - unsigned long pfn; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, __pgprot(flags))); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} - -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - if (address >= end) - BUG(); - do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -static int remap_area_pages(unsigned long address, unsigned long phys_addr, - unsigned long size, unsigned long flags) -{ - int error; - pgd_t * dir; - unsigned long end = address + size; - - phys_addr -= address; - dir = pgd_offset(&init_mm, address); - flush_cache_all(); - if (address >= end) - BUG(); - do { - pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); - error = -ENOMEM; - if (!pmd) - break; - if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - flush_tlb_all(); - return 0; -} - -/* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - */ -void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) -{ - void * addr; - struct vm_struct * area; - - if (phys_addr < virt_to_phys(high_memory)) - return phys_to_virt(phys_addr); - if (phys_addr & ~PAGE_MASK) - return NULL; - size = PAGE_ALIGN(size); - if (!size || size > phys_addr + size) - return NULL; - area = get_vm_area(size, VM_IOREMAP); - if (!area) - return NULL; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - vfree(addr); - return NULL; - } - return addr; -} - -void iounmap(void *addr) -{ - if (addr > high_memory) - vfree(addr); -} diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c new file mode 100644 index 00000000000..2a2e35416d2 --- /dev/null +++ b/arch/s390/mm/maccess.c @@ -0,0 +1,204 @@ +/* + * Access kernel memory without faulting -- s390 specific implementation. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/cpu.h> +#include <asm/ctl_reg.h> +#include <asm/io.h> + +/* + * This function writes to kernel memory bypassing DAT and possible + * write protection. It copies one to four bytes from src to dst + * using the stura instruction. + * Returns the number of bytes copied or -EFAULT. + */ +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) +{ + unsigned long count, aligned; + int offset, mask; + int rc = -EFAULT; + + aligned = (unsigned long) dst & ~3UL; + offset = (unsigned long) dst & 3; + count = min_t(unsigned long, 4 - offset, size); + mask = (0xf << (4 - count)) & 0xf; + mask >>= offset; + asm volatile( + " bras 1,0f\n" + " icm 0,0,0(%3)\n" + "0: l 0,0(%1)\n" + " lra %1,0(%1)\n" + "1: ex %2,0(1)\n" + "2: stura 0,%1\n" + " la %0,0\n" + "3:\n" + EX_TABLE(0b,3b) EX_TABLE(1b,3b) EX_TABLE(2b,3b) + : "+d" (rc), "+a" (aligned) + : "a" (mask), "a" (src) : "cc", "memory", "0", "1"); + return rc ? rc : count; +} + +long probe_kernel_write(void *dst, const void *src, size_t size) +{ + long copied = 0; + + while (size) { + copied = probe_kernel_write_odd(dst, src, size); + if (copied < 0) + break; + dst += copied; + src += copied; + size -= copied; + } + return copied < 0 ? -EFAULT : 0; +} + +static int __memcpy_real(void *dest, void *src, size_t count) +{ + register unsigned long _dest asm("2") = (unsigned long) dest; + register unsigned long _len1 asm("3") = (unsigned long) count; + register unsigned long _src asm("4") = (unsigned long) src; + register unsigned long _len2 asm("5") = (unsigned long) count; + int rc = -EFAULT; + + asm volatile ( + "0: mvcle %1,%2,0x0\n" + "1: jo 0b\n" + " lhi %0,0x0\n" + "2:\n" + EX_TABLE(1b,2b) + : "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1), + "+d" (_len2), "=m" (*((long *) dest)) + : "m" (*((long *) src)) + : "cc", "memory"); + return rc; +} + +/* + * Copy memory in real mode (kernel to kernel) + */ +int memcpy_real(void *dest, void *src, size_t count) +{ + unsigned long flags; + int rc; + + if (!count) + return 0; + local_irq_save(flags); + __arch_local_irq_stnsm(0xfbUL); + rc = __memcpy_real(dest, src, count); + local_irq_restore(flags); + return rc; +} + +/* + * Copy memory in absolute mode (kernel to kernel) + */ +void memcpy_absolute(void *dest, void *src, size_t count) +{ + unsigned long cr0, flags, prefix; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + prefix = store_prefix(); + if (prefix) { + local_mcck_disable(); + set_prefix(0); + memcpy(dest, src, count); + set_prefix(prefix); + local_mcck_enable(); + } else { + memcpy(dest, src, count); + } + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, unsigned long count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Check if physical address is within prefix or zero page + */ +static int is_swapped(unsigned long addr) +{ + unsigned long lc; + int cpu; + + if (addr < sizeof(struct _lowcore)) + return 1; + for_each_online_cpu(cpu) { + lc = (unsigned long) lowcore_ptr[cpu]; + if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) + continue; + return 1; + } + return 0; +} + +/* + * Convert a physical pointer for /dev/mem access + * + * For swapped prefix pages a new buffer is returned that contains a copy of + * the absolute memory. The buffer size is maximum one page large. + */ +void *xlate_dev_mem_ptr(unsigned long addr) +{ + void *bounce = (void *) addr; + unsigned long size; + + get_online_cpus(); + preempt_disable(); + if (is_swapped(addr)) { + size = PAGE_SIZE - (addr & ~PAGE_MASK); + bounce = (void *) __get_free_page(GFP_ATOMIC); + if (bounce) + memcpy_absolute(bounce, (void *) addr, size); + } + preempt_enable(); + put_online_cpus(); + return bounce; +} + +/* + * Free converted buffer for /dev/mem access (if necessary) + */ +void unxlate_dev_mem_ptr(unsigned long addr, void *buf) +{ + if ((void *) addr != buf) + free_page((unsigned long) buf); +} diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c new file mode 100644 index 00000000000..5535cfe0ee1 --- /dev/null +++ b/arch/s390/mm/mem_detect.c @@ -0,0 +1,65 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/memblock.h> +#include <linux/init.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 + +static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size) +{ + memblock_add_range(&memblock.memory, start, size, 0, 0); + memblock_add_range(&memblock.physmem, start, size, 0, 0); +} + +void __init detect_memory_memblock(void) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr, size; + int type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (IS_ENABLED(CONFIG_32BIT)) { + rzm = min(ADDR2G, rzm); + memsize = min(ADDR2G, memsize); + } + max_physmem_end = memsize; + addr = 0; + /* keep memblock lists close to the kernel */ + memblock_set_bottom_up(true); + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (max_physmem_end && addr + size >= max_physmem_end) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (max_physmem_end && (addr + size > max_physmem_end)) + size = max_physmem_end - addr; + memblock_physmem_add(addr, size); + } + addr += size; + } while (addr < max_physmem_end); + memblock_set_bottom_up(false); + if (!max_physmem_end) + max_physmem_end = memblock_end_of_DRAM(); +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 356257c171d..9b436c21195 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -1,6 +1,4 @@ /* - * linux/arch/s390/mm/mmap.c - * * flexible mmap layout support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. @@ -26,42 +24,140 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/module.h> +#include <linux/random.h> +#include <linux/compat.h> +#include <asm/pgalloc.h> + +static unsigned long stack_maxrandom_size(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + if (current->personality & ADDR_NO_RANDOMIZE) + return 0; + return STACK_RND_MASK << PAGE_SHIFT; +} /* * Top of mmap area (just below the process stack). * - * Leave an at least ~128 MB hole. + * Leave at least a ~32 MB hole. */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) +#define MIN_GAP (32*1024*1024) +#define MAX_GAP (STACK_TOP/6*5) + +static inline int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_rnd(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + /* 8MB randomization for mmap_base */ + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; +} + +static unsigned long mmap_base_legacy(void) +{ + return TASK_UNMAPPED_BASE + mmap_rnd(); +} static inline unsigned long mmap_base(void) { - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long gap = rlimit(RLIMIT_STACK); if (gap < MIN_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; - - return TASK_SIZE - (gap & PAGE_MASK); + gap &= PAGE_MASK; + return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; } -static inline int mmap_is_legacy(void) +#ifndef CONFIG_64BIT + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) { -#ifdef CONFIG_64BIT /* - * Force standard allocation for 64 bit programs. + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: */ - if (!test_thread_flag(TIF_31BIT)) - return 1; -#endif - return sysctl_legacy_va_layout || - (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; + if (mmap_is_legacy()) { + mm->mmap_base = mmap_base_legacy(); + mm->get_unmapped_area = arch_get_unmapped_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + } +} + +#else + +int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) +{ + if (is_compat_task() || (TASK_SIZE >= (1UL << 53))) + return 0; + if (!(flags & MAP_FIXED)) + addr = 0; + if ((addr + len) >= TASK_SIZE) + return crst_table_upgrade(current->mm, 1UL << 53); + return 0; } +static unsigned long +s390_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + unsigned long area; + int rc; + + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); + if (!(area & ~PAGE_MASK)) + return area; + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { + /* Upgrade the page table to 4 levels and retry. */ + rc = crst_table_upgrade(mm, 1UL << 53); + if (rc) + return (unsigned long) rc; + area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); + } + return area; +} + +static unsigned long +s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct mm_struct *mm = current->mm; + unsigned long area; + int rc; + + area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); + if (!(area & ~PAGE_MASK)) + return area; + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { + /* Upgrade the page table to 4 levels and retry. */ + rc = crst_table_upgrade(mm, 1UL << 53); + if (rc) + return (unsigned long) rc; + area = arch_get_unmapped_area_topdown(filp, addr, len, + pgoff, flags); + } + return area; +} /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: @@ -73,14 +169,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * bit is set, or if the expected stack growth is unlimited: */ if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; + mm->mmap_base = mmap_base_legacy(); + mm->get_unmapped_area = s390_get_unmapped_area; } else { mm->mmap_base = mmap_base(); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; + mm->get_unmapped_area = s390_get_unmapped_area_topdown; } } -EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); +#endif diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c new file mode 100644 index 00000000000..a90d45e9dfb --- /dev/null +++ b/arch/s390/mm/page-states.c @@ -0,0 +1,114 @@ +/* + * Copyright IBM Corp. 2008 + * + * Guest page hinting for unused pages. + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/init.h> + +#define ESSA_SET_STABLE 1 +#define ESSA_SET_UNUSED 2 + +static int cmma_flag = 1; + +static int __init cmma(char *str) +{ + char *parm; + + parm = strstrip(str); + if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { + cmma_flag = 1; + return 1; + } + cmma_flag = 0; + if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0) + return 1; + return 0; +} +__setup("cmma=", cmma); + +void __init cmma_init(void) +{ + register unsigned long tmp asm("0") = 0; + register int rc asm("1") = -EOPNOTSUPP; + + if (!cmma_flag) + return; + asm volatile( + " .insn rrf,0xb9ab0000,%1,%1,0,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+&d" (rc), "+&d" (tmp)); + if (rc) + cmma_flag = 0; +} + +static inline void set_page_unstable(struct page *page, int order) +{ + int i, rc; + + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_UNUSED)); +} + +void arch_free_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_unstable(page, order); +} + +static inline void set_page_stable(struct page *page, int order) +{ + int i, rc; + + for (i = 0; i < (1 << order); i++) + asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0" + : "=&d" (rc) + : "a" (page_to_phys(page + i)), + "i" (ESSA_SET_STABLE)); +} + +void arch_alloc_page(struct page *page, int order) +{ + if (!cmma_flag) + return; + set_page_stable(page, order); +} + +void arch_set_page_states(int make_stable) +{ + unsigned long flags, order, t; + struct list_head *l; + struct page *page; + struct zone *zone; + + if (!cmma_flag) + return; + if (make_stable) + drain_local_pages(NULL); + for_each_populated_zone(zone) { + spin_lock_irqsave(&zone->lock, flags); + for_each_migratetype_order(order, t) { + list_for_each(l, &zone->free_area[order].free_list[t]) { + page = list_entry(l, struct page, lru); + if (make_stable) + set_page_stable(page, order); + else + set_page_unstable(page, order); + } + } + spin_unlock_irqrestore(&zone->lock, flags); + } +} diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c new file mode 100644 index 00000000000..8400f494623 --- /dev/null +++ b/arch/s390/mm/pageattr.c @@ -0,0 +1,146 @@ +/* + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/hugetlb.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <asm/page.h> + +#if PAGE_DEFAULT_KEY +static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) +{ + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + : [addr] "+a" (addr) : [skey] "d" (skey)); + return addr; +} + +void __storage_key_init_range(unsigned long start, unsigned long end) +{ + unsigned long boundary, size; + + while (start < end) { + if (MACHINE_HAS_EDAT1) { + /* set storage keys for a 1MB frame */ + size = 1UL << 20; + boundary = (start + size) & ~(size - 1); + if (boundary <= end) { + do { + start = sske_frame(start, PAGE_DEFAULT_KEY); + } while (start < boundary); + continue; + } + } + page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); + start += PAGE_SIZE; + } +} +#endif + +static pte_t *walk_page_table(unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(addr); + if (pgd_none(*pgdp)) + return NULL; + pudp = pud_offset(pgdp, addr); + if (pud_none(*pudp) || pud_large(*pudp)) + return NULL; + pmdp = pmd_offset(pudp, addr); + if (pmd_none(*pmdp) || pmd_large(*pmdp)) + return NULL; + ptep = pte_offset_kernel(pmdp, addr); + if (pte_none(*ptep)) + return NULL; + return ptep; +} + +static void change_page_attr(unsigned long addr, int numpages, + pte_t (*set) (pte_t)) +{ + pte_t *ptep, pte; + int i; + + for (i = 0; i < numpages; i++) { + ptep = walk_page_table(addr); + if (WARN_ON_ONCE(!ptep)) + break; + pte = *ptep; + pte = set(pte); + __ptep_ipte(addr, ptep); + *ptep = pte; + addr += PAGE_SIZE; + } +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_wrprotect); + return 0; +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + change_page_attr(addr, numpages, pte_mkwrite); + return 0; +} + +/* not possible */ +int set_memory_nx(unsigned long addr, int numpages) +{ + return 0; +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return 0; +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long address; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i; + + for (i = 0; i < numpages; i++) { + address = page_to_phys(page + i); + pgd = pgd_offset_k(address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); + if (!enable) { + __ptep_ipte(address, pte); + pte_val(*pte) = _PAGE_INVALID; + continue; + } + pte_val(*pte) = __pa(address); + } +} + +#ifdef CONFIG_HIBERNATION +bool kernel_page_present(struct page *page) +{ + unsigned long addr; + int cc; + + addr = page_to_phys(page); + asm volatile( + " lra %1,0(%1)\n" + " ipm %0\n" + " srl %0,28" + : "=d" (cc), "+a" (addr) : : "cc"); + return cc == 0; +} +#endif /* CONFIG_HIBERNATION */ + +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c new file mode 100644 index 00000000000..37b8241ec78 --- /dev/null +++ b/arch/s390/mm/pgtable.c @@ -0,0 +1,1496 @@ +/* + * Copyright IBM Corp. 2007, 2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/quicklist.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> +#include <linux/swapops.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +#ifndef CONFIG_64BIT +#define ALLOC_ORDER 1 +#define FRAG_MASK 0x0f +#else +#define ALLOC_ORDER 2 +#define FRAG_MASK 0x03 +#endif + + +unsigned long *crst_table_alloc(struct mm_struct *mm) +{ + struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + + if (!page) + return NULL; + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + free_pages((unsigned long) table, ALLOC_ORDER); +} + +#ifdef CONFIG_64BIT +static void __crst_table_upgrade(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); + } + __tlb_flush_local(); +} + +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +{ + unsigned long *table, *pgd; + unsigned long entry; + int flush; + + BUG_ON(limit > (1UL << 53)); + flush = 0; +repeat: + table = crst_table_alloc(mm); + if (!table) + return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); + if (mm->context.asce_limit < limit) { + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit <= (1UL << 31)) { + entry = _REGION3_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + } else { + entry = _REGION2_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 53; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION2; + } + crst_table_init(table, entry); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->task_size = mm->context.asce_limit; + table = NULL; + flush = 1; + } + spin_unlock_bh(&mm->page_table_lock); + if (table) + crst_table_free(mm, table); + if (mm->context.asce_limit < limit) + goto repeat; + if (flush) + on_each_cpu(__crst_table_upgrade, mm, 0); + return 0; +} + +void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +{ + pgd_t *pgd; + + if (current->active_mm == mm) { + clear_user_asce(); + __tlb_flush_mm(mm); + } + while (mm->context.asce_limit > limit) { + pgd = mm->pgd; + switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { + case _REGION_ENTRY_TYPE_R2: + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + break; + case _REGION_ENTRY_TYPE_R3: + mm->context.asce_limit = 1UL << 31; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_SEGMENT; + break; + default: + BUG(); + } + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + } + if (current->active_mm == mm) + set_user_asce(mm); +} +#endif + +#ifdef CONFIG_PGSTE + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * + * Returns a guest address space structure. + */ +struct gmap *gmap_alloc(struct mm_struct *mm) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + if (!page) + goto out_free; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, _REGION1_ENTRY_EMPTY); + gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + list_add(&gmap->list, &mm->context.gmap_list); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) +{ + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + if (*table & _SEGMENT_ENTRY_INVALID) + return 0; + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry(rmap, &mp->mapper, list) { + if (rmap->entry != table) + continue; + list_del(&rmap->list); + kfree(rmap); + break; + } + *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; + return 1; +} + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + unsigned long *table; + int i; + + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | + _ASCE_TYPE_REGION1); + else + __tlb_flush_global(); + + /* Free all segment & region tables. */ + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { + table = (unsigned long *) page_to_phys(page); + if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) + /* Remove gmap rmap structures for segment table. */ + for (i = 0; i < PTRS_PER_PMD; i++, table++) + gmap_unlink_segment(gmap, table); + __free_pages(page, ALLOC_ORDER); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + list_del(&gmap->list); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, + unsigned long *table, unsigned long init) + __releases(&gmap->mm->page_table_lock) + __acquires(&gmap->mm->page_table_lock) +{ + struct page *page; + unsigned long *new; + + /* since we dont free the gmap table until gmap_free we can unlock */ + spin_unlock(&gmap->mm->page_table_lock); + page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + spin_lock(&gmap->mm->page_table_lock); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + if (*table & _REGION_ENTRY_INVALID) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + } else + __free_pages(page, ALLOC_ORDER); + return 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @addr: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the guest addr space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if (*table & _REGION_ENTRY_INVALID) + goto out; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Clear segment table entry in guest address space. */ + flush |= gmap_unlink_segment(gmap, table); + *table = _SEGMENT_ENTRY_INVALID; + } +out: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_mmap_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long *table; + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len > TASK_MAX_SIZE || + from + len < from || to + len < to) + return -EINVAL; + + flush = 0; + down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); + for (off = 0; off < len; off += PMD_SIZE) { + /* Walk the gmap address space page table */ + table = gmap->table + (((to + off) >> 53) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 42) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 31) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) + goto out_unmap; + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); + table = table + (((to + off) >> 20) & 0x7ff); + + /* Store 'from' address in an invalid segment table entry. */ + flush |= gmap_unlink_segment(gmap, table); + *table = (from + off) | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); + } + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; + +out_unmap: + spin_unlock(&gmap->mm->page_table_lock); + up_read(&gmap->mm->mmap_sem); + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) +{ + unsigned long *table; + + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) + return ERR_PTR(-EFAULT); + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + return table; +} + +/** + * __gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, vmaddr, segment; + struct gmap_pgtable *mp; + struct page *page; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return PTR_ERR(segment_ptr); + /* Convert the gmap address to an mm address. */ + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INVALID)) { + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } else if (segment & _SEGMENT_ENTRY_PROTECT) { + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + return vmaddr | (address & ~PMD_MASK); + } + return -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @address: guest address + * @gmap: pointer to guest mapping meta data structure + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) +{ + unsigned long vmaddr; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct mm_struct *mm; + struct page *page; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + mm = gmap->mm; + vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); + if (!vma || vma->vm_start > vmaddr) + return -EFAULT; + /* Walk the parent mm page table */ + pgd = pgd_offset(mm, vmaddr); + pud = pud_alloc(mm, pgd, vmaddr); + if (!pud) + return -ENOMEM; + pmd = pmd_alloc(mm, pud, vmaddr); + if (!pmd) + return -ENOMEM; + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, vmaddr)) + return -ENOMEM; + /* large pmds cannot yet be handled */ + if (pmd_large(*pmd)) + return -EFAULT; + /* pmd now points to a valid segment table entry. */ + rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); + if (!rmap) + return -ENOMEM; + /* Link gmap segment table entry location to page table. */ + page = pmd_page(*pmd); + mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; + rmap->entry = segment_ptr; + rmap->vmaddr = address & PMD_MASK; + spin_lock(&mm->page_table_lock); + if (*segment_ptr == segment) { + list_add(&rmap->list, &mp->mapper); + /* Set gmap segment table entry to page table. */ + *segment_ptr = pmd_val(*pmd) & PAGE_MASK; + rmap = NULL; + } + spin_unlock(&mm->page_table_lock); + kfree(rmap); + return 0; +} + +static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) +{ + struct gmap_rmap *rmap, *next; + struct gmap_pgtable *mp; + struct page *page; + int flush; + + flush = 0; + spin_lock(&mm->page_table_lock); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + list_for_each_entry_safe(rmap, next, &mp->mapper, list) { + *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); + list_del(&rmap->list); + kfree(rmap); + flush = 1; + } + spin_unlock(&mm->page_table_lock); + if (flush) + __tlb_flush_global(); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long *segment_ptr, segment; + struct gmap_pgtable *mp; + struct page *page; + int rc; + + current->thread.gmap_addr = address; + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return -EFAULT; + /* Convert the gmap address to an mm address. */ + while (1) { + segment = *segment_ptr; + if (!(segment & _SEGMENT_ENTRY_INVALID)) { + /* Page table is present */ + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + return mp->vmaddr | (address & ~PMD_MASK); + } + if (!(segment & _SEGMENT_ENTRY_PROTECT)) + /* Nothing mapped in the gmap address space. */ + break; + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); + if (rc) + return rc; + } + return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) { + struct page *page = migration_entry_to_page(entry); + + if (PageAnon(page)) + dec_mm_counter(mm, MM_ANONPAGES); + else + dec_mm_counter(mm, MM_FILEPAGES); + } + free_swap_and_cache(entry); +} + +/** + * The mm->mmap_sem lock must be held + */ +static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) +{ + unsigned long ptev, pgstev; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep, pte; + + ptep = get_locked_pte(mm, address, &ptl); + if (unlikely(!ptep)) + return; + pte = *ptep; + if (!pte_swap(pte)) + goto out_pte; + /* Zap unused and logically-zero pages */ + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + ptev = pte_val(pte); + if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || + ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { + gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); + pte_clear(mm, address, ptep); + } + pgste_set_unlock(ptep, pgste); +out_pte: + pte_unmap_unlock(*ptep, ptl); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(unsigned long address, struct gmap *gmap) +{ + unsigned long *table, *segment_ptr; + unsigned long segment, pgstev, ptev; + struct gmap_pgtable *mp; + struct page *page; + + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return; + segment = *segment_ptr; + if (segment & _SEGMENT_ENTRY_INVALID) + return; + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + address = mp->vmaddr | (address & ~PMD_MASK); + /* Page table is present */ + table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + pgstev = table[PTRS_PER_PTE]; + ptev = table[0]; + /* quick check, checked again with locks held */ + if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || + ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) + gmap_zap_unused(gmap->mm, address); +} +EXPORT_SYMBOL_GPL(__gmap_zap); + +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @start: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= PGSTE_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); + } + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); + +static inline int page_table_with_pgste(struct page *page) +{ + return atomic_read(&page->_mapcount) == 0; +} + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + struct page *page; + unsigned long *table; + struct gmap_pgtable *mp; + + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); + if (!mp) { + __free_page(page); + return NULL; + } + if (!pgtable_page_ctor(page)) { + kfree(mp); + __free_page(page); + return NULL; + } + mp->vmaddr = vmaddr & PMD_MASK; + INIT_LIST_HEAD(&mp->mapper); + page->index = (unsigned long) mp; + atomic_set(&page->_mapcount, 0); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + return table; +} + +static inline void page_table_free_pgste(unsigned long *table) +{ + struct page *page; + struct gmap_pgtable *mp; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + BUG_ON(!list_empty(&mp->mapper)); + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + kfree(mp); + __free_page(page); +} + +static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, bool init_skey) +{ + pte_t *start_pte, *pte; + spinlock_t *ptl; + pgste_t pgste; + + start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte = start_pte; + do { + pgste = pgste_get_lock(pte); + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + if (init_skey) { + unsigned long address; + + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + + /* skip invalid and not writable pages */ + if (pte_val(*pte) & _PAGE_INVALID || + !(pte_val(*pte) & _PAGE_WRITE)) { + pgste_set_unlock(pte, pgste); + continue; + } + + address = pte_val(*pte) & PAGE_MASK; + page_set_storage_key(address, PAGE_DEFAULT_KEY, 1); + } + pgste_set_unlock(pte, pgste); + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(start_pte, ptl); + + return addr; +} + +static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, bool init_skey) +{ + unsigned long next; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + next = page_table_reset_pte(mm, pmd, addr, next, init_skey); + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, bool init_skey) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_reset_pmd(mm, pud, addr, next, init_skey); + } while (pud++, addr = next, addr != end); + + return addr; +} + +void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, + unsigned long end, bool init_skey) +{ + unsigned long addr, next; + pgd_t *pgd; + + down_write(&mm->mmap_sem); + if (init_skey && mm_use_skey(mm)) + goto out_up; + addr = start; + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_reset_pud(mm, pgd, addr, next, init_skey); + } while (pgd++, addr = next, addr != end); + if (init_skey) + current->mm->context.use_skey = 1; +out_up: + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL(page_table_reset_pgste); + +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits, skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set storage key ACC and FP */ + page_set_storage_key(address, skey, !nq); + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(*ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + +#else /* CONFIG_PGSTE */ + +static inline int page_table_with_pgste(struct page *page) +{ + return 0; +} + +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, + unsigned long vmaddr) +{ + return NULL; +} + +void page_table_reset_pgste(struct mm_struct *mm, unsigned long start, + unsigned long end, bool init_skey) +{ +} + +static inline void page_table_free_pgste(unsigned long *table) +{ +} + +static inline void gmap_disconnect_pgtable(struct mm_struct *mm, + unsigned long *table) +{ +} + +#endif /* CONFIG_PGSTE */ + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *uninitialized_var(table); + struct page *uninitialized_var(page); + unsigned int mask, bit; + + if (mm_has_pgste(mm)) + return page_table_alloc_pgste(mm, vmaddr); + /* Allocate fragments of a 4K page as 1K/2K page table */ + spin_lock_bh(&mm->context.list_lock); + mask = FRAG_MASK; + if (!list_empty(&mm->context.pgtable_list)) { + page = list_first_entry(&mm->context.pgtable_list, + struct page, lru); + table = (unsigned long *) page_to_phys(page); + mask = atomic_read(&page->_mapcount); + mask = mask | (mask >> 4); + } + if ((mask & FRAG_MASK) == FRAG_MASK) { + spin_unlock_bh(&mm->context.list_lock); + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + atomic_set(&page->_mapcount, 1); + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); + spin_lock_bh(&mm->context.list_lock); + list_add(&page->lru, &mm->context.pgtable_list); + } else { + for (bit = 1; mask & bit; bit <<= 1) + table += PTRS_PER_PTE; + mask = atomic_xor_bits(&page->_mapcount, bit); + if ((mask & FRAG_MASK) == FRAG_MASK) + list_del(&page->lru); + } + spin_unlock_bh(&mm->context.list_lock); + return table; +} + +void page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned int bit, mask; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { + gmap_disconnect_pgtable(mm, table); + return page_table_free_pgste(table); + } + /* Free 1K/2K page table fragment of a 4K page */ + bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit); + if (mask & FRAG_MASK) + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + if (mask == 0) { + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + } +} + +static void __page_table_free_rcu(void *table, unsigned bit) +{ + struct page *page; + + if (bit == FRAG_MASK) + return page_table_free_pgste(table); + /* Free 1K/2K page table fragment of a 4K page */ + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (atomic_xor_bits(&page->_mapcount, bit) == 0) { + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + } +} + +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) +{ + struct mm_struct *mm; + struct page *page; + unsigned int bit, mask; + + mm = tlb->mm; + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) { + gmap_disconnect_pgtable(mm, table); + table = (unsigned long *) (__pa(table) | FRAG_MASK); + tlb_remove_table(tlb, table); + return; + } + bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); + spin_lock_bh(&mm->context.list_lock); + if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) + list_del(&page->lru); + mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4)); + if (mask & FRAG_MASK) + list_add_tail(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *) (__pa(table) | (bit << 4)); + tlb_remove_table(tlb, table); +} + +static void __tlb_remove_table(void *_table) +{ + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; + + if (type) + __page_table_free_rcu(table, type); + else + free_pages((unsigned long) table, ALLOC_ORDER); +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + tlb->mm->context.flush_mm = 1; + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm_lazy(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_flush_mmu(tlb); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void thp_split_vma(struct vm_area_struct *vma) +{ + unsigned long addr; + + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); +} + +static inline void thp_split_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + thp_split_vma(vma); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + } + mm->def_flags |= VM_NOHUGEPAGE; +} +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, + struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end) +{ + unsigned long next, *table, *new; + struct page *page; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); +again: + if (pmd_none_or_clear_bad(pmd)) + continue; + table = (unsigned long *) pmd_deref(*pmd); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (page_table_with_pgste(page)) + continue; + /* Allocate new page table with pgstes */ + new = page_table_alloc_pgste(mm, addr); + if (!new) + return -ENOMEM; + + spin_lock(&mm->page_table_lock); + if (likely((unsigned long *) pmd_deref(*pmd) == table)) { + /* Nuke pmd entry pointing to the "short" page table */ + pmdp_flush_lazy(mm, addr, pmd); + pmd_clear(pmd); + /* Copy ptes from old table to new table */ + memcpy(new, table, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + /* Establish new table */ + pmd_populate(mm, pmd, (pte_t *) new); + /* Free old table with rcu, there might be a walker! */ + page_table_free_rcu(tlb, table); + new = NULL; + } + spin_unlock(&mm->page_table_lock); + if (new) { + page_table_free_pgste(new); + goto again; + } + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, + struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_realloc_pmd(tlb, mm, pud, addr, next); + if (unlikely(IS_ERR_VALUE(next))) + return next; + } while (pud++, addr = next, addr != end); + + return addr; +} + +static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_realloc_pud(tlb, mm, pgd, addr, next); + if (unlikely(IS_ERR_VALUE(next))) + return next; + } while (pgd++, addr = next, addr != end); + + return 0; +} + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + struct mmu_gather tlb; + + /* Do we have pgstes? if yes, we are done */ + if (mm_has_pgste(tsk->mm)) + return 0; + + down_write(&mm->mmap_sem); + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + /* Reallocate the page tables with pgstes */ + tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); + if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE)) + mm->context.has_pgste = 1; + tlb_finish_mmu(&tlb, 0, TASK_SIZE); + up_write(&mm->mmap_sem); + return mm->context.has_pgste ? 0 : -ENOMEM; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); + +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +void s390_enable_skey(void) +{ + page_table_reset_pgste(current->mm, 0, TASK_SIZE, true); +} +EXPORT_SYMBOL_GPL(s390_enable_skey); + +/* + * Test and reset if a guest page is dirty + */ +bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) +{ + pte_t *pte; + spinlock_t *ptl; + bool dirty = false; + + pte = get_locked_pte(gmap->mm, address, &ptl); + if (unlikely(!pte)) + return false; + + if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) + dirty = true; + + spin_unlock(ptl); + return dirty; +} +EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + /* No need to flush TLB + * On s390 reference bits are in storage key and never in TLB */ + return pmdp_test_and_clear_young(vma, address, pmdp); +} + +int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (pmd_same(*pmdp, entry)) + return 0; + pmdp_invalidate(vma, address, pmdp); + set_pmd_at(vma->vm_mm, address, pmdp, entry); + return 1; +} + +static void pmdp_splitting_flush_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, + (unsigned long *) pmdp)) { + /* need to serialize against gup-fast (IRQ disabled) */ + smp_call_function(pmdp_splitting_flush_sync, NULL, 1); + } +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + pte_t *ptep; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + ptep = (pte_t *) pgtable; + pte_val(*ptep) = _PAGE_INVALID; + ptep++; + pte_val(*ptep) = _PAGE_INVALID; + return pgtable; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c new file mode 100644 index 00000000000..fe9012a49aa --- /dev/null +++ b/arch/s390/mm/vmem.c @@ -0,0 +1,424 @@ +/* + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/bootmem.h> +#include <linux/pfn.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/hugetlb.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> + +static DEFINE_MUTEX(vmem_mutex); + +struct memory_segment { + struct list_head list; + unsigned long start; + unsigned long size; +}; + +static LIST_HEAD(mem_segs); + +static void __ref *vmem_alloc_pages(unsigned int order) +{ + if (slab_is_available()) + return (void *)__get_free_pages(GFP_KERNEL, order); + return alloc_bootmem_pages((1 << order) * PAGE_SIZE); +} + +static inline pud_t *vmem_pud_alloc(void) +{ + pud_t *pud = NULL; + +#ifdef CONFIG_64BIT + pud = vmem_alloc_pages(2); + if (!pud) + return NULL; + clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4); +#endif + return pud; +} + +static inline pmd_t *vmem_pmd_alloc(void) +{ + pmd_t *pmd = NULL; + +#ifdef CONFIG_64BIT + pmd = vmem_alloc_pages(2); + if (!pmd) + return NULL; + clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4); +#endif + return pmd; +} + +static pte_t __ref *vmem_pte_alloc(unsigned long address) +{ + pte_t *pte; + + if (slab_is_available()) + pte = (pte_t *) page_table_alloc(&init_mm, address); + else + pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), + PTRS_PER_PTE * sizeof(pte_t)); + if (!pte) + return NULL; + clear_table((unsigned long *) pte, _PAGE_INVALID, + PTRS_PER_PTE * sizeof(pte_t)); + return pte; +} + +/* + * Add a physical memory range to the 1:1 mapping. + */ +static int vmem_add_mem(unsigned long start, unsigned long size, int ro) +{ + unsigned long end = start + size; + unsigned long address = start; + pgd_t *pg_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + int ret = -ENOMEM; + + while (address < end) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pu_dir = vmem_pud_alloc(); + if (!pu_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pu_dir); + } + pu_dir = pud_offset(pg_dir, address); +#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) + if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && + !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { + pud_val(*pu_dir) = __pa(address) | + _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | + (ro ? _REGION_ENTRY_PROTECT : 0); + address += PUD_SIZE; + continue; + } +#endif + if (pud_none(*pu_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pud_populate(&init_mm, pu_dir, pm_dir); + } + pm_dir = pmd_offset(pu_dir, address); +#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC) + if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && + !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { + pmd_val(*pm_dir) = __pa(address) | + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | + _SEGMENT_ENTRY_YOUNG | + (ro ? _SEGMENT_ENTRY_PROTECT : 0); + address += PMD_SIZE; + continue; + } +#endif + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(address); + if (!pt_dir) + goto out; + pmd_populate(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + pte_val(*pt_dir) = __pa(address) | + pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + address += PAGE_SIZE; + } + ret = 0; +out: + return ret; +} + +/* + * Remove a physical memory range from the 1:1 mapping. + * Currently only invalidates page table entries. + */ +static void vmem_remove_range(unsigned long start, unsigned long size) +{ + unsigned long end = start + size; + unsigned long address = start; + pgd_t *pg_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + + pte_val(pte) = _PAGE_INVALID; + while (address < end) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + address += PGDIR_SIZE; + continue; + } + pu_dir = pud_offset(pg_dir, address); + if (pud_none(*pu_dir)) { + address += PUD_SIZE; + continue; + } + if (pud_large(*pu_dir)) { + pud_clear(pu_dir); + address += PUD_SIZE; + continue; + } + pm_dir = pmd_offset(pu_dir, address); + if (pmd_none(*pm_dir)) { + address += PMD_SIZE; + continue; + } + if (pmd_large(*pm_dir)) { + pmd_clear(pm_dir); + address += PMD_SIZE; + continue; + } + pt_dir = pte_offset_kernel(pm_dir, address); + *pt_dir = pte; + address += PAGE_SIZE; + } + flush_tlb_kernel_range(start, end); +} + +/* + * Add a backed mem_map array to the virtual mem_map array. + */ +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +{ + unsigned long address = start; + pgd_t *pg_dir; + pud_t *pu_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + int ret = -ENOMEM; + + for (address = start; address < end;) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pu_dir = vmem_pud_alloc(); + if (!pu_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pu_dir); + } + + pu_dir = pud_offset(pg_dir, address); + if (pud_none(*pu_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pud_populate(&init_mm, pu_dir, pm_dir); + } + + pm_dir = pmd_offset(pu_dir, address); + if (pmd_none(*pm_dir)) { +#ifdef CONFIG_64BIT + /* Use 1MB frames for vmemmap if available. We always + * use large frames even if they are only partially + * used. + * Otherwise we would have also page tables since + * vmemmap_populate gets called for each section + * separately. */ + if (MACHINE_HAS_EDAT1) { + void *new_page; + + new_page = vmemmap_alloc_block(PMD_SIZE, node); + if (!new_page) + goto out; + pmd_val(*pm_dir) = __pa(new_page) | + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | + _SEGMENT_ENTRY_CO; + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } +#endif + pt_dir = vmem_pte_alloc(address); + if (!pt_dir) + goto out; + pmd_populate(&init_mm, pm_dir, pt_dir); + } else if (pmd_large(*pm_dir)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + + pt_dir = pte_offset_kernel(pm_dir, address); + if (pte_none(*pt_dir)) { + unsigned long new_page; + + new_page =__pa(vmem_alloc_pages(0)); + if (!new_page) + goto out; + pte_val(*pt_dir) = + __pa(new_page) | pgprot_val(PAGE_KERNEL); + } + address += PAGE_SIZE; + } + memset((void *)start, 0, end - start); + ret = 0; +out: + return ret; +} + +void vmemmap_free(unsigned long start, unsigned long end) +{ +} + +/* + * Add memory segment to the segment list if it doesn't overlap with + * an already present segment. + */ +static int insert_memory_segment(struct memory_segment *seg) +{ + struct memory_segment *tmp; + + if (seg->start + seg->size > VMEM_MAX_PHYS || + seg->start + seg->size < seg->start) + return -ERANGE; + + list_for_each_entry(tmp, &mem_segs, list) { + if (seg->start >= tmp->start + tmp->size) + continue; + if (seg->start + seg->size <= tmp->start) + continue; + return -ENOSPC; + } + list_add(&seg->list, &mem_segs); + return 0; +} + +/* + * Remove memory segment from the segment list. + */ +static void remove_memory_segment(struct memory_segment *seg) +{ + list_del(&seg->list); +} + +static void __remove_shared_memory(struct memory_segment *seg) +{ + remove_memory_segment(seg); + vmem_remove_range(seg->start, seg->size); +} + +int vmem_remove_mapping(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + + ret = -ENOENT; + list_for_each_entry(seg, &mem_segs, list) { + if (seg->start == start && seg->size == size) + break; + } + + if (seg->start != start || seg->size != size) + goto out; + + ret = 0; + __remove_shared_memory(seg); + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +int vmem_add_mapping(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + ret = -ENOMEM; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + goto out; + seg->start = start; + seg->size = size; + + ret = insert_memory_segment(seg); + if (ret) + goto out_free; + + ret = vmem_add_mem(start, size, 0); + if (ret) + goto out_remove; + goto out; + +out_remove: + __remove_shared_memory(seg); +out_free: + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +/* + * map whole physical memory to virtual memory (identity mapping) + * we reserve enough space in the vmalloc area for vmemmap to hotplug + * additional memory segments. + */ +void __init vmem_map_init(void) +{ + unsigned long ro_start, ro_end; + struct memblock_region *reg; + phys_addr_t start, end; + + ro_start = PFN_ALIGN((unsigned long)&_stext); + ro_end = (unsigned long)&_eshared & PAGE_MASK; + for_each_memblock(memory, reg) { + start = reg->base; + end = reg->base + reg->size - 1; + if (start >= ro_end || end <= ro_start) + vmem_add_mem(start, end - start, 0); + else if (start >= ro_start && end <= ro_end) + vmem_add_mem(start, end - start, 1); + else if (start >= ro_start) { + vmem_add_mem(start, ro_end - start, 1); + vmem_add_mem(ro_end, end - ro_end, 0); + } else if (end < ro_end) { + vmem_add_mem(start, ro_start - start, 0); + vmem_add_mem(ro_start, end - ro_start, 1); + } else { + vmem_add_mem(start, ro_start - start, 0); + vmem_add_mem(ro_start, ro_end - ro_start, 1); + vmem_add_mem(ro_end, end - ro_end, 0); + } + } +} + +/* + * Convert memblock.memory to a memory segment list so there is a single + * list that contains all memory segments. + */ +static int __init vmem_convert_memory_chunk(void) +{ + struct memblock_region *reg; + struct memory_segment *seg; + + mutex_lock(&vmem_mutex); + for_each_memblock(memory, reg) { + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + panic("Out of memory...\n"); + seg->start = reg->base; + seg->size = reg->size; + insert_memory_segment(seg); + } + mutex_unlock(&vmem_mutex); + return 0; +} + +core_initcall(vmem_convert_memory_chunk); diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile new file mode 100644 index 00000000000..90568c33ddb --- /dev/null +++ b/arch/s390/net/Makefile @@ -0,0 +1,4 @@ +# +# Arch-specific network modules +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S new file mode 100644 index 00000000000..7e45d13816c --- /dev/null +++ b/arch/s390/net/bpf_jit.S @@ -0,0 +1,130 @@ +/* + * BPF Jit compiler for s390, help functions. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/linkage.h> + +/* + * Calling convention: + * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved + * %r2: skb pointer + * %r3: offset parameter + * %r5: BPF A accumulator + * %r8: return address + * %r9: save register for skb pointer + * %r10: skb->data + * %r11: skb->len - skb->data_len (headlen) + * %r12: BPF X accumulator + * + * skb_copy_bits takes 4 parameters: + * %r2 = skb pointer + * %r3 = offset into skb data + * %r4 = length to copy + * %r5 = pointer to temp buffer + */ +#define SKBDATA %r8 + + /* A = *(u32 *) (skb->data+K+X) */ +ENTRY(sk_load_word_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u32 *) (skb->data+K) */ +ENTRY(sk_load_word) + llgfr %r1,%r3 # extend offset + ahi %r3,4 # offset + 4 + clr %r11,%r3 # hlen <= offset + 4 ? + jl sk_load_word_slow + l %r5,0(%r1,%r10) # get word from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_word_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,4 # 4 bytes + la %r5,160(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u16 *) (skb->data+K+X) */ +ENTRY(sk_load_half_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u16 *) (skb->data+K) */ +ENTRY(sk_load_half) + llgfr %r1,%r3 # extend offset + ahi %r3,2 # offset + 2 + clr %r11,%r3 # hlen <= offset + 2 ? + jl sk_load_half_slow + llgh %r5,0(%r1,%r10) # get half from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_half_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(2,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = *(u8 *) (skb->data+K+X) */ +ENTRY(sk_load_byte_ind) + ar %r3,%r12 # offset += X + bmr %r8 # < 0 -> return with cc + + /* A = *(u8 *) (skb->data+K) */ +ENTRY(sk_load_byte) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r5,0 + ic %r5,0(%r1,%r10) # get byte from skb + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,1 # 1 bytes + la %r5,163(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r5,160(%r15) # load result from temp buffer + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 + + /* A = (*(u8 *)(skb->data+K) & 0xf) << 2 */ +ENTRY(sk_load_byte_msh) + llgfr %r1,%r3 # extend offset + clr %r11,%r3 # hlen < offset ? + jle sk_load_byte_slow + lhi %r12,0 + ic %r12,0(%r1,%r10) # get byte from skb + nill %r12,0x0f + sll %r12,2 + xr %r1,%r1 # set cc to zero + br %r8 + +sk_load_byte_msh_slow: + lgr %r9,%r2 # save %r2 + lhi %r4,2 # 2 bytes + la %r5,162(%r15) # pointer to temp buffer + brasl %r14,skb_copy_bits # get data from skb + xc 160(3,%r15),160(%r15) + l %r12,160(%r15) # load result from temp buffer + nill %r12,0x0f + sll %r12,2 + ltgr %r2,%r2 # set cc to (%r2 != 0) + lgr %r2,%r9 # restore %r2 + br %r8 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c new file mode 100644 index 00000000000..a2cbd875543 --- /dev/null +++ b/arch/s390/net/bpf_jit_comp.c @@ -0,0 +1,891 @@ +/* + * BPF Jit compiler for s390. + * + * Copyright IBM Corp. 2012 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/moduleloader.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/filter.h> +#include <linux/random.h> +#include <linux/init.h> +#include <asm/cacheflush.h> +#include <asm/facility.h> +#include <asm/dis.h> + +/* + * Conventions: + * %r2 = skb pointer + * %r3 = offset parameter + * %r4 = scratch register / length parameter + * %r5 = BPF A accumulator + * %r8 = return address + * %r9 = save register for skb pointer + * %r10 = skb->data + * %r11 = skb->len - skb->data_len (headlen) + * %r12 = BPF X accumulator + * %r13 = literal pool pointer + * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS + */ +int bpf_jit_enable __read_mostly; + +/* + * assembly code in arch/x86/net/bpf_jit.S + */ +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; + +struct bpf_jit { + unsigned int seen; + u8 *start; + u8 *prg; + u8 *mid; + u8 *lit; + u8 *end; + u8 *base_ip; + u8 *ret0_ip; + u8 *exit_ip; + unsigned int off_load_word; + unsigned int off_load_half; + unsigned int off_load_byte; + unsigned int off_load_bmsh; + unsigned int off_load_iword; + unsigned int off_load_ihalf; + unsigned int off_load_ibyte; +}; + +#define BPF_SIZE_MAX 4096 /* Max size for program */ + +#define SEEN_DATAREF 1 /* might call external helpers */ +#define SEEN_XREG 2 /* ebx is used */ +#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */ +#define SEEN_LITERAL 16 /* code uses literals */ +#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */ +#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */ +#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */ +#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */ +#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */ +#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */ +#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */ + +#define EMIT2(op) \ +({ \ + if (jit->prg + 2 <= jit->mid) \ + *(u16 *) jit->prg = op; \ + jit->prg += 2; \ +}) + +#define EMIT4(op) \ +({ \ + if (jit->prg + 4 <= jit->mid) \ + *(u32 *) jit->prg = op; \ + jit->prg += 4; \ +}) + +#define EMIT4_DISP(op, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT4(op | __disp); \ +}) + +#define EMIT4_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm) & 0xffff; \ + EMIT4(op | __imm); \ +}) + +#define EMIT4_PCREL(op, pcrel) \ +({ \ + long __pcrel = ((pcrel) >> 1) & 0xffff; \ + EMIT4(op | __pcrel); \ +}) + +#define EMIT6(op1, op2) \ +({ \ + if (jit->prg + 6 <= jit->mid) { \ + *(u32 *) jit->prg = op1; \ + *(u16 *) (jit->prg + 4) = op2; \ + } \ + jit->prg += 6; \ +}) + +#define EMIT6_DISP(op1, op2, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + EMIT6(op1 | __disp, op2); \ +}) + +#define EMIT6_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm); \ + EMIT6(op | (__imm >> 16), __imm & 0xffff); \ +}) + +#define EMIT_CONST(val) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 4 <= jit->end) \ + *(u32 *) jit->lit = val; \ + jit->lit += 4; \ + ret; \ +}) + +#define EMIT_FN_CONST(bit, fn) \ +({ \ + unsigned int ret; \ + ret = (unsigned int) (jit->lit - jit->base_ip); \ + if (jit->seen & bit) { \ + jit->seen |= SEEN_LITERAL; \ + if (jit->lit + 8 <= jit->end) \ + *(void **) jit->lit = fn; \ + jit->lit += 8; \ + } \ + ret; \ +}) + +static void bpf_jit_prologue(struct bpf_jit *jit) +{ + /* Save registers and create stack frame if necessary */ + if (jit->seen & SEEN_DATAREF) { + /* stmg %r8,%r15,88(%r15) */ + EMIT6(0xeb8ff058, 0x0024); + /* lgr %r14,%r15 */ + EMIT4(0xb90400ef); + /* aghi %r15,<offset> */ + EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80); + /* stg %r14,152(%r15) */ + EMIT6(0xe3e0f098, 0x0024); + } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* stmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0024); + else if (jit->seen & SEEN_XREG) + /* stg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0024); + else if (jit->seen & SEEN_LITERAL) + /* stg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0024); + + /* Setup literal pool */ + if (jit->seen & SEEN_LITERAL) { + /* basr %r13,0 */ + EMIT2(0x0dd0); + jit->base_ip = jit->prg; + } + jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word); + jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half); + jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte); + jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh); + jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind); + jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind); + jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind); + + /* Filter needs to access skb data */ + if (jit->seen & SEEN_DATAREF) { + /* l %r11,<len>(%r2) */ + EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len)); + /* s %r11,<data_len>(%r2) */ + EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len)); + /* lg %r10,<data>(%r2) */ + EMIT6_DISP(0xe3a02000, 0x0004, + offsetof(struct sk_buff, data)); + } +} + +static void bpf_jit_epilogue(struct bpf_jit *jit) +{ + /* Return 0 */ + if (jit->seen & SEEN_RET0) { + jit->ret0_ip = jit->prg; + /* lghi %r2,0 */ + EMIT4(0xa7290000); + } + jit->exit_ip = jit->prg; + /* Restore registers */ + if (jit->seen & SEEN_DATAREF) + /* lmg %r8,%r15,<offset>(%r15) */ + EMIT6_DISP(0xeb8ff000, 0x0004, + (jit->seen & SEEN_MEM) ? 200 : 168); + else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) + /* lmg %r12,%r13,120(%r15) */ + EMIT6(0xebcdf078, 0x0004); + else if (jit->seen & SEEN_XREG) + /* lg %r12,120(%r15) */ + EMIT6(0xe3c0f078, 0x0004); + else if (jit->seen & SEEN_LITERAL) + /* lg %r13,128(%r15) */ + EMIT6(0xe3d0f080, 0x0004); + /* br %r14 */ + EMIT2(0x07fe); +} + +/* Helper to find the offset of pkt_type in sk_buff + * Make sure its still a 3bit field starting at the MSBs within a byte. + */ +#define PKT_TYPE_MAX 0xe0 +static int pkt_type_offset; + +static int __init bpf_pkt_type_offset_init(void) +{ + struct sk_buff skb_probe = { + .pkt_type = ~0, + }; + char *ct = (char *)&skb_probe; + int off; + + pkt_type_offset = -1; + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (!ct[off]) + continue; + if (ct[off] == PKT_TYPE_MAX) + pkt_type_offset = off; + else { + /* Found non matching bit pattern, fix needed. */ + WARN_ON_ONCE(1); + pkt_type_offset = -1; + return -1; + } + } + return 0; +} +device_initcall(bpf_pkt_type_offset_init); + +/* + * make sure we dont leak kernel information to user + */ +static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) +{ + /* Clear temporary memory if (seen & SEEN_MEM) */ + if (jit->seen & SEEN_MEM) + /* xc 0(64,%r15),0(%r15) */ + EMIT6(0xd73ff000, 0xf000); + /* Clear X if (seen & SEEN_XREG) */ + if (jit->seen & SEEN_XREG) + /* lhi %r12,0 */ + EMIT4(0xa7c80000); + /* Clear A if the first register does not set it. */ + switch (filter[0].code) { + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_IMM: + case BPF_LD | BPF_MEM: + case BPF_MISC | BPF_TXA: + case BPF_RET | BPF_K: + /* first instruction sets A register */ + break; + default: /* A = 0 */ + /* lhi %r5,0 */ + EMIT4(0xa7580000); + } +} + +static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, + unsigned int *addrs, int i, int last) +{ + unsigned int K; + int offset; + unsigned int mask; + u16 code; + + K = filter->k; + code = bpf_anc_helper(filter); + + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ + jit->seen |= SEEN_XREG; + /* ar %r5,%r12 */ + EMIT2(0x1a5c); + break; + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ + if (!K) + break; + if (K <= 16383) + /* ahi %r5,<K> */ + EMIT4_IMM(0xa75a0000, K); + else if (test_facility(21)) + /* alfi %r5,<K> */ + EMIT6_IMM(0xc25b0000, K); + else + /* a %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ + jit->seen |= SEEN_XREG; + /* sr %r5,%r12 */ + EMIT2(0x1b5c); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ + if (!K) + break; + if (K <= 16384) + /* ahi %r5,-K */ + EMIT4_IMM(0xa75a0000, -K); + else if (test_facility(21)) + /* alfi %r5,-K */ + EMIT6_IMM(0xc25b0000, -K); + else + /* s %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ + jit->seen |= SEEN_XREG; + /* msr %r5,%r12 */ + EMIT4(0xb252005c); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ + if (K <= 16383) + /* mhi %r5,K */ + EMIT4_IMM(0xa75c0000, K); + else if (test_facility(34)) + /* msfi %r5,<K> */ + EMIT6_IMM(0xc2510000, K); + else + /* ms %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x7150d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; + /* ltr %r12,%r12 */ + EMIT2(0x12cc); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dlr %r4,%r12 */ + EMIT4(0xb997004c); + break; + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ + if (K == 1) + break; + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dl %r4,<d(K)>(%r13) */ + EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; + /* ltr %r12,%r12 */ + EMIT2(0x12cc); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dlr %r4,%r12 */ + EMIT4(0xb997004c); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */ + if (K == 1) { + /* lhi %r5,0 */ + EMIT4(0xa7580000); + break; + } + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* dl %r4,<d(K)>(%r13) */ + EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ + jit->seen |= SEEN_XREG; + /* nr %r5,%r12 */ + EMIT2(0x145c); + break; + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ + if (test_facility(21)) + /* nilf %r5,<K> */ + EMIT6_IMM(0xc05b0000, K); + else + /* n %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5450d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ + jit->seen |= SEEN_XREG; + /* or %r5,%r12 */ + EMIT2(0x165c); + break; + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ + if (test_facility(21)) + /* oilf %r5,<K> */ + EMIT6_IMM(0xc05d0000, K); + else + /* o %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5650d000, EMIT_CONST(K)); + break; + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: + jit->seen |= SEEN_XREG; + /* xr %r5,%r12 */ + EMIT2(0x175c); + break; + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ + if (!K) + break; + /* x %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5750d000, EMIT_CONST(K)); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ + jit->seen |= SEEN_XREG; + /* sll %r5,0(%r12) */ + EMIT4(0x8950c000); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ + if (K == 0) + break; + /* sll %r5,K */ + EMIT4_DISP(0x89500000, K); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ + jit->seen |= SEEN_XREG; + /* srl %r5,0(%r12) */ + EMIT4(0x8850c000); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ + if (K == 0) + break; + /* srl %r5,K */ + EMIT4_DISP(0x88500000, K); + break; + case BPF_ALU | BPF_NEG: /* A = -A */ + /* lnr %r5,%r5 */ + EMIT2(0x1155); + break; + case BPF_JMP | BPF_JA: /* ip += K */ + offset = addrs[i + K] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */ + mask = 0x200000; /* jh */ + goto kbranch; + case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto kbranch; + case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */ + mask = 0x800000; /* je */ +kbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K <= 16383) + /* chi %r5,<K> */ + EMIT4_IMM(0xa75e0000, K); + else if (test_facility(21)) + /* clfi %r5,<K> */ + EMIT6_IMM(0xc25f0000, K); + else + /* c %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5950d000, EMIT_CONST(K)); + } +branch: if (filter->jt == filter->jf) { + if (filter->jt == 0) + break; + /* j <jt> */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7f40000, offset); + break; + } + if (filter->jt != 0) { + /* brc <mask>,<jt> */ + offset = addrs[i + filter->jt] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | mask, offset); + } + if (filter->jf != 0) { + /* brc <mask^15>,<jf> */ + offset = addrs[i + filter->jf] + jit->start - jit->prg; + EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + if (K > 65535) { + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* n %r4,<d(K)>(%r13) */ + EMIT4_DISP(0x5440d000, EMIT_CONST(K)); + } else + /* tmll %r5,K */ + EMIT4_IMM(0xa7510000, K); + } + goto branch; + case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */ + mask = 0x200000; /* jh */ + goto xbranch; + case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */ + mask = 0xa00000; /* jhe */ + goto xbranch; + case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */ + mask = 0x800000; /* je */ +xbranch: /* Emit compare if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* cr %r5,%r12 */ + EMIT2(0x195c); + } + goto branch; + case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ + mask = 0x700000; /* jnz */ + /* Emit test if the branch targets are different */ + if (filter->jt != filter->jf) { + jit->seen |= SEEN_XREG; + /* lr %r4,%r5 */ + EMIT2(0x1845); + /* nr %r4,%r12 */ + EMIT2(0x144c); + } + goto branch; + case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; + offset = jit->off_load_word; + goto load_abs; + case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; + offset = jit->off_load_half; + goto load_abs; + case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; + offset = jit->off_load_byte; +load_abs: if ((int) K < 0) + goto out; +call_fn: /* lg %r1,<d(function)>(%r13) */ + EMIT6_DISP(0xe310d000, 0x0004, offset); + /* l %r3,<d(K)>(%r13) */ + EMIT4_DISP(0x5830d000, EMIT_CONST(K)); + /* basr %r8,%r1 */ + EMIT2(0x0d81); + /* jnz <ret0> */ + EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); + break; + case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; + offset = jit->off_load_iword; + goto call_fn; + case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; + offset = jit->off_load_ihalf; + goto call_fn; + case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */ + jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; + offset = jit->off_load_ibyte; + goto call_fn; + case BPF_LDX | BPF_B | BPF_MSH: + /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ + jit->seen |= SEEN_RET0; + if ((int) K < 0) { + /* j <ret0> */ + EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg)); + break; + } + jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; + offset = jit->off_load_bmsh; + goto call_fn; + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + /* l %r5,<d(len)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); + break; + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ + jit->seen |= SEEN_XREG; + /* l %r12,<d(len)>(%r2) */ + EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); + break; + case BPF_LD | BPF_IMM: /* A = K */ + if (K <= 16383) + /* lhi %r5,K */ + EMIT4_IMM(0xa7580000, K); + else if (test_facility(21)) + /* llilf %r5,<K> */ + EMIT6_IMM(0xc05f0000, K); + else + /* l %r5,<d(K)>(%r13) */ + EMIT4_DISP(0x5850d000, EMIT_CONST(K)); + break; + case BPF_LDX | BPF_IMM: /* X = K */ + jit->seen |= SEEN_XREG; + if (K <= 16383) + /* lhi %r12,<K> */ + EMIT4_IMM(0xa7c80000, K); + else if (test_facility(21)) + /* llilf %r12,<K> */ + EMIT6_IMM(0xc0cf0000, K); + else + /* l %r12,<d(K)>(%r13) */ + EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); + break; + case BPF_LD | BPF_MEM: /* A = mem[K] */ + jit->seen |= SEEN_MEM; + /* l %r5,<K>(%r15) */ + EMIT4_DISP(0x5850f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_LDX | BPF_MEM: /* X = mem[K] */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* l %r12,<K>(%r15) */ + EMIT4_DISP(0x58c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_MISC | BPF_TAX: /* X = A */ + jit->seen |= SEEN_XREG; + /* lr %r12,%r5 */ + EMIT2(0x18c5); + break; + case BPF_MISC | BPF_TXA: /* A = X */ + jit->seen |= SEEN_XREG; + /* lr %r5,%r12 */ + EMIT2(0x185c); + break; + case BPF_RET | BPF_K: + if (K == 0) { + jit->seen |= SEEN_RET0; + if (last) + break; + /* j <ret0> */ + EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg); + } else { + if (K <= 16383) + /* lghi %r2,K */ + EMIT4_IMM(0xa7290000, K); + else + /* llgf %r2,<K>(%r13) */ + EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K)); + /* j <exit> */ + if (last && !(jit->seen & SEEN_RET0)) + break; + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + } + break; + case BPF_RET | BPF_A: + /* llgfr %r2,%r5 */ + EMIT4(0xb9160025); + /* j <exit> */ + EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + break; + case BPF_ST: /* mem[K] = A */ + jit->seen |= SEEN_MEM; + /* st %r5,<K>(%r15) */ + EMIT4_DISP(0x5050f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + jit->seen |= SEEN_XREG | SEEN_MEM; + /* st %r12,<K>(%r15) */ + EMIT4_DISP(0x50c0f000, + (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + break; + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(protocol)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); + break; + case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + jit->seen |= SEEN_RET0; + /* lg %r1,<d(dev)>(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* l %r5,<d(ifindex)>(%r1) */ + EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); + break; + case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + /* l %r5,<d(mark)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); + break; + case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(queue_mapping)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); + break; + case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + jit->seen |= SEEN_RET0; + /* lg %r1,<d(dev)>(%r2) */ + EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); + /* ltgr %r1,%r1 */ + EMIT4(0xb9020011); + /* jz <ret0> */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(type)>(%r1) */ + EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); + break; + case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */ + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + /* l %r5,<d(hash)>(%r2) */ + EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); + break; + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* icm %r5,3,<d(vlan_tci)>(%r2) */ + EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci)); + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { + /* nill %r5,0xefff */ + EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT); + } else { + /* nill %r5,0x1000 */ + EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT); + /* srl %r5,12 */ + EMIT4_DISP(0x88500000, 12); + } + break; + case BPF_ANC | SKF_AD_PKTTYPE: + if (pkt_type_offset < 0) + goto out; + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* ic %r5,<d(pkt_type_offset)>(%r2) */ + EMIT4_DISP(0x43502000, pkt_type_offset); + /* srl %r5,5 */ + EMIT4_DISP(0x88500000, 5); + break; + case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */ +#ifdef CONFIG_SMP + /* l %r5,<d(cpu_nr)> */ + EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); +#else + /* lhi %r5,0 */ + EMIT4(0xa7580000); +#endif + break; + default: /* too complex, give up */ + goto out; + } + addrs[i] = jit->prg - jit->start; + return 0; +out: + return -1; +} + +/* + * Note: for security reasons, bpf code will follow a randomly + * sized amount of illegal instructions. + */ +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + +static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, + u8 **image_ptr) +{ + struct bpf_binary_header *header; + unsigned int sz, hole; + + /* Most BPF filters are really small, but if some of them fill a page, + * allow at least 128 extra bytes for illegal instructions. + */ + sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); + header = module_alloc(sz); + if (!header) + return NULL; + memset(header, 0, sz); + header->pages = sz / PAGE_SIZE; + hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); + /* Insert random number of illegal instructions before BPF code + * and make sure the first instruction starts at an even address. + */ + *image_ptr = &header->image[(prandom_u32() % hole) & -2]; + return header; +} + +void bpf_jit_compile(struct sk_filter *fp) +{ + struct bpf_binary_header *header = NULL; + unsigned long size, prg_len, lit_len; + struct bpf_jit jit, cjit; + unsigned int *addrs; + int pass, i; + + if (!bpf_jit_enable) + return; + addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return; + memset(&jit, 0, sizeof(cjit)); + memset(&cjit, 0, sizeof(cjit)); + + for (pass = 0; pass < 10; pass++) { + jit.prg = jit.start; + jit.lit = jit.mid; + + bpf_jit_prologue(&jit); + bpf_jit_noleaks(&jit, fp->insns); + for (i = 0; i < fp->len; i++) { + if (bpf_jit_insn(&jit, fp->insns + i, addrs, i, + i == fp->len - 1)) + goto out; + } + bpf_jit_epilogue(&jit); + if (jit.start) { + WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit); + if (memcmp(&jit, &cjit, sizeof(jit)) == 0) + break; + } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { + prg_len = jit.prg - jit.start; + lit_len = jit.lit - jit.mid; + size = prg_len + lit_len; + if (size >= BPF_SIZE_MAX) + goto out; + header = bpf_alloc_binary(size, &jit.start); + if (!header) + goto out; + jit.prg = jit.mid = jit.start + prg_len; + jit.lit = jit.end = jit.start + prg_len + lit_len; + jit.base_ip += (unsigned long) jit.start; + jit.exit_ip += (unsigned long) jit.start; + jit.ret0_ip += (unsigned long) jit.start; + } + cjit = jit; + } + if (bpf_jit_enable > 1) { + bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start); + if (jit.start) + print_fn_code(jit.start, jit.mid - jit.start); + } + if (jit.start) { + set_memory_ro((unsigned long)header, header->pages); + fp->bpf_func = (void *) jit.start; + fp->jited = 1; + } +out: + kfree(addrs); +} + +void bpf_jit_free(struct sk_filter *fp) +{ + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; + + if (!fp->jited) + goto free_filter; + + set_memory_rw(addr, header->pages); + module_free(NULL, header); + +free_filter: + kfree(fp); +} diff --git a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig deleted file mode 100644 index 208220a5f23..00000000000 --- a/arch/s390/oprofile/Kconfig +++ /dev/null @@ -1,22 +0,0 @@ - -menu "Profiling support" - -config PROFILING - bool "Profiling support" - help - Say Y here to enable profiling support mechanisms used by - profilers such as readprofile or OProfile. - - -config OPROFILE - tristate "OProfile system profiling" - depends on PROFILING - help - OProfile is a profiling system capable of profiling the - whole system, include the kernel, kernel modules, libraries, - and applications. - - If unsure, say N. - -endmenu - diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 537b2d840e6..524c4b61582 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_64BIT) += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c index bc4b84a35ca..8a6811b2cdb 100644 --- a/arch/s390/oprofile/backtrace.c +++ b/arch/s390/oprofile/backtrace.c @@ -1,8 +1,6 @@ -/** - * arch/s390/oprofile/backtrace.c - * +/* * S390 Version - * Copyright (C) 2005 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright IBM Corp. 2005 * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com> */ @@ -60,7 +58,7 @@ void s390_backtrace(struct pt_regs * const regs, unsigned int depth) unsigned long head; struct stack_frame* head_sf; - if (user_mode (regs)) + if (user_mode(regs)) return; head = regs->gprs[15]; diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c new file mode 100644 index 00000000000..e53c6f26880 --- /dev/null +++ b/arch/s390/oprofile/hwsampler.c @@ -0,0 +1,1178 @@ +/* + * Copyright IBM Corp. 2010 + * Author: Heinz Graalfs <graalfs@de.ibm.com> + */ + +#include <linux/kernel_stat.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/semaphore.h> +#include <linux/oom.h> +#include <linux/oprofile.h> + +#include <asm/facility.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +#include "hwsampler.h" +#include "op_counter.h" + +#define MAX_NUM_SDB 511 +#define MIN_NUM_SDB 1 + +DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); + +struct hws_execute_parms { + void *buffer; + signed int rc; +}; + +DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); +EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); + +static DEFINE_MUTEX(hws_sem); +static DEFINE_MUTEX(hws_sem_oom); + +static unsigned char hws_flush_all; +static unsigned int hws_oom; +static unsigned int hws_alert; +static struct workqueue_struct *hws_wq; + +static unsigned int hws_state; +enum { + HWS_INIT = 1, + HWS_DEALLOCATED, + HWS_STOPPED, + HWS_STARTED, + HWS_STOPPING }; + +/* set to 1 if called by kernel during memory allocation */ +static unsigned char oom_killer_was_active; +/* size of SDBT and SDB as of allocate API */ +static unsigned long num_sdbt = 100; +static unsigned long num_sdb = 511; +/* sampling interval (machine cycles) */ +static unsigned long interval; + +static unsigned long min_sampler_rate; +static unsigned long max_sampler_rate; + +static void execute_qsi(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = qsi(ep->buffer); +} + +static void execute_ssctl(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = lsctl(ep->buffer); +} + +static int smp_ctl_ssctl_stop(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 0; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) { + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + dump_stack(); + } + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.es || cb->qsi.cs) { + printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); + dump_stack(); + } + + return rc; +} + +static int smp_ctl_ssctl_deactivate(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 1; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.cs) + printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); + + return rc; +} + +static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.h = 1; + cb->ssctl.tear = cb->first_sdbt; + cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; + cb->ssctl.interval = interval; + cb->ssctl.es = 1; + cb->ssctl.cs = 1; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + if (ep.rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); + + return rc; +} + +static int smp_ctl_qsi(int cpu) +{ + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + return ep.rc; +} + +static void hws_ext_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + + if (!(param32 & CPU_MF_INT_SF_MASK)) + return; + + if (!hws_alert) + return; + + inc_irq_stat(IRQEXT_CMS); + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} + +static void worker(struct work_struct *work); + +static void add_samples_to_oprofile(unsigned cpu, unsigned long *, + unsigned long *dear); + +static void init_all_cpu_buffers(void) +{ + int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + memset(cb, 0, sizeof(struct hws_cpu_buffer)); + } +} + +static void prepare_cpu_buffers(void) +{ + struct hws_cpu_buffer *cb; + int cpu; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + atomic_set(&cb->ext_params, 0); + cb->worker_entry = 0; + cb->sample_overflow = 0; + cb->req_alert = 0; + cb->incorrect_sdbt_entry = 0; + cb->invalid_entry_address = 0; + cb->loss_of_sample_data = 0; + cb->sample_auth_change_alert = 0; + cb->finish = 0; + cb->oom = 0; + cb->stop_mode = 0; + } +} + +/* + * allocate_sdbt() - allocate sampler memory + * @cpu: the cpu for which sampler memory is allocated + * + * A 4K page is allocated for each requested SDBT. + * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. + * Set ALERT_REQ mask in each SDBs trailer. + * Returns zero if successful, <0 otherwise. + */ +static int allocate_sdbt(int cpu) +{ + int j, k, rc; + unsigned long *sdbt; + unsigned long sdb; + unsigned long *tail; + unsigned long *trailer; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->first_sdbt) + return -EINVAL; + + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdbt) { + if (sdbt) + free_page((unsigned long)sdbt); + + goto allocate_sdbt_error; + } + if (cb->first_sdbt == 0) + cb->first_sdbt = (unsigned long)sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *)sdbt + 1; + + mutex_unlock(&hws_sem_oom); + + for (k = 0; k < num_sdb; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdb) { + if (sdb) + free_page(sdb); + + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = SDB_TE_ALERT_REQ_MASK; + sdbt++; + mutex_unlock(&hws_sem_oom); + } + tail = sdbt; + } + mutex_lock(&hws_sem_oom); + if (oom_killer_was_active) + goto allocate_sdbt_error; + + rc = 0; + if (tail) + *tail = (unsigned long) + ((void *)cb->first_sdbt) + 1; + +allocate_sdbt_exit: + mutex_unlock(&hws_sem_oom); + return rc; + +allocate_sdbt_error: + rc = -ENOMEM; + goto allocate_sdbt_exit; +} + +/* + * deallocate_sdbt() - deallocate all sampler memory + * + * For each online CPU all SDBT trees are deallocated. + * Returns the number of freed pages. + */ +static int deallocate_sdbt(void) +{ + int cpu; + int counter; + + counter = 0; + + for_each_online_cpu(cpu) { + unsigned long start; + unsigned long sdbt; + unsigned long *curr; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->first_sdbt) + continue; + + sdbt = cb->first_sdbt; + curr = (unsigned long *) sdbt; + start = sdbt; + + /* we'll free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the start */ + if ((unsigned long) curr == start) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + counter++; + } + cb->first_sdbt = 0; + } + return counter; +} + +static int start_sampling(int cpu) +{ + int rc; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); + goto start_exit; + } + + rc = -EINVAL; + if (!cb->qsi.es) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); + goto start_exit; + } + + if (!cb->qsi.cs) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); + goto start_exit; + } + + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", + cpu, interval); + + rc = 0; + +start_exit: + return rc; +} + +static int stop_sampling(int cpu) +{ + unsigned long v; + int rc; + struct hws_cpu_buffer *cb; + + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (!rc && !cb->qsi.es) + printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); + + rc = smp_ctl_ssctl_stop(cpu); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", + cpu, rc); + goto stop_exit; + } + + printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); + +stop_exit: + v = cb->req_alert; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," + " count=%lu.\n", cpu, v); + + v = cb->loss_of_sample_data; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," + " count=%lu.\n", cpu, v); + + v = cb->invalid_entry_address; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," + " count=%lu.\n", cpu, v); + + v = cb->incorrect_sdbt_entry; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Incorrect SDBT address," + " count=%lu.\n", cpu, v); + + v = cb->sample_auth_change_alert; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Sample authorization change," + " count=%lu.\n", cpu, v); + + return rc; +} + +static int check_hardware_prerequisites(void) +{ + if (!test_facility(68)) + return -EOPNOTSUPP; + return 0; +} +/* + * hws_oom_callback() - the OOM callback function + * + * In case the callback is invoked during memory allocation for the + * hw sampler, all obtained memory is deallocated and a flag is set + * so main sampler memory allocation can exit with a failure code. + * In case the callback is invoked during sampling the hw sampler + * is deactivated for all CPUs. + */ +static int hws_oom_callback(struct notifier_block *nfb, + unsigned long dummy, void *parm) +{ + unsigned long *freed; + int cpu; + struct hws_cpu_buffer *cb; + + freed = parm; + + mutex_lock(&hws_sem_oom); + + if (hws_state == HWS_DEALLOCATED) { + /* during memory allocation */ + if (oom_killer_was_active == 0) { + oom_killer_was_active = 1; + *freed += deallocate_sdbt(); + } + } else { + int i; + cpu = get_cpu(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->oom) { + for_each_online_cpu(i) { + smp_ctl_ssctl_deactivate(i); + cb->oom = 1; + } + cb->finish = 1; + + printk(KERN_INFO + "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", + cpu); + } + } + + mutex_unlock(&hws_sem_oom); + + return NOTIFY_OK; +} + +static struct notifier_block hws_oom_notifier = { + .notifier_call = hws_oom_callback +}; + +static int hws_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* We do not have sampler space available for all possible CPUs. + All CPUs should be online when hw sampling is activated. */ + return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; +} + +static struct notifier_block hws_cpu_notifier = { + .notifier_call = hws_cpu_callback +}; + +/** + * hwsampler_deactivate() - set hardware sampling temporarily inactive + * @cpu: specifies the CPU to be set inactive. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deactivate(unsigned int cpu) +{ + /* + * Deactivate hw sampling temporarily and flush the buffer + * by pushing all the pending samples to oprofile buffer. + * + * This function can be called under one of the following conditions: + * Memory unmap, task is exiting. + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.cs) { + rc = smp_ctl_ssctl_deactivate(cpu); + if (rc) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); + cb->finish = 1; + hws_state = HWS_STOPPING; + } else { + hws_flush_all = 1; + /* Add work to queue to read pending samples.*/ + queue_work_on(cpu, hws_wq, &cb->worker); + } + } + } + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + return rc; +} + +/** + * hwsampler_activate() - activate/resume hardware sampling which was deactivated + * @cpu: specifies the CPU to be set active. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_activate(unsigned int cpu) +{ + /* + * Re-activate hw sampling. This should be called in pair with + * hwsampler_deactivate(). + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (!cb->qsi.cs) { + hws_flush_all = 0; + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_ERR + "CPU %d, CPUMF activate sampling failed.\n", + cpu); + } + } + } + + mutex_unlock(&hws_sem); + + return rc; +} + +static int check_qsi_on_setup(void) +{ + int rc; + unsigned int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (rc) + return -EOPNOTSUPP; + + if (!cb->qsi.as) { + printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); + return -EINVAL; + } + + if (cb->qsi.es) { + printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + return -EINVAL; + + printk(KERN_INFO + "CPU %d, CPUMF Sampling stopped now.\n", cpu); + } + } + return 0; +} + +static int check_qsi_on_start(void) +{ + unsigned int cpu; + int rc; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + if (!cb->qsi.as) + return -EINVAL; + + if (cb->qsi.es) + return -EINVAL; + + if (cb->qsi.cs) + return -EINVAL; + } + return 0; +} + +static void worker_on_start(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->worker_entry = cb->first_sdbt; +} + +static int worker_check_error(unsigned int cpu, int ext_params) +{ + int rc; + unsigned long *sdbt; + struct hws_cpu_buffer *cb; + + rc = 0; + cb = &per_cpu(sampler_cpu_buffer, cpu); + sdbt = (unsigned long *) cb->worker_entry; + + if (!sdbt || !*sdbt) + return -EINVAL; + + if (ext_params & CPU_MF_INT_SF_PRA) + cb->req_alert++; + + if (ext_params & CPU_MF_INT_SF_LSDA) + cb->loss_of_sample_data++; + + if (ext_params & CPU_MF_INT_SF_IAE) { + cb->invalid_entry_address++; + rc = -EINVAL; + } + + if (ext_params & CPU_MF_INT_SF_ISE) { + cb->incorrect_sdbt_entry++; + rc = -EINVAL; + } + + if (ext_params & CPU_MF_INT_SF_SACA) { + cb->sample_auth_change_alert++; + rc = -EINVAL; + } + + return rc; +} + +static void worker_on_finish(unsigned int cpu) +{ + int rc, i; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->finish) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.es) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", + cpu); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", + cpu); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (!cb->finish) { + cb->finish = 1; + queue_work_on(i, hws_wq, + &cb->worker); + } + } + } + } +} + +static void worker_on_interrupt(unsigned int cpu) +{ + unsigned long *sdbt; + unsigned char done; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + sdbt = (unsigned long *) cb->worker_entry; + + done = 0; + /* do not proceed if stop was entered, + * forget the buffers not yet processed */ + while (!done && !cb->stop_mode) { + unsigned long *trailer; + struct hws_trailer_entry *te; + unsigned long *dear = 0; + + trailer = trailer_entry_ptr(*sdbt); + /* leave loop if no more work to do */ + if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { + done = 1; + if (!hws_flush_all) + continue; + } + + te = (struct hws_trailer_entry *)trailer; + cb->sample_overflow += te->overflow; + + add_samples_to_oprofile(cpu, sdbt, dear); + + /* reset trailer */ + xchg((unsigned char *) te, 0x40); + + /* advance to next sdb slot in current sdbt */ + sdbt++; + /* in case link bit is set use address w/o link bit */ + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + cb->worker_entry = (unsigned long)sdbt; + } +} + +static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, + unsigned long *dear) +{ + struct hws_basic_entry *sample_data_ptr; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + if (dear) { + if (dear > trailer) + return; + trailer = dear; + } + + sample_data_ptr = (struct hws_basic_entry *)(*sdbt); + + while ((unsigned long *)sample_data_ptr < trailer) { + struct pt_regs *regs = NULL; + struct task_struct *tsk = NULL; + + /* + * Check sampling mode, 1 indicates basic (=customer) sampling + * mode. + */ + if (sample_data_ptr->def != 1) { + /* sample slot is not yet written */ + break; + } else { + /* make sure we don't use it twice, + * the next time the sampler will set it again */ + sample_data_ptr->def = 0; + } + + /* Get pt_regs. */ + if (sample_data_ptr->P == 1) { + /* userspace sample */ + unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; + rcu_read_lock(); + tsk = pid_task(find_vpid(pid), PIDTYPE_PID); + if (tsk) + regs = task_pt_regs(tsk); + rcu_read_unlock(); + } else { + /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; + regs = task_pt_regs(current); + } + + mutex_lock(&hws_sem); + oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, + !sample_data_ptr->P, tsk); + mutex_unlock(&hws_sem); + skip_sample: + sample_data_ptr++; + } +} + +static void worker(struct work_struct *work) +{ + unsigned int cpu; + int ext_params; + struct hws_cpu_buffer *cb; + + cb = container_of(work, struct hws_cpu_buffer, worker); + cpu = smp_processor_id(); + ext_params = atomic_xchg(&cb->ext_params, 0); + + if (!cb->worker_entry) + worker_on_start(cpu); + + if (worker_check_error(cpu, ext_params)) + return; + + if (!cb->finish) + worker_on_interrupt(cpu); + + if (cb->finish) + worker_on_finish(cpu); +} + +/** + * hwsampler_allocate() - allocate memory for the hardware sampler + * @sdbt: number of SDBTs per online CPU (must be > 0) + * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) +{ + int cpu, rc; + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_DEALLOCATED) + goto allocate_exit; + + if (sdbt < 1) + goto allocate_exit; + + if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) + goto allocate_exit; + + num_sdbt = sdbt; + num_sdb = sdb; + + oom_killer_was_active = 0; + register_oom_notifier(&hws_oom_notifier); + + for_each_online_cpu(cpu) { + if (allocate_sdbt(cpu)) { + unregister_oom_notifier(&hws_oom_notifier); + goto allocate_error; + } + } + unregister_oom_notifier(&hws_oom_notifier); + if (oom_killer_was_active) + goto allocate_error; + + hws_state = HWS_STOPPED; + rc = 0; + +allocate_exit: + mutex_unlock(&hws_sem); + return rc; + +allocate_error: + rc = -ENOMEM; + printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); + goto allocate_exit; +} + +/** + * hwsampler_deallocate() - deallocate hardware sampler memory + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deallocate(void) +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto deallocate_exit; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; + deallocate_sdbt(); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +deallocate_exit: + mutex_unlock(&hws_sem); + + return rc; +} + +unsigned long hwsampler_query_min_interval(void) +{ + return min_sampler_rate; +} + +unsigned long hwsampler_query_max_interval(void) +{ + return max_sampler_rate; +} + +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + return cb->sample_overflow; +} + +int hwsampler_setup(void) +{ + int rc; + int cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state) + goto setup_exit; + + hws_state = HWS_INIT; + + init_all_cpu_buffers(); + + rc = check_hardware_prerequisites(); + if (rc) + goto setup_exit; + + rc = check_qsi_on_setup(); + if (rc) + goto setup_exit; + + rc = -EINVAL; + hws_wq = create_workqueue("hwsampler"); + if (!hws_wq) + goto setup_exit; + + register_cpu_notifier(&hws_cpu_notifier); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + INIT_WORK(&cb->worker, worker); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (min_sampler_rate != cb->qsi.min_sampl_rate) { + if (min_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different min sampler rate values.\n"); + if (min_sampler_rate < cb->qsi.min_sampl_rate) + min_sampler_rate = + cb->qsi.min_sampl_rate; + } else + min_sampler_rate = cb->qsi.min_sampl_rate; + } + if (max_sampler_rate != cb->qsi.max_sampl_rate) { + if (max_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different max sampler rate values.\n"); + if (max_sampler_rate > cb->qsi.max_sampl_rate) + max_sampler_rate = + cb->qsi.max_sampl_rate; + } else + max_sampler_rate = cb->qsi.max_sampl_rate; + } + } + register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +setup_exit: + mutex_unlock(&hws_sem); + return rc; +} + +int hwsampler_shutdown(void) +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + + if (hws_state == HWS_STOPPED) { + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; + deallocate_sdbt(); + } + if (hws_wq) { + destroy_workqueue(hws_wq); + hws_wq = NULL; + } + + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); + hws_state = HWS_INIT; + rc = 0; + } + mutex_unlock(&hws_sem); + + unregister_cpu_notifier(&hws_cpu_notifier); + + return rc; +} + +/** + * hwsampler_start_all() - start hardware sampling on all online CPUs + * @rate: specifies the used interval when samples are taken + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_start_all(unsigned long rate) +{ + int rc, cpu; + + mutex_lock(&hws_sem); + + hws_oom = 0; + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto start_all_exit; + + interval = rate; + + /* fail if rate is not valid */ + if (interval < min_sampler_rate || interval > max_sampler_rate) + goto start_all_exit; + + rc = check_qsi_on_start(); + if (rc) + goto start_all_exit; + + prepare_cpu_buffers(); + + for_each_online_cpu(cpu) { + rc = start_sampling(cpu); + if (rc) + break; + } + if (rc) { + for_each_online_cpu(cpu) { + stop_sampling(cpu); + } + goto start_all_exit; + } + hws_state = HWS_STARTED; + rc = 0; + +start_all_exit: + mutex_unlock(&hws_sem); + + if (rc) + return rc; + + register_oom_notifier(&hws_oom_notifier); + hws_oom = 1; + hws_flush_all = 0; + /* now let them in, 1407 CPUMF external interrupts */ + hws_alert = 1; + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +/** + * hwsampler_stop_all() - stop hardware sampling on all online CPUs + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_stop_all(void) +{ + int tmp_rc, rc, cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = 0; + if (hws_state == HWS_INIT) { + mutex_unlock(&hws_sem); + return 0; + } + hws_state = HWS_STOPPING; + mutex_unlock(&hws_sem); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->stop_mode = 1; + tmp_rc = stop_sampling(cpu); + if (tmp_rc) + rc = tmp_rc; + } + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + if (hws_oom) { + unregister_oom_notifier(&hws_oom_notifier); + hws_oom = 0; + } + hws_state = HWS_STOPPED; + mutex_unlock(&hws_sem); + + return rc; +} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h new file mode 100644 index 00000000000..a483d06f2fa --- /dev/null +++ b/arch/s390/oprofile/hwsampler.h @@ -0,0 +1,63 @@ +/* + * CPUMF HW sampler functions and internal structures + * + * Copyright IBM Corp. 2010 + * Author(s): Heinz Graalfs <graalfs@de.ibm.com> + */ + +#ifndef HWSAMPLER_H_ +#define HWSAMPLER_H_ + +#include <linux/workqueue.h> +#include <asm/cpu_mf.h> + +struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ +{ /* bytes 0 - 7 Bit(s) */ + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long b2_53:52; /* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +}; + +struct hws_cpu_buffer { + unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ + unsigned long worker_entry; + unsigned long sample_overflow; /* taken from SDB ... */ + struct hws_qsi_info_block qsi; + struct hws_ssctl_request_block ssctl; + struct work_struct worker; + atomic_t ext_params; + unsigned long req_alert; + unsigned long loss_of_sample_data; + unsigned long invalid_entry_address; + unsigned long incorrect_sdbt_entry; + unsigned long sample_auth_change_alert; + unsigned int finish:1; + unsigned int oom:1; + unsigned int stop_mode:1; +}; + +int hwsampler_setup(void); +int hwsampler_shutdown(void); +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); +int hwsampler_deallocate(void); +unsigned long hwsampler_query_min_interval(void); +unsigned long hwsampler_query_max_interval(void); +int hwsampler_start_all(unsigned long interval); +int hwsampler_stop_all(void); +int hwsampler_deactivate(unsigned int cpu); +int hwsampler_activate(unsigned int cpu); +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); + +#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 7a995113b91..9ffe645d598 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -1,26 +1,524 @@ -/** - * arch/s390/oprofile/init.c - * +/* * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2002, 2011 * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2011 OProfile authors */ #include <linux/oprofile.h> +#include <linux/perf_event.h> #include <linux/init.h> #include <linux/errno.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> +#include "../../../drivers/oprofile/oprof.h" extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); -int __init oprofile_arch_init(struct oprofile_operations* ops) +#ifdef CONFIG_64BIT + +#include "hwsampler.h" +#include "op_counter.h" + +#define DEFAULT_INTERVAL 4127518 + +#define DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static int hwsampler_enabled; +static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; + +static struct oprofile_operations timer_ops; + +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + +static int __oprofile_hwsampler_start(void) +{ + int retval; + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static int oprofile_hwsampler_start(void) +{ + int retval; + + hwsampler_running = hwsampler_enabled; + + if (!hwsampler_running) + return timer_ops.start(); + + retval = perf_reserve_sampling(); + if (retval) + return retval; + + retval = __oprofile_hwsampler_start(); + if (retval) + perf_release_sampling(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + + hwsampler_stop_all(); + hwsampler_deallocate(); + perf_release_sampling(); + return; +} + +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); +} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = val; + + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. + */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. */ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + +static int oprofile_create_hwsampling_files(struct dentry *root) +{ + struct dentry *dir; + + dir = oprofilefs_mkdir(root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; + + /* reinitialize default values */ + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; + + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(root, "0"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(dir, "event", &zero_fops); + oprofilefs_create_file(dir, "count", &hw_interval_fops); + oprofilefs_create_file(dir, "unit_mask", &zero_fops); + oprofilefs_create_file(dir, "kernel", &kernel_fops); + oprofilefs_create_file(dir, "user", &user_fops); + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } + return 0; +} + +static int oprofile_hwsampler_init(struct oprofile_operations *ops) +{ + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; + default: return -ENODEV; + } + } + + if (hwsampler_setup()) + return -ENODEV; + + /* + * Query the range for the sampling interval from the + * hardware. + */ + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval == 0) + return -ENODEV; + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval == 0) + return -ENODEV; + + /* The initial value should be sane */ + if (oprofile_hw_interval < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + if (oprofile_hw_interval > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + + return 0; +} + +static void oprofile_hwsampler_exit(void) +{ + hwsampler_shutdown(); +} + +#endif /* CONFIG_64BIT */ + +int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; + +#ifdef CONFIG_64BIT + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; +#else return -ENODEV; +#endif } void oprofile_arch_exit(void) { +#ifdef CONFIG_64BIT + oprofile_hwsampler_exit(); +#endif } diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 00000000000..61b2531eef1 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,21 @@ +/* + * Copyright IBM Corp. 2011 + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. */ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile new file mode 100644 index 00000000000..a9e1dc4ae44 --- /dev/null +++ b/arch/s390/pci/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the s390 PCI subsystem. +# + +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ + pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c new file mode 100644 index 00000000000..30de42730b2 --- /dev/null +++ b/arch/s390/pci/pci.c @@ -0,0 +1,935 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + * + * The System z PCI code is a rewrite from a prototype by + * the following people (Kudoz!): + * Alexander Schmidt + * Christoph Raisch + * Hannes Hering + * Hoang-Nam Nguyen + * Jan-Bernd Themann + * Stefan Roscher + * Thomas Klein + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> +#include <linux/seq_file.h> +#include <linux/pci.h> +#include <linux/msi.h> + +#include <asm/isc.h> +#include <asm/airq.h> +#include <asm/facility.h> +#include <asm/pci_insn.h> +#include <asm/pci_clp.h> +#include <asm/pci_dma.h> + +#define DEBUG /* enable pr_debug */ + +#define SIC_IRQ_MODE_ALL 0 +#define SIC_IRQ_MODE_SINGLE 1 + +#define ZPCI_NR_DMA_SPACES 1 +#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS + +/* list of all detected zpci devices */ +static LIST_HEAD(zpci_list); +static DEFINE_SPINLOCK(zpci_list_lock); + +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, +}; + +static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); +static DEFINE_SPINLOCK(zpci_domain_lock); + +static struct airq_iv *zpci_aisb_iv; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; + +/* Adapter interrupt definitions */ +static void zpci_irq_handler(struct airq_struct *airq); + +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; + +/* I/O Map */ +static DEFINE_SPINLOCK(zpci_iomap_lock); +static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); +struct zpci_iomap_entry *zpci_iomap_start; +EXPORT_SYMBOL_GPL(zpci_iomap_start); + +static struct kmem_cache *zdev_fmb_cache; + +struct zpci_dev *get_zdev(struct pci_dev *pdev) +{ + return (struct zpci_dev *) pdev->sysdata; +} + +struct zpci_dev *get_zdev_by_fid(u32 fid) +{ + struct zpci_dev *tmp, *zdev = NULL; + + spin_lock(&zpci_list_lock); + list_for_each_entry(tmp, &zpci_list, entry) { + if (tmp->fid == fid) { + zdev = tmp; + break; + } + } + spin_unlock(&zpci_list_lock); + return zdev; +} + +static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) +{ + return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; +} + +int pci_domain_nr(struct pci_bus *bus) +{ + return ((struct zpci_dev *) bus->sysdata)->domain; +} +EXPORT_SYMBOL_GPL(pci_domain_nr); + +int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} +EXPORT_SYMBOL_GPL(pci_proc_domain); + +/* Modify PCI: Register adapter interruptions */ +static int zpci_set_airq(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); + struct zpci_fib fib = {0}; + + fib.isc = PCI_ISC; + fib.sum = 1; /* enable summary notifications */ + fib.noi = airq_iv_end(zdev->aibv); + fib.aibv = (unsigned long) zdev->aibv->vector; + fib.aibvo = 0; /* each zdev has its own interrupt vector */ + fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib.aisbo = zdev->aisb & 63; + + return zpci_mod_fc(req, &fib); +} + +struct mod_pci_args { + u64 base; + u64 limit; + u64 iota; + u64 fmb_addr; +}; + +static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn); + struct zpci_fib fib = {0}; + + fib.pba = args->base; + fib.pal = args->limit; + fib.iota = args->iota; + fib.fmb_addr = args->fmb_addr; + + return zpci_mod_fc(req, &fib); +} + +/* Modify PCI: Register I/O address translation parameters */ +int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, + u64 base, u64 limit, u64 iota) +{ + struct mod_pci_args args = { base, limit, iota, 0 }; + + WARN_ON_ONCE(iota & 0x3fff); + args.iota |= ZPCI_IOTA_RTTO_FLAG; + return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args); +} + +/* Modify PCI: Unregister I/O address translation parameters */ +int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + + return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args); +} + +/* Modify PCI: Unregister adapter interruptions */ +static int zpci_clear_airq(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + + return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args); +} + +/* Modify PCI: Set PCI function measurement parameters */ +int zpci_fmb_enable_device(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + + if (zdev->fmb) + return -EINVAL; + + zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL); + if (!zdev->fmb) + return -ENOMEM; + WARN_ON((u64) zdev->fmb & 0xf); + + args.fmb_addr = virt_to_phys(zdev->fmb); + return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); +} + +/* Modify PCI: Disable PCI function measurement */ +int zpci_fmb_disable_device(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0, 0 }; + int rc; + + if (!zdev->fmb) + return -EINVAL; + + /* Function measurement is disabled if fmb address is zero */ + rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); + + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + return rc; +} + +#define ZPCI_PCIAS_CFGSPC 15 + +static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); + u64 data; + int rc; + + rc = zpci_load(&data, req, offset); + if (!rc) { + data = data << ((8 - len) * 8); + data = le64_to_cpu(data); + *val = (u32) data; + } else + *val = 0xffffffff; + return rc; +} + +static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len); + u64 data = val; + int rc; + + data = cpu_to_le64(data); + data = data >> ((8 - len) * 8); + rc = zpci_store(data, req, offset); + return rc; +} + +void pcibios_fixup_bus(struct pci_bus *bus) +{ +} + +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, + resource_size_t align) +{ + return 0; +} + +/* combine single writes by using store-block insn */ +void __iowrite64_copy(void __iomem *to, const void *from, size_t count) +{ + zpci_memcpy_toio(to, from, count); +} + +/* Create a virtual mapping cookie for a PCI BAR */ +void __iomem *pci_iomap(struct pci_dev *pdev, int bar, unsigned long max) +{ + struct zpci_dev *zdev = get_zdev(pdev); + u64 addr; + int idx; + + if ((bar & 7) != bar) + return NULL; + + idx = zdev->bars[bar].map_idx; + spin_lock(&zpci_iomap_lock); + zpci_iomap_start[idx].fh = zdev->fh; + zpci_iomap_start[idx].bar = bar; + spin_unlock(&zpci_iomap_lock); + + addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48); + return (void __iomem *) addr; +} +EXPORT_SYMBOL_GPL(pci_iomap); + +void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) +{ + unsigned int idx; + + idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48; + spin_lock(&zpci_iomap_lock); + zpci_iomap_start[idx].fh = 0; + zpci_iomap_start[idx].bar = 0; + spin_unlock(&zpci_iomap_lock); +} +EXPORT_SYMBOL_GPL(pci_iounmap); + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *val) +{ + struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; + + if (!zdev || devfn != ZPCI_DEVFN) + ret = -ENODEV; + else + ret = zpci_cfg_load(zdev, where, val, size); + + return ret; +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 val) +{ + struct zpci_dev *zdev = get_zdev_by_bus(bus); + int ret; + + if (!zdev || devfn != ZPCI_DEVFN) + ret = -ENODEV; + else + ret = zpci_cfg_store(zdev, where, val, size); + + return ret; +} + +static struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +static void zpci_irq_handler(struct airq_struct *airq) +{ + unsigned long si, ai; + struct airq_iv *aibv; + int irqs_on = 0; + + inc_irq_stat(IRQIO_PCI); + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + si = 0; + continue; + } + + /* Scan the adapter interrupt vector for this device. */ + aibv = zpci_aibv[si]; + for (ai = 0;;) { + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); + if (ai == -1UL) + break; + inc_irq_stat(IRQIO_MSI); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); + } + } +} + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct zpci_dev *zdev = get_zdev(pdev); + unsigned int hwirq, msi_vecs; + unsigned long aisb; + struct msi_desc *msi; + struct msi_msg msg; + int rc, irq; + + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); + + /* Allocate adapter summary indicator bit */ + rc = -EIO; + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + goto out; + zdev->aisb = aisb; + + /* Create adapter interrupt vector */ + rc = -ENOMEM; + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); + if (!zdev->aibv) + goto out_si; + + /* Wire up shortcut pointer */ + zpci_aibv[aisb] = zdev->aibv; + + /* Request MSI interrupts */ + hwirq = 0; + list_for_each_entry(msi, &pdev->msi_list, list) { + rc = -EIO; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq < 0) + goto out_msi; + rc = irq_set_msi_desc(irq, msi); + if (rc) + goto out_msi; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; + } + + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + goto out_msi; + + return (msi_vecs == nvec) ? 0 : msi_vecs; + +out_msi: + list_for_each_entry(msi, &pdev->msi_list, list) { + if (hwirq-- == 0) + break; + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } + zpci_aibv[aisb] = NULL; + airq_iv_release(zdev->aibv); +out_si: + airq_iv_free_bit(zpci_aisb_iv, aisb); +out: + return rc; +} + +void arch_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct msi_desc *msi; + int rc; + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); + if (rc) + return; + + /* Release MSI interrupts */ + list_for_each_entry(msi, &pdev->msi_list, list) { + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } + + zpci_aibv[zdev->aisb] = NULL; + airq_iv_release(zdev->aibv); + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); +} + +static void zpci_map_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); + pdev->resource[i].end = pdev->resource[i].start + len - 1; + } +} + +static void zpci_unmap_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pci_iounmap(pdev, (void *) pdev->resource[i].start); + } +} + +static int __init zpci_irq_init(void) +{ + int rc; + + rc = register_adapter_interrupt(&zpci_airq); + if (rc) + goto out; + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; + + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; + + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + return 0; + +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: + return rc; +} + +static void zpci_irq_exit(void) +{ + airq_iv_release(zpci_aisb_iv); + unregister_adapter_interrupt(&zpci_airq); +} + +static int zpci_alloc_iomap(struct zpci_dev *zdev) +{ + int entry; + + spin_lock(&zpci_iomap_lock); + entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); + if (entry == ZPCI_IOMAP_MAX_ENTRIES) { + spin_unlock(&zpci_iomap_lock); + return -ENOSPC; + } + set_bit(entry, zpci_iomap); + spin_unlock(&zpci_iomap_lock); + return entry; +} + +static void zpci_free_iomap(struct zpci_dev *zdev, int entry) +{ + spin_lock(&zpci_iomap_lock); + memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry)); + clear_bit(entry, zpci_iomap); + spin_unlock(&zpci_iomap_lock); +} + +static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, + unsigned long size, unsigned long flags) +{ + struct resource *r; + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return NULL; + + r->start = start; + r->end = r->start + size - 1; + r->flags = flags; + r->name = zdev->res_name; + + if (request_resource(&iomem_resource, r)) { + kfree(r); + return NULL; + } + return r; +} + +static int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources) +{ + unsigned long addr, size, flags; + struct resource *res; + int i, entry; + + snprintf(zdev->res_name, sizeof(zdev->res_name), + "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + if (!zdev->bars[i].size) + continue; + entry = zpci_alloc_iomap(zdev); + if (entry < 0) + return entry; + zdev->bars[i].map_idx = entry; + + /* only MMIO is supported */ + flags = IORESOURCE_MEM; + if (zdev->bars[i].val & 8) + flags |= IORESOURCE_PREFETCH; + if (zdev->bars[i].val & 4) + flags |= IORESOURCE_MEM_64; + + addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48); + + size = 1UL << zdev->bars[i].size; + + res = __alloc_res(zdev, addr, size, flags); + if (!res) { + zpci_free_iomap(zdev, entry); + return -ENOMEM; + } + zdev->bars[i].res = res; + pci_add_resource(resources, res); + } + + return 0; +} + +static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) +{ + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + if (!zdev->bars[i].size) + continue; + + zpci_free_iomap(zdev, zdev->bars[i].map_idx); + release_resource(zdev->bars[i].res); + kfree(zdev->bars[i].res); + } +} + +int pcibios_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + int i; + + zdev->pdev = pdev; + pdev->dev.groups = zpci_attr_groups; + zpci_map_resources(zdev); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + if (res->parent || !res->flags) + continue; + pci_claim_resource(pdev, i); + } + + return 0; +} + +int pcibios_enable_device(struct pci_dev *pdev, int mask) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zdev->pdev = pdev; + zpci_debug_init_device(zdev); + zpci_fmb_enable_device(zdev); + zpci_map_resources(zdev); + + return pci_enable_resources(pdev, mask); +} + +void pcibios_disable_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_unmap_resources(zdev); + zpci_fmb_disable_device(zdev); + zpci_debug_exit_device(zdev); + zdev->pdev = NULL; +} + +#ifdef CONFIG_HIBERNATE_CALLBACKS +static int zpci_restore(struct device *dev) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + int ret = 0; + + if (zdev->state != ZPCI_FN_STATE_ONLINE) + goto out; + + ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); + if (ret) + goto out; + + zpci_map_resources(zdev); + zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + +out: + return ret; +} + +static int zpci_freeze(struct device *dev) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + + if (zdev->state != ZPCI_FN_STATE_ONLINE) + return 0; + + zpci_unregister_ioat(zdev, 0); + return clp_disable_fh(zdev); +} + +struct dev_pm_ops pcibios_pm_ops = { + .thaw_noirq = zpci_restore, + .freeze_noirq = zpci_freeze, + .restore_noirq = zpci_restore, + .poweroff_noirq = zpci_freeze, +}; +#endif /* CONFIG_HIBERNATE_CALLBACKS */ + +static int zpci_alloc_domain(struct zpci_dev *zdev) +{ + spin_lock(&zpci_domain_lock); + zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); + if (zdev->domain == ZPCI_NR_DEVICES) { + spin_unlock(&zpci_domain_lock); + return -ENOSPC; + } + set_bit(zdev->domain, zpci_domain); + spin_unlock(&zpci_domain_lock); + return 0; +} + +static void zpci_free_domain(struct zpci_dev *zdev) +{ + spin_lock(&zpci_domain_lock); + clear_bit(zdev->domain, zpci_domain); + spin_unlock(&zpci_domain_lock); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + struct zpci_dev *zdev = get_zdev_by_bus(bus); + + zpci_exit_slot(zdev); + zpci_cleanup_bus_resources(zdev); + zpci_free_domain(zdev); + + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + + kfree(zdev); +} + +static int zpci_scan_bus(struct zpci_dev *zdev) +{ + LIST_HEAD(resources); + int ret; + + ret = zpci_setup_bus_resources(zdev, &resources); + if (ret) + return ret; + + zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, + zdev, &resources); + if (!zdev->bus) { + zpci_cleanup_bus_resources(zdev); + return -EIO; + } + + zdev->bus->max_bus_speed = zdev->max_bus_speed; + return 0; +} + +int zpci_enable_device(struct zpci_dev *zdev) +{ + int rc; + + rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); + if (rc) + goto out; + + rc = zpci_dma_init_device(zdev); + if (rc) + goto out_dma; + + zdev->state = ZPCI_FN_STATE_ONLINE; + return 0; + +out_dma: + clp_disable_fh(zdev); +out: + return rc; +} +EXPORT_SYMBOL_GPL(zpci_enable_device); + +int zpci_disable_device(struct zpci_dev *zdev) +{ + zpci_dma_exit_device(zdev); + return clp_disable_fh(zdev); +} +EXPORT_SYMBOL_GPL(zpci_disable_device); + +int zpci_create_device(struct zpci_dev *zdev) +{ + int rc; + + rc = zpci_alloc_domain(zdev); + if (rc) + goto out; + + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { + rc = zpci_enable_device(zdev); + if (rc) + goto out_free; + } + rc = zpci_scan_bus(zdev); + if (rc) + goto out_disable; + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); + + zpci_init_slot(zdev); + + return 0; + +out_disable: + if (zdev->state == ZPCI_FN_STATE_ONLINE) + zpci_disable_device(zdev); +out_free: + zpci_free_domain(zdev); +out: + return rc; +} + +void zpci_stop_device(struct zpci_dev *zdev) +{ + zpci_dma_exit_device(zdev); + /* + * Note: SCLP disables fh via set-pci-fn so don't + * do that here. + */ +} +EXPORT_SYMBOL_GPL(zpci_stop_device); + +static inline int barsize(u8 size) +{ + return (size) ? (1 << size) >> 10 : 0; +} + +static int zpci_mem_init(void) +{ + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), + 16, 0, NULL); + if (!zdev_fmb_cache) + goto error_zdev; + + /* TODO: use realloc */ + zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), + GFP_KERNEL); + if (!zpci_iomap_start) + goto error_iomap; + return 0; + +error_iomap: + kmem_cache_destroy(zdev_fmb_cache); +error_zdev: + return -ENOMEM; +} + +static void zpci_mem_exit(void) +{ + kfree(zpci_iomap_start); + kmem_cache_destroy(zdev_fmb_cache); +} + +static unsigned int s390_pci_probe = 1; +static unsigned int s390_pci_initialized; + +char * __init pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + s390_pci_probe = 0; + return NULL; + } + return str; +} + +bool zpci_is_enabled(void) +{ + return s390_pci_initialized; +} + +static int __init pci_base_init(void) +{ + int rc; + + if (!s390_pci_probe) + return 0; + + if (!test_facility(2) || !test_facility(69) + || !test_facility(71) || !test_facility(72)) + return 0; + + rc = zpci_debug_init(); + if (rc) + goto out; + + rc = zpci_mem_init(); + if (rc) + goto out_mem; + + rc = zpci_irq_init(); + if (rc) + goto out_irq; + + rc = zpci_dma_init(); + if (rc) + goto out_dma; + + rc = clp_scan_pci_devices(); + if (rc) + goto out_find; + + s390_pci_initialized = 1; + return 0; + +out_find: + zpci_dma_exit(); +out_dma: + zpci_irq_exit(); +out_irq: + zpci_mem_exit(); +out_mem: + zpci_debug_exit(); +out: + return rc; +} +subsys_initcall_sync(pci_base_init); + +void zpci_rescan(void) +{ + if (zpci_is_enabled()) + clp_rescan_pci_devices_simple(); +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c new file mode 100644 index 00000000000..96545d7659f --- /dev/null +++ b/arch/s390/pci/pci_clp.c @@ -0,0 +1,392 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/pci.h> +#include <asm/pci_debug.h> +#include <asm/pci_clp.h> + +static inline void zpci_err_clp(unsigned int rsp, int rc) +{ + struct { + unsigned int rsp; + int rc; + } __packed data = {rsp, rc}; + + zpci_err_hex(&data, sizeof(data)); +} + +/* + * Call Logical Processor + * Retry logic is handled by the caller. + */ +static inline u8 clp_instr(void *data) +{ + struct { u8 _[CLP_BLK_SIZE]; } *req = data; + u64 ignored; + u8 cc; + + asm volatile ( + " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req) + : [req] "a" (req) + : "cc"); + return cc; +} + +static void *clp_alloc_block(gfp_t gfp_mask) +{ + return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE)); +} + +static void clp_free_block(void *ptr) +{ + free_pages((unsigned long) ptr, get_order(CLP_BLK_SIZE)); +} + +static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, + struct clp_rsp_query_pci_grp *response) +{ + zdev->tlb_refresh = response->refresh; + zdev->dma_mask = response->dasm; + zdev->msi_addr = response->msia; + zdev->fmb_update = response->mui; + + switch (response->version) { + case 1: + zdev->max_bus_speed = PCIE_SPEED_5_0GT; + break; + default: + zdev->max_bus_speed = PCI_SPEED_UNKNOWN; + break; + } +} + +static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) +{ + struct clp_req_rsp_query_pci_grp *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.pfgid = pfgid; + + rc = clp_instr(rrb); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + clp_store_query_pci_fngrp(zdev, &rrb->response); + else { + zpci_err("Q PCI FGRP:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } + clp_free_block(rrb); + return rc; +} + +static int clp_store_query_pci_fn(struct zpci_dev *zdev, + struct clp_rsp_query_pci *response) +{ + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + zdev->bars[i].val = le32_to_cpu(response->bar[i]); + zdev->bars[i].size = response->bar_size[i]; + } + zdev->start_dma = response->sdma; + zdev->end_dma = response->edma; + zdev->pchid = response->pchid; + zdev->pfgid = response->pfgid; + zdev->pft = response->pft; + zdev->vfn = response->vfn; + zdev->uid = response->uid; + + memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip)); + if (response->util_str_avail) { + memcpy(zdev->util_str, response->util_str, + sizeof(zdev->util_str)); + } + + return 0; +} + +static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +{ + struct clp_req_rsp_query_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = fh; + + rc = clp_instr(rrb); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + rc = clp_store_query_pci_fn(zdev, &rrb->response); + if (rc) + goto out; + if (rrb->response.pfgid) + rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); + } else { + zpci_err("Q PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } +out: + clp_free_block(rrb); + return rc; +} + +int clp_add_pci_device(u32 fid, u32 fh, int configured) +{ + struct zpci_dev *zdev; + int rc; + + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); + zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); + if (!zdev) + return -ENOMEM; + + zdev->fh = fh; + zdev->fid = fid; + + /* Query function properties and update zdev */ + rc = clp_query_pci_fn(zdev, fh); + if (rc) + goto error; + + if (configured) + zdev->state = ZPCI_FN_STATE_CONFIGURED; + else + zdev->state = ZPCI_FN_STATE_STANDBY; + + rc = zpci_create_device(zdev); + if (rc) + goto error; + return 0; + +error: + kfree(zdev); + return rc; +} + +/* + * Enable/Disable a given PCI function defined by its function handle. + */ +static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) +{ + struct clp_req_rsp_set_pci *rrb; + int rc, retries = 100; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + do { + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_SET_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = *fh; + rrb->request.oc = command; + rrb->request.ndas = nr_dma_as; + + rc = clp_instr(rrb); + if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) { + retries--; + if (retries < 0) + break; + msleep(20); + } + } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + *fh = rrb->response.fh; + else { + zpci_err("Set PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } + clp_free_block(rrb); + return rc; +} + +int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) +{ + u32 fh = zdev->fh; + int rc; + + rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + if (!rc) + /* Success -> store enabled handle in zdev */ + zdev->fh = fh; + + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); + return rc; +} + +int clp_disable_fh(struct zpci_dev *zdev) +{ + u32 fh = zdev->fh; + int rc; + + if (!zdev_enabled(zdev)) + return 0; + + rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); + if (!rc) + /* Success -> store disabled handle in zdev */ + zdev->fh = fh; + + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); + return rc; +} + +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, + void (*cb)(struct clp_fh_list_entry *entry)) +{ + u64 resume_token = 0; + int entries, i, rc; + + do { + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_LIST_PCI; + /* store as many entries as possible */ + rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN; + rrb->request.resume_token = resume_token; + + /* Get PCI function handle list */ + rc = clp_instr(rrb); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("List PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + goto out; + } + + WARN_ON_ONCE(rrb->response.entry_size != + sizeof(struct clp_fh_list_entry)); + + entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) / + rrb->response.entry_size; + + resume_token = rrb->response.resume_token; + for (i = 0; i < entries; i++) + cb(&rrb->response.fh_list[i]); + } while (resume_token); +out: + return rc; +} + +static void __clp_add(struct clp_fh_list_entry *entry) +{ + if (!entry->vendor_id) + return; + + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); +} + +static void __clp_rescan(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) { + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + return; + } + + if (!entry->config_state) { + /* + * The handle is already disabled, that means no iota/irq freeing via + * the firmware interfaces anymore. Need to free resources manually + * (DMA memory, debug, sysfs)... + */ + zpci_stop_device(zdev); + } +} + +static void __clp_update(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +int clp_scan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_add); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_rescan); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices_simple(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_update); + + clp_free_block(rrb); + return rc; +} diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c new file mode 100644 index 00000000000..c5c66840ac0 --- /dev/null +++ b/arch/s390/pci/pci_debug.c @@ -0,0 +1,167 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/pci.h> +#include <asm/debug.h> + +#include <asm/pci_dma.h> + +static struct dentry *debugfs_root; +debug_info_t *pci_debug_msg_id; +EXPORT_SYMBOL_GPL(pci_debug_msg_id); +debug_info_t *pci_debug_err_id; +EXPORT_SYMBOL_GPL(pci_debug_err_id); + +static char *pci_perf_names[] = { + /* hardware counters */ + "Load operations", + "Store operations", + "Store block operations", + "Refresh operations", + "DMA read bytes", + "DMA write bytes", + /* software counters */ + "Allocated pages", + "Mapped pages", + "Unmapped pages", +}; + +static int pci_perf_show(struct seq_file *m, void *v) +{ + struct zpci_dev *zdev = m->private; + u64 *stat; + int i; + + if (!zdev) + return 0; + if (!zdev->fmb) + return seq_printf(m, "FMB statistics disabled\n"); + + /* header */ + seq_printf(m, "FMB @ %p\n", zdev->fmb); + seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update); + seq_printf(m, "Samples: %u\n", zdev->fmb->samples); + seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update); + + /* hardware counters */ + stat = (u64 *) &zdev->fmb->ld_ops; + for (i = 0; i < 4; i++) + seq_printf(m, "%26s:\t%llu\n", + pci_perf_names[i], *(stat + i)); + if (zdev->fmb->dma_valid) + for (i = 4; i < 6; i++) + seq_printf(m, "%26s:\t%llu\n", + pci_perf_names[i], *(stat + i)); + /* software counters */ + for (i = 6; i < ARRAY_SIZE(pci_perf_names); i++) + seq_printf(m, "%26s:\t%llu\n", + pci_perf_names[i], + atomic64_read((atomic64_t *) (stat + i))); + + return 0; +} + +static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *off) +{ + struct zpci_dev *zdev = ((struct seq_file *) file->private_data)->private; + unsigned long val; + int rc; + + if (!zdev) + return 0; + + rc = kstrtoul_from_user(ubuf, count, 10, &val); + if (rc) + return rc; + + switch (val) { + case 0: + rc = zpci_fmb_disable_device(zdev); + if (rc) + return rc; + break; + case 1: + rc = zpci_fmb_enable_device(zdev); + if (rc) + return rc; + break; + } + return count; +} + +static int pci_perf_seq_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, pci_perf_show, + file_inode(filp)->i_private); +} + +static const struct file_operations debugfs_pci_perf_fops = { + .open = pci_perf_seq_open, + .read = seq_read, + .write = pci_perf_seq_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void zpci_debug_init_device(struct zpci_dev *zdev) +{ + zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), + debugfs_root); + if (IS_ERR(zdev->debugfs_dev)) + zdev->debugfs_dev = NULL; + + zdev->debugfs_perf = debugfs_create_file("statistics", + S_IFREG | S_IRUGO | S_IWUSR, + zdev->debugfs_dev, zdev, + &debugfs_pci_perf_fops); + if (IS_ERR(zdev->debugfs_perf)) + zdev->debugfs_perf = NULL; +} + +void zpci_debug_exit_device(struct zpci_dev *zdev) +{ + debugfs_remove(zdev->debugfs_perf); + debugfs_remove(zdev->debugfs_dev); +} + +int __init zpci_debug_init(void) +{ + /* event trace buffer */ + pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long)); + if (!pci_debug_msg_id) + return -EINVAL; + debug_register_view(pci_debug_msg_id, &debug_sprintf_view); + debug_set_level(pci_debug_msg_id, 3); + + /* error log */ + pci_debug_err_id = debug_register("pci_error", 2, 1, 16); + if (!pci_debug_err_id) + return -EINVAL; + debug_register_view(pci_debug_err_id, &debug_hex_ascii_view); + debug_set_level(pci_debug_err_id, 6); + + debugfs_root = debugfs_create_dir("pci", NULL); + return 0; +} + +void zpci_debug_exit(void) +{ + if (pci_debug_msg_id) + debug_unregister(pci_debug_msg_id); + if (pci_debug_err_id) + debug_unregister(pci_debug_err_id); + + debugfs_remove(debugfs_root); +} diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c new file mode 100644 index 00000000000..f91c0311980 --- /dev/null +++ b/arch/s390/pci/pci_dma.c @@ -0,0 +1,506 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/iommu-helper.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/pci.h> +#include <asm/pci_dma.h> + +static struct kmem_cache *dma_region_table_cache; +static struct kmem_cache *dma_page_table_cache; + +static unsigned long *dma_alloc_cpu_table(void) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) + *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED; + return table; +} + +static void dma_free_cpu_table(void *table) +{ + kmem_cache_free(dma_region_table_cache, table); +} + +static unsigned long *dma_alloc_page_table(void) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) + *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED; + return table; +} + +static void dma_free_page_table(void *table) +{ + kmem_cache_free(dma_page_table_cache, table); +} + +static unsigned long *dma_get_seg_table_origin(unsigned long *entry) +{ + unsigned long *sto; + + if (reg_entry_isvalid(*entry)) + sto = get_rt_sto(*entry); + else { + sto = dma_alloc_cpu_table(); + if (!sto) + return NULL; + + set_rt_sto(entry, sto); + validate_rt_entry(entry); + entry_clr_protected(entry); + } + return sto; +} + +static unsigned long *dma_get_page_table_origin(unsigned long *entry) +{ + unsigned long *pto; + + if (reg_entry_isvalid(*entry)) + pto = get_st_pto(*entry); + else { + pto = dma_alloc_page_table(); + if (!pto) + return NULL; + set_st_pto(entry, pto); + validate_st_entry(entry); + entry_clr_protected(entry); + } + return pto; +} + +static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +{ + unsigned long *sto, *pto; + unsigned int rtx, sx, px; + + rtx = calc_rtx(dma_addr); + sto = dma_get_seg_table_origin(&rto[rtx]); + if (!sto) + return NULL; + + sx = calc_sx(dma_addr); + pto = dma_get_page_table_origin(&sto[sx]); + if (!pto) + return NULL; + + px = calc_px(dma_addr); + return &pto[px]; +} + +static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr, + dma_addr_t dma_addr, int flags) +{ + unsigned long *entry; + + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + if (!entry) { + WARN_ON_ONCE(1); + return; + } + + if (flags & ZPCI_PTE_INVALID) { + invalidate_pt_entry(entry); + return; + } else { + set_pt_pfaa(entry, page_addr); + validate_pt_entry(entry); + } + + if (flags & ZPCI_TABLE_PROTECTED) + entry_set_protected(entry); + else + entry_clr_protected(entry); +} + +static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, + dma_addr_t dma_addr, size_t size, int flags) +{ + unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + u8 *page_addr = (u8 *) (pa & PAGE_MASK); + dma_addr_t start_dma_addr = dma_addr; + unsigned long irq_flags; + int i, rc = 0; + + if (!nr_pages) + return -EINVAL; + + spin_lock_irqsave(&zdev->dma_table_lock, irq_flags); + if (!zdev->dma_table) + goto no_refresh; + + for (i = 0; i < nr_pages; i++) { + dma_update_cpu_trans(zdev, page_addr, dma_addr, flags); + page_addr += PAGE_SIZE; + dma_addr += PAGE_SIZE; + } + + /* + * rpcit is not required to establish new translations when previously + * invalid translation-table entries are validated, however it is + * required when altering previously valid entries. + */ + if (!zdev->tlb_refresh && + ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) + /* + * TODO: also need to check that the old entry is indeed INVALID + * and not only for one page but for the whole range... + * -> now we WARN_ON in that case but with lazy unmap that + * needs to be redone! + */ + goto no_refresh; + + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); + +no_refresh: + spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); + return rc; +} + +static void dma_free_seg_table(unsigned long entry) +{ + unsigned long *sto = get_rt_sto(entry); + int sx; + + for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) + if (reg_entry_isvalid(sto[sx])) + dma_free_page_table(get_st_pto(sto[sx])); + + dma_free_cpu_table(sto); +} + +static void dma_cleanup_tables(struct zpci_dev *zdev) +{ + unsigned long *table; + int rtx; + + if (!zdev || !zdev->dma_table) + return; + + table = zdev->dma_table; + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(table[rtx])) + dma_free_seg_table(table[rtx]); + + dma_free_cpu_table(table); + zdev->dma_table = NULL; +} + +static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, + unsigned long start, int size) +{ + unsigned long boundary_size; + + boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, + start, size, 0, boundary_size, 0); +} + +static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) +{ + unsigned long offset, flags; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); + offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); + if (offset == -1) + offset = __dma_alloc_iommu(zdev, 0, size); + + if (offset != -1) { + zdev->next_bit = offset + size; + if (zdev->next_bit >= zdev->iommu_pages) + zdev->next_bit = 0; + } + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); + return offset; +} + +static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); + if (!zdev->iommu_bitmap) + goto out; + bitmap_clear(zdev->iommu_bitmap, offset, size); + if (offset >= zdev->next_bit) + zdev->next_bit = offset + size; +out: + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); +} + +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + return 0; +} +EXPORT_SYMBOL_GPL(dma_set_mask); + +static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + unsigned long nr_pages, iommu_page_index; + unsigned long pa = page_to_phys(page) + offset; + int flags = ZPCI_PTE_VALID; + dma_addr_t dma_addr; + + /* This rounds up number of pages based on size and offset */ + nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); + iommu_page_index = dma_alloc_iommu(zdev, nr_pages); + if (iommu_page_index == -1) + goto out_err; + + /* Use rounded up size */ + size = nr_pages * PAGE_SIZE; + + dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE; + if (dma_addr + size > zdev->end_dma) + goto out_free; + + if (direction == DMA_NONE || direction == DMA_TO_DEVICE) + flags |= ZPCI_TABLE_PROTECTED; + + if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { + atomic64_add(nr_pages, &zdev->fmb->mapped_pages); + return dma_addr + (offset & ~PAGE_MASK); + } + +out_free: + dma_free_iommu(zdev, iommu_page_index, nr_pages); +out_err: + zpci_err("map error:\n"); + zpci_err_hex(&pa, sizeof(pa)); + return DMA_ERROR_CODE; +} + +static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + unsigned long iommu_page_index; + int npages; + + npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); + dma_addr = dma_addr & PAGE_MASK; + if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, + ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) { + zpci_err("unmap error:\n"); + zpci_err_hex(&dma_addr, sizeof(dma_addr)); + } + + atomic64_add(npages, &zdev->fmb->unmapped_pages); + iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; + dma_free_iommu(zdev, iommu_page_index, npages); +} + +static void *s390_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + struct page *page; + unsigned long pa; + dma_addr_t map; + + size = PAGE_ALIGN(size); + page = alloc_pages(flag, get_order(size)); + if (!page) + return NULL; + + pa = page_to_phys(page); + memset((void *) pa, 0, size); + + map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE, + size, DMA_BIDIRECTIONAL, NULL); + if (dma_mapping_error(dev, map)) { + free_pages(pa, get_order(size)); + return NULL; + } + + atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + if (dma_handle) + *dma_handle = map; + return (void *) pa; +} + +static void s390_dma_free(struct device *dev, size_t size, + void *pa, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + + size = PAGE_ALIGN(size); + atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + free_pages((unsigned long) pa, get_order(size)); +} + +static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nr_elements, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + int mapped_elements = 0; + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nr_elements, i) { + struct page *page = sg_page(s); + s->dma_address = s390_dma_map_pages(dev, page, s->offset, + s->length, dir, NULL); + if (!dma_mapping_error(dev, s->dma_address)) { + s->dma_length = s->length; + mapped_elements++; + } else + goto unmap; + } +out: + return mapped_elements; + +unmap: + for_each_sg(sg, s, mapped_elements, i) { + if (s->dma_address) + s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, + dir, NULL); + s->dma_address = 0; + s->dma_length = 0; + } + mapped_elements = 0; + goto out; +} + +static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nr_elements, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nr_elements, i) { + s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL); + s->dma_address = 0; + s->dma_length = 0; + } +} + +int zpci_dma_init_device(struct zpci_dev *zdev) +{ + int rc; + + spin_lock_init(&zdev->iommu_bitmap_lock); + spin_lock_init(&zdev->dma_table_lock); + + zdev->dma_table = dma_alloc_cpu_table(); + if (!zdev->dma_table) { + rc = -ENOMEM; + goto out_clean; + } + + zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; + zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); + if (!zdev->iommu_bitmap) { + rc = -ENOMEM; + goto out_reg; + } + + rc = zpci_register_ioat(zdev, + 0, + zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + if (rc) + goto out_reg; + return 0; + +out_reg: + dma_free_cpu_table(zdev->dma_table); +out_clean: + return rc; +} + +void zpci_dma_exit_device(struct zpci_dev *zdev) +{ + zpci_unregister_ioat(zdev, 0); + dma_cleanup_tables(zdev); + vfree(zdev->iommu_bitmap); + zdev->iommu_bitmap = NULL; + zdev->next_bit = 0; +} + +static int __init dma_alloc_cpu_table_caches(void) +{ + dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", + ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, + 0, NULL); + if (!dma_region_table_cache) + return -ENOMEM; + + dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", + ZPCI_PT_SIZE, ZPCI_PT_ALIGN, + 0, NULL); + if (!dma_page_table_cache) { + kmem_cache_destroy(dma_region_table_cache); + return -ENOMEM; + } + return 0; +} + +int __init zpci_dma_init(void) +{ + return dma_alloc_cpu_table_caches(); +} + +void zpci_dma_exit(void) +{ + kmem_cache_destroy(dma_page_table_cache); + kmem_cache_destroy(dma_region_table_cache); +} + +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) + +static int __init dma_debug_do_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_debug_do_init); + +struct dma_map_ops s390_dma_ops = { + .alloc = s390_dma_alloc, + .free = s390_dma_free, + .map_sg = s390_dma_map_sg, + .unmap_sg = s390_dma_unmap_sg, + .map_page = s390_dma_map_pages, + .unmap_page = s390_dma_unmap_pages, + /* if we support direct DMA this must be conditional */ + .is_phys = 0, + /* dma_supported is unconditionally true without a callback */ +}; +EXPORT_SYMBOL_GPL(s390_dma_ops); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c new file mode 100644 index 00000000000..6d7f5a3016c --- /dev/null +++ b/arch/s390/pci/pci_event.c @@ -0,0 +1,136 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <asm/pci_debug.h> +#include <asm/sclp.h> + +/* Content Code Description for PCI Function Error */ +struct zpci_ccdf_err { + u32 reserved1; + u32 fh; /* function handle */ + u32 fid; /* function id */ + u32 ett : 4; /* expected table type */ + u32 mvn : 12; /* MSI vector number */ + u32 dmaas : 8; /* DMA address space */ + u32 : 6; + u32 q : 1; /* event qualifier */ + u32 rw : 1; /* read/write */ + u64 faddr; /* failing address */ + u32 reserved3; + u16 reserved4; + u16 pec; /* PCI event code */ +} __packed; + +/* Content Code Description for PCI Function Availability */ +struct zpci_ccdf_avail { + u32 reserved1; + u32 fh; /* function handle */ + u32 fid; /* function id */ + u32 reserved2; + u32 reserved3; + u32 reserved4; + u32 reserved5; + u16 reserved6; + u16 pec; /* PCI event code */ +} __packed; + +static void __zpci_event_error(struct zpci_ccdf_err *ccdf) +{ + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + + zpci_err("error CCDF:\n"); + zpci_err_hex(ccdf, sizeof(*ccdf)); + + if (!zdev) + return; + + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", + pci_name(zdev->pdev), ccdf->pec, ccdf->fid); +} + +void zpci_event_error(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_error(data); +} + +static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) +{ + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + int ret; + + pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", + pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); + zpci_err("avail CCDF:\n"); + zpci_err_hex(ccdf, sizeof(*ccdf)); + + switch (ccdf->pec) { + case 0x0301: /* Standby -> Configured */ + if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) + break; + zdev->state = ZPCI_FN_STATE_CONFIGURED; + zdev->fh = ccdf->fh; + ret = zpci_enable_device(zdev); + if (ret) + break; + pci_rescan_bus(zdev->bus); + break; + case 0x0302: /* Reserved -> Standby */ + if (!zdev) + clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + break; + case 0x0303: /* Deconfiguration requested */ + if (pdev) + pci_stop_and_remove_bus_device(pdev); + + ret = zpci_disable_device(zdev); + if (ret) + break; + + ret = sclp_pci_deconfigure(zdev->fid); + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); + if (!ret) + zdev->state = ZPCI_FN_STATE_STANDBY; + + break; + case 0x0304: /* Configured -> Standby */ + if (pdev) { + /* Give the driver a hint that the function is + * already unusable. */ + pdev->error_state = pci_channel_io_perm_failure; + pci_stop_and_remove_bus_device(pdev); + } + + zdev->fh = ccdf->fh; + zpci_disable_device(zdev); + zdev->state = ZPCI_FN_STATE_STANDBY; + break; + case 0x0306: /* 0x308 or 0x302 for multiple devices */ + clp_rescan_pci_devices(); + break; + case 0x0308: /* Standby -> Reserved */ + if (!zdev) + break; + pci_stop_root_bus(zdev->bus); + pci_remove_root_bus(zdev->bus); + break; + default: + break; + } +} + +void zpci_event_availability(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_availability(data); +} diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c new file mode 100644 index 00000000000..85267c058af --- /dev/null +++ b/arch/s390/pci/pci_insn.c @@ -0,0 +1,202 @@ +/* + * s390 specific pci instructions + * + * Copyright IBM Corp. 2013 + */ + +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <asm/pci_insn.h> +#include <asm/processor.h> + +#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */ + +/* Modify PCI Function Controls */ +static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) +{ + u8 cc; + + asm volatile ( + " .insn rxy,0xe300000000d0,%[req],%[fib]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib) + : : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int zpci_mod_fc(u64 req, struct zpci_fib *fib) +{ + u8 cc, status; + + do { + cc = __mpcifc(req, fib, &status); + if (cc == 2) + msleep(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d\n", + __func__, cc, status); + return (cc) ? -EIO : 0; +} + +/* Refresh PCI Translations */ +static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) +{ + register u64 __addr asm("2") = addr; + register u64 __range asm("3") = range; + u8 cc; + + asm volatile ( + " .insn rre,0xb9d30000,%[fn],%[addr]\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=d" (cc), [fn] "+d" (fn) + : [addr] "d" (__addr), "d" (__range) + : "cc"); + *status = fn >> 24 & 0xff; + return cc; +} + +int zpci_refresh_trans(u64 fn, u64 addr, u64 range) +{ + u8 cc, status; + + do { + cc = __rpcit(fn, addr, range, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d dma_addr: %Lx size: %Lx\n", + __func__, cc, status, addr, range); + return (cc) ? -EIO : 0; +} + +/* Set Interruption Controls */ +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) +{ + asm volatile ( + " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" + : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused)); +} + +/* PCI Load */ +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + int cc = -ENXIO; + u64 __data; + + asm volatile ( + " .insn rre,0xb9d20000,%[data],%[req]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req) + : "d" (__offset) + : "cc"); + *status = __req >> 24 & 0xff; + if (!cc) + *data = __data; + + return cc; +} + +int zpci_load(u64 *data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcilg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(zpci_load); + +/* PCI Store */ +static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) +{ + register u64 __req asm("2") = req; + register u64 __offset asm("3") = offset; + int cc = -ENXIO; + + asm volatile ( + " .insn rre,0xb9d00000,%[data],%[req]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (__req) + : "d" (__offset), [data] "d" (data) + : "cc"); + *status = __req >> 24 & 0xff; + return cc; +} + +int zpci_store(u64 data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcistg(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(zpci_store); + +/* PCI Store Block */ +static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [req] "+d" (req) + : [offset] "d" (offset), [data] "Q" (*data) + : "cc"); + *status = req >> 24 & 0xff; + return cc; +} + +int zpci_store_block(const u64 *data, u64 req, u64 offset) +{ + u8 status; + int cc; + + do { + cc = __pcistb(data, req, offset, &status); + if (cc == 2) + udelay(ZPCI_INSN_BUSY_DELAY); + } while (cc == 2); + + if (cc) + printk_once(KERN_ERR "%s: error cc: %d status: %d req: %Lx offset: %Lx\n", + __func__, cc, status, req, offset); + return (cc > 0) ? -EIO : cc; +} +EXPORT_SYMBOL_GPL(zpci_store_block); diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c new file mode 100644 index 00000000000..9190214b870 --- /dev/null +++ b/arch/s390/pci/pci_sysfs.c @@ -0,0 +1,110 @@ +/* + * Copyright IBM Corp. 2012 + * + * Author(s): + * Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#define COMPONENT "zPCI" +#define pr_fmt(fmt) COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/stat.h> +#include <linux/pci.h> + +#define zpci_attr(name, fmt, member) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); \ + \ + return sprintf(buf, fmt, zdev->member); \ +} \ +static DEVICE_ATTR_RO(name) + +zpci_attr(function_id, "0x%08x\n", fid); +zpci_attr(function_handle, "0x%08x\n", fh); +zpci_attr(pchid, "0x%04x\n", pchid); +zpci_attr(pfgid, "0x%02x\n", pfgid); +zpci_attr(vfn, "0x%04x\n", vfn); +zpci_attr(pft, "0x%02x\n", pft); +zpci_attr(uid, "0x%x\n", uid); +zpci_attr(segment0, "0x%02x\n", pfip[0]); +zpci_attr(segment1, "0x%02x\n", pfip[1]); +zpci_attr(segment2, "0x%02x\n", pfip[2]); +zpci_attr(segment3, "0x%02x\n", pfip[3]); + +static ssize_t recover_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + int ret; + + if (!device_remove_file_self(dev, attr)) + return count; + + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + return ret; + + ret = zpci_enable_device(zdev); + if (ret) + return ret; + + pci_rescan_bus(zdev->bus); + return count; +} +static DEVICE_ATTR_WO(recover); + +static ssize_t util_string_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + + return memory_read_from_buffer(buf, count, &off, zdev->util_str, + sizeof(zdev->util_str)); +} +static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static struct bin_attribute *zpci_bin_attrs[] = { + &bin_attr_util_string, + NULL, +}; + +static struct attribute *zpci_dev_attrs[] = { + &dev_attr_function_id.attr, + &dev_attr_function_handle.attr, + &dev_attr_pchid.attr, + &dev_attr_pfgid.attr, + &dev_attr_pft.attr, + &dev_attr_vfn.attr, + &dev_attr_uid.attr, + &dev_attr_recover.attr, + NULL, +}; +static struct attribute_group zpci_attr_group = { + .attrs = zpci_dev_attrs, + .bin_attrs = zpci_bin_attrs, +}; + +static struct attribute *pfip_attrs[] = { + &dev_attr_segment0.attr, + &dev_attr_segment1.attr, + &dev_attr_segment2.attr, + &dev_attr_segment3.attr, + NULL, +}; +static struct attribute_group pfip_attr_group = { + .name = "pfip", + .attrs = pfip_attrs, +}; + +const struct attribute_group *zpci_attr_groups[] = { + &zpci_attr_group, + &pfip_attr_group, + NULL, +}; |
