diff options
Diffstat (limited to 'arch/s390/kernel')
91 files changed, 18781 insertions, 10727 deletions
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/s390/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4d3e38392cb..a95c4ca9961 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,37 +2,69 @@ # Makefile for the linux kernel. # -EXTRA_AFLAGS := -traditional +ifdef CONFIG_FUNCTION_TRACER +# Don't trace early setup code and tracing code +CFLAGS_REMOVE_early.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg +endif # # Passing null pointers is ok for smp code, since we access the lowcore here. # CFLAGS_smp.o := -Wno-nonnull -obj-y := bitmap.o traps.o time.o process.o base.o early.o \ - setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - semaphore.o s390_ext.o debug.o irq.o ipl.o dis.o diag.o +# +# Disable tailcall optimizations for stack / callchain walking functions +# since this might generate broken code when accessing register 15 and +# passing its content to other functions. +# +CFLAGS_stacktrace.o += -fno-optimize-sibling-calls +CFLAGS_dumpstack.o += -fno-optimize-sibling-calls + +# +# Pass UTS_MACHINE for user_regset definition +# +CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' + +CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w + +obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o +obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y += dumpstack.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) +obj-y += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -extra-y += head.o init_task.o vmlinux.lds +extra-y += head.o vmlinux.lds +extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o - +obj-$(CONFIG_SCHED_BOOK) += topology.o +obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ - compat_wrapper.o compat_exec_domain.o \ - binfmt_elf32.o $(compat-obj-y) +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o +obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) -obj-$(CONFIG_VIRT_TIMER) += vtime.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -# Kexec part -S390_KEXEC_OBJS := machine_kexec.o crash.o -S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) -obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) +ifdef CONFIG_64BIT +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ + perf_cpum_cf_events.o +obj-y += runtime_instr.o cache.o +endif +# vdso +obj-$(CONFIG_64BIT) += vdso64/ +obj-$(CONFIG_32BIT) += vdso32/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 1375f8a4469..afe1715a4eb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -4,45 +4,179 @@ * and format the required data. */ -#include <linux/sched.h> - -/* Use marker if you need to separate the values later */ +#define ASM_OFFSETS_C -#define DEFINE(sym, val, marker) \ - asm volatile("\n->" #sym " %0 " #val " " #marker : : "i" (val)) +#include <linux/kbuild.h> +#include <linux/kvm_host.h> +#include <linux/sched.h> +#include <asm/cputime.h> +#include <asm/vdso.h> +#include <asm/pgtable.h> -#define BLANK() asm volatile("\n->" : : ) +/* + * Make sure that the compiler is new enough. We want a compiler that + * is known to work with the "Q" assembler constraint. + */ +#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#error Your compiler is too old; please use version 3.3.3 or newer +#endif int main(void) { - DEFINE(__THREAD_info, offsetof(struct task_struct, stack),); - DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),); - DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),); - DEFINE(__THREAD_mm_segment, - offsetof(struct task_struct, thread.mm_segment),); - BLANK(); - DEFINE(__TASK_pid, offsetof(struct task_struct, pid),); - BLANK(); - DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),); - DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),); - DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id),); - BLANK(); - DEFINE(__TI_task, offsetof(struct thread_info, task),); - DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain),); - DEFINE(__TI_flags, offsetof(struct thread_info, flags),); - DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),); - DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),); - BLANK(); - DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),); - DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),); - DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),); - DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),); - DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),); - DEFINE(__PT_SIZE, sizeof(struct pt_regs),); - BLANK(); - DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain),); - DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs),); - DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1),); + DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); + DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); + DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment)); + BLANK(); + DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); + BLANK(); + DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); + BLANK(); + DEFINE(__TI_task, offsetof(struct thread_info, task)); + DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); + DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); + DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); + DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); + DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); + DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); + DEFINE(__TI_last_break, offsetof(struct thread_info, last_break)); + BLANK(); + DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); + DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); + DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); + DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); + DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); + DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); + DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags)); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); + BLANK(); + DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); + DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); + DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + BLANK(); + /* timeval/timezone offsets for use by vdso */ + DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); + DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); + DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); + DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift)); + DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time)); + /* constants used by the vdso */ + DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); + DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + BLANK(); + /* idle data offsets */ + DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); + DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit)); + DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter)); + DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit)); + /* lowcore offsets */ + DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); + DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); + DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); + DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); + DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); + DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); + DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); + DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); + DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num)); + DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code)); + DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid)); + DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); + DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id)); + DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); + DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id)); + DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id)); + DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code)); + DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); + DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); + DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); + DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); + DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); + DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); + DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code)); + DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); + DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); + DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); + DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); + DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); + DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); + DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); + DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); + DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); + DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); + DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); + DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + BLANK(); + DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); + DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); + DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); + DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags)); + DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); + DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); + DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); + DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer)); + DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer)); + DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer)); + DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer)); + DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer)); + DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer)); + DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); + DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); + DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); + DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); + DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); + DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); + DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); + DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); + DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data)); + DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source)); + DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); + DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); + DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); + DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); + DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); + DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); + DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + BLANK(); + DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); + DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); + DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); + DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area)); + DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area)); + DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); + DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); + DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); +#ifdef CONFIG_32BIT + DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); +#else /* CONFIG_32BIT */ + DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code)); + DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address)); + DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); + DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); + DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); + DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); + DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); + DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); + DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); + DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); + DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); +#endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index dc7e5259770..797a823a227 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -1,18 +1,19 @@ /* * arch/s390/kernel/base.S * - * Copyright IBM Corp. 2006,2007 + * Copyright IBM Corp. 2006, 2007 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> * Michael Holzheu <holzheu@de.ibm.com> */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/lowcore.h> +#include <asm/sigp.h> #ifdef CONFIG_64BIT - .globl s390_base_mcck_handler -s390_base_mcck_handler: +ENTRY(s390_base_mcck_handler) basr %r13,0 0: lg %r15,__LC_PANIC_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD @@ -26,14 +27,14 @@ s390_base_mcck_handler: lpswe __LC_MCK_OLD_PSW .section .bss + .align 8 .globl s390_base_mcck_handler_fn s390_base_mcck_handler_fn: .quad 0 .previous - .globl s390_base_ext_handler -s390_base_ext_handler: - stmg %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_ext_handler) + stmg %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn @@ -41,19 +42,19 @@ s390_base_ext_handler: ltgr %r1,%r1 jz 1f basr %r14,%r1 -1: lmg %r0,%r15,__LC_SAVE_AREA +1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW .section .bss + .align 8 .globl s390_base_ext_handler_fn s390_base_ext_handler_fn: .quad 0 .previous - .globl s390_base_pgm_handler -s390_base_pgm_handler: - stmg %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_pgm_handler) + stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn @@ -61,7 +62,7 @@ s390_base_pgm_handler: ltgr %r1,%r1 jz 1f basr %r14,%r1 - lmg %r0,%r15,__LC_SAVE_AREA + lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -70,15 +71,69 @@ disabled_wait_psw: .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler .section .bss + .align 8 .globl s390_base_pgm_handler_fn s390_base_pgm_handler_fn: .quad 0 .previous +# +# Calls diag 308 subcode 1 and continues execution +# +# The following conditions must be ensured before calling this function: +# * Prefix register = 0 +# * Lowcore protection is disabled +# +ENTRY(diag308_reset) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) + larl %r4,.Lcontinue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 + lghi %r3,0 + lg %r4,0(%r4) # Save PSW + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + diag %r1,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,.Lcontinue_psw # Restore PSW flags + lpswe 0(%r4) +.Lcontinue: + br %r14 +.align 16 +.Lrestart_psw: + .long 0x00080000,0x80000000 + .Lrestart_part2 + + .section .data..nosave,"aw",@progbits +.align 8 +.Lcontinue_psw: + .quad 0,.Lcontinue + .previous + + .section .bss +.align 8 +.Lctlregs: + .rept 16 + .quad 0 + .endr +.Lfpctl: + .long 0 + .previous + #else /* CONFIG_64BIT */ - .globl s390_base_mcck_handler -s390_base_mcck_handler: +ENTRY(s390_base_mcck_handler) basr %r13,0 0: l %r15,__LC_PANIC_STACK # load panic stack ahi %r15,-STACK_FRAME_OVERHEAD @@ -93,14 +148,14 @@ s390_base_mcck_handler: 2: .long s390_base_mcck_handler_fn .section .bss + .align 4 .globl s390_base_mcck_handler_fn s390_base_mcck_handler_fn: .long 0 .previous - .globl s390_base_ext_handler -s390_base_ext_handler: - stm %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_ext_handler) + stm %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -108,21 +163,21 @@ s390_base_ext_handler: ltr %r1,%r1 jz 1f basr %r14,%r1 -1: lm %r0,%r15,__LC_SAVE_AREA +1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpsw __LC_EXT_OLD_PSW 2: .long s390_base_ext_handler_fn .section .bss + .align 4 .globl s390_base_ext_handler_fn s390_base_ext_handler_fn: .long 0 .previous - .globl s390_base_pgm_handler -s390_base_pgm_handler: - stm %r0,%r15,__LC_SAVE_AREA +ENTRY(s390_base_pgm_handler) + stm %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -130,7 +185,7 @@ s390_base_pgm_handler: ltr %r1,%r1 jz 1f basr %r14,%r1 - lm %r0,%r15,__LC_SAVE_AREA + lm %r0,%r15,__LC_SAVE_AREA_SYNC lpsw __LC_PGM_OLD_PSW 1: lpsw disabled_wait_psw-0b(%r13) @@ -142,6 +197,7 @@ disabled_wait_psw: .long 0x000a0000,0x00000000 + s390_base_pgm_handler .section .bss + .align 4 .globl s390_base_pgm_handler_fn s390_base_pgm_handler_fn: .long 0 diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c deleted file mode 100644 index 3e1c315b736..00000000000 --- a/arch/s390/kernel/binfmt_elf32.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Support for 32-bit Linux for S390 ELF binaries. - * - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * Heavily inspired by the 32-bit Sparc compat code which is - * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com) - * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#define __ASMS390_ELF_H - -#include <linux/time.h> - -/* - * These are used to set parameters in the core dumps. - */ -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_S390 - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) \ - (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \ - && (x)->e_ident[EI_CLASS] == ELF_CLASS) - -/* ELF register definitions */ -#define NUM_GPRS 16 -#define NUM_FPRS 16 -#define NUM_ACRS 16 - -/* For SVR4/S390 the function pointer to be registered with `atexit` is - passed in R14. */ -#define ELF_PLAT_INIT(_r, load_addr) \ - do { \ - _r->gprs[14] = 0; \ - } while(0) - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) - -/* Wow, the "main" arch needs arch dependent functions too.. :) */ - -/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is - now struct_user_regs, they are different) */ - -#define ELF_CORE_COPY_REGS(pr_reg, regs) dump_regs32(regs, &pr_reg); - -#define ELF_CORE_COPY_TASK_REGS(tsk, regs) dump_task_regs32(tsk, regs) - -#define ELF_CORE_COPY_FPREGS(tsk, fpregs) dump_task_fpu(tsk, fpregs) - -/* This yields a mask that user programs can use to figure out what - instruction set this CPU supports. */ - -#define ELF_HWCAP (0) - -/* This yields a string that ld.so will use to load implementation - specific libraries for optimization. This is more specific in - intent than poking at uname or /proc/cpuinfo. - - For the moment, we have only optimizations for the Intel generations, - but that could change... */ - -#define ELF_PLATFORM (NULL) - -#define SET_PERSONALITY(ex, ibcs2) \ -do { \ - if (ibcs2) \ - set_personality(PER_SVR4); \ - else if (current->personality != PER_LINUX32) \ - set_personality(PER_LINUX); \ - set_thread_flag(TIF_31BIT); \ -} while (0) - -#include "compat_linux.h" - -typedef _s390_fp_regs32 elf_fpregset_t; - -typedef struct -{ - - _psw_t32 psw; - __u32 gprs[__NUM_GPRS]; - __u32 acrs[__NUM_ACRS]; - __u32 orig_gpr2; -} s390_regs32; -typedef s390_regs32 elf_gregset_t; - -static inline int dump_regs32(struct pt_regs *ptregs, elf_gregset_t *regs) -{ - int i; - - memcpy(®s->psw.mask, &ptregs->psw.mask, 4); - memcpy(®s->psw.addr, (char *)&ptregs->psw.addr + 4, 4); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = ptregs->gprs[i]; - save_access_regs(regs->acrs); - regs->orig_gpr2 = ptregs->orig_gpr2; - return 1; -} - -static inline int dump_task_regs32(struct task_struct *tsk, elf_gregset_t *regs) -{ - struct pt_regs *ptregs = task_pt_regs(tsk); - int i; - - memcpy(®s->psw.mask, &ptregs->psw.mask, 4); - memcpy(®s->psw.addr, (char *)&ptregs->psw.addr + 4, 4); - for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = ptregs->gprs[i]; - memcpy(regs->acrs, tsk->thread.acrs, sizeof(regs->acrs)); - regs->orig_gpr2 = ptregs->orig_gpr2; - return 1; -} - -static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) -{ - if (tsk == current) - save_fp_regs((s390_fp_regs *) fpregs); - else - memcpy(fpregs, &tsk->thread.fp_regs, sizeof(elf_fpregset_t)); - return 1; -} - -#include <asm/processor.h> -#include <asm/pgalloc.h> -#include <linux/module.h> -#include <linux/elfcore.h> -#include <linux/binfmts.h> -#include <linux/compat.h> - -#define elf_prstatus elf_prstatus32 -struct elf_prstatus32 -{ - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - u32 pr_sigpend; /* Set of pending signals */ - u32 pr_sighold; /* Set of held signals */ - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct compat_timeval pr_utime; /* User time */ - struct compat_timeval pr_stime; /* System time */ - struct compat_timeval pr_cutime; /* Cumulative user time */ - struct compat_timeval pr_cstime; /* Cumulative system time */ - elf_gregset_t pr_reg; /* GP registers */ - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define elf_prpsinfo elf_prpsinfo32 -struct elf_prpsinfo32 -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - u32 pr_flag; /* flags */ - u16 pr_uid; - u16 pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - -#include <linux/highuid.h> - -/* -#define init_elf_binfmt init_elf32_binfmt -*/ - -#undef start_thread -#define start_thread start_thread31 - -static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw, - unsigned long new_stackp) -{ - set_fs(USER_DS); - regs->psw.mask = psw_user32_bits; - regs->psw.addr = new_psw; - regs->gprs[15] = new_stackp; - crst_table_downgrade(current->mm, 1UL << 31); -} - -MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries," - " Copyright 2000 IBM Corporation"); -MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>"); - -#undef MODULE_DESCRIPTION -#undef MODULE_AUTHOR - -#undef cputime_to_timeval -#define cputime_to_timeval cputime_to_compat_timeval -static inline void -cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) -{ - value->tv_usec = cputime % 1000000; - value->tv_sec = cputime / 1000000; -} - -#include "../../../fs/binfmt_elf.c" - diff --git a/arch/s390/kernel/bitmap.S b/arch/s390/kernel/bitmap.S deleted file mode 100644 index dfb41f946e2..00000000000 --- a/arch/s390/kernel/bitmap.S +++ /dev/null @@ -1,56 +0,0 @@ -/* - * arch/s390/kernel/bitmap.S - * Bitmaps for set_bit, clear_bit, test_and_set_bit, ... - * See include/asm-s390/{bitops.h|posix_types.h} for details - * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - */ - - .globl _oi_bitmap -_oi_bitmap: - .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 - - .globl _ni_bitmap -_ni_bitmap: - .byte 0xFE,0xFD,0xFB,0xF7,0xEF,0xDF,0xBF,0x7F - - .globl _zb_findmap -_zb_findmap: - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4 - .byte 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 - - .globl _sb_findmap -_sb_findmap: - .byte 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - .byte 4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c new file mode 100644 index 00000000000..c0b03c28d15 --- /dev/null +++ b/arch/s390/kernel/cache.c @@ -0,0 +1,389 @@ +/* + * Extract CPU cache information and expose them via sysfs. + * + * Copyright IBM Corp. 2012 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/notifier.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <asm/facility.h> + +struct cache { + unsigned long size; + unsigned int line_size; + unsigned int associativity; + unsigned int nr_sets; + unsigned int level : 3; + unsigned int type : 2; + unsigned int private : 1; + struct list_head list; +}; + +struct cache_dir { + struct kobject *kobj; + struct cache_index_dir *index; +}; + +struct cache_index_dir { + struct kobject kobj; + int cpu; + struct cache *cache; + struct cache_index_dir *next; +}; + +enum { + CACHE_SCOPE_NOTEXISTS, + CACHE_SCOPE_PRIVATE, + CACHE_SCOPE_SHARED, + CACHE_SCOPE_RESERVED, +}; + +enum { + CACHE_TYPE_SEPARATE, + CACHE_TYPE_DATA, + CACHE_TYPE_INSTRUCTION, + CACHE_TYPE_UNIFIED, +}; + +enum { + EXTRACT_TOPOLOGY, + EXTRACT_LINE_SIZE, + EXTRACT_SIZE, + EXTRACT_ASSOCIATIVITY, +}; + +enum { + CACHE_TI_UNIFIED = 0, + CACHE_TI_DATA = 0, + CACHE_TI_INSTRUCTION, +}; + +struct cache_info { + unsigned char : 4; + unsigned char scope : 2; + unsigned char type : 2; +}; + +#define CACHE_MAX_LEVEL 8 + +union cache_topology { + struct cache_info ci[CACHE_MAX_LEVEL]; + unsigned long long raw; +}; + +static const char * const cache_type_string[] = { + "Data", + "Instruction", + "Unified", +}; + +static struct cache_dir *cache_dir_cpu[NR_CPUS]; +static LIST_HEAD(cache_list); + +void show_cacheinfo(struct seq_file *m) +{ + struct cache *cache; + int index = 0; + + list_for_each_entry(cache, &cache_list, list) { + seq_printf(m, "cache%-11d: ", index); + seq_printf(m, "level=%d ", cache->level); + seq_printf(m, "type=%s ", cache_type_string[cache->type]); + seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); + seq_printf(m, "size=%luK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->line_size); + seq_printf(m, "associativity=%d", cache->associativity); + seq_puts(m, "\n"); + index++; + } +} + +static inline unsigned long ecag(int ai, int li, int ti) +{ + unsigned long cmd, val; + + cmd = ai << 4 | li << 1 | ti; + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (cmd)); + return val; +} + +static int __init cache_add(int level, int private, int type) +{ + struct cache *cache; + int ti; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + if (type == CACHE_TYPE_INSTRUCTION) + ti = CACHE_TI_INSTRUCTION; + else + ti = CACHE_TI_UNIFIED; + cache->size = ecag(EXTRACT_SIZE, level, ti); + cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); + cache->nr_sets = cache->size / cache->associativity; + cache->nr_sets /= cache->line_size; + cache->private = private; + cache->level = level + 1; + cache->type = type - 1; + list_add_tail(&cache->list, &cache_list); + return 0; +} + +static void __init cache_build_info(void) +{ + struct cache *cache, *next; + union cache_topology ct; + int level, private, rc; + + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (level = 0; level < CACHE_MAX_LEVEL; level++) { + switch (ct.ci[level].scope) { + case CACHE_SCOPE_SHARED: + private = 0; + break; + case CACHE_SCOPE_PRIVATE: + private = 1; + break; + default: + return; + } + if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { + rc = cache_add(level, private, CACHE_TYPE_DATA); + rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); + } else { + rc = cache_add(level, private, ct.ci[level].type); + } + if (rc) + goto error; + } + return; +error: + list_for_each_entry_safe(cache, next, &cache_list, list) { + list_del(&cache->list); + kfree(cache); + } +} + +static struct cache_dir *cache_create_cache_dir(int cpu) +{ + struct cache_dir *cache_dir; + struct kobject *kobj = NULL; + struct device *dev; + + dev = get_cpu_device(cpu); + if (!dev) + goto out; + kobj = kobject_create_and_add("cache", &dev->kobj); + if (!kobj) + goto out; + cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); + if (!cache_dir) + goto out; + cache_dir->kobj = kobj; + cache_dir_cpu[cpu] = cache_dir; + return cache_dir; +out: + kobject_put(kobj); + return NULL; +} + +static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj) +{ + return container_of(kobj, struct cache_index_dir, kobj); +} + +static void cache_index_release(struct kobject *kobj) +{ + struct cache_index_dir *index; + + index = kobj_to_cache_index_dir(kobj); + kfree(index); +} + +static ssize_t cache_index_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kobj_attribute *kobj_attr; + + kobj_attr = container_of(attr, struct kobj_attribute, attr); + return kobj_attr->show(kobj, kobj_attr, buf); +} + +#define DEFINE_CACHE_ATTR(_name, _format, _value) \ +static ssize_t cache_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct cache_index_dir *index; \ + \ + index = kobj_to_cache_index_dir(kobj); \ + return sprintf(buf, _format, _value); \ +} \ +static struct kobj_attribute cache_##_name##_attr = \ + __ATTR(_name, 0444, cache_##_name##_show, NULL); + +DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); +DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); +DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); +DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); +DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); +DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); + +static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) +{ + struct cache_index_dir *index; + int len; + + index = kobj_to_cache_index_dir(kobj); + len = type ? + cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) : + cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)); + len += sprintf(&buf[len], "\n"); + return len; +} + +static ssize_t shared_cpu_map_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 0, buf); +} +static struct kobj_attribute cache_shared_cpu_map_attr = + __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); + +static ssize_t shared_cpu_list_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return shared_cpu_map_func(kobj, 1, buf); +} +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + +static struct attribute *cache_index_default_attrs[] = { + &cache_type_attr.attr, + &cache_size_attr.attr, + &cache_number_of_sets_attr.attr, + &cache_ways_of_associativity_attr.attr, + &cache_level_attr.attr, + &cache_coherency_line_size_attr.attr, + &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, + NULL, +}; + +static const struct sysfs_ops cache_index_ops = { + .show = cache_index_show, +}; + +static struct kobj_type cache_index_type = { + .sysfs_ops = &cache_index_ops, + .release = cache_index_release, + .default_attrs = cache_index_default_attrs, +}; + +static int cache_create_index_dir(struct cache_dir *cache_dir, + struct cache *cache, int index, int cpu) +{ + struct cache_index_dir *index_dir; + int rc; + + index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); + if (!index_dir) + return -ENOMEM; + index_dir->cache = cache; + index_dir->cpu = cpu; + rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, + cache_dir->kobj, "index%d", index); + if (rc) + goto out; + index_dir->next = cache_dir->index; + cache_dir->index = index_dir; + return 0; +out: + kfree(index_dir); + return rc; +} + +static int cache_add_cpu(int cpu) +{ + struct cache_dir *cache_dir; + struct cache *cache; + int rc, index = 0; + + if (list_empty(&cache_list)) + return 0; + cache_dir = cache_create_cache_dir(cpu); + if (!cache_dir) + return -ENOMEM; + list_for_each_entry(cache, &cache_list, list) { + if (!cache->private) + break; + rc = cache_create_index_dir(cache_dir, cache, index, cpu); + if (rc) + return rc; + index++; + } + return 0; +} + +static void cache_remove_cpu(int cpu) +{ + struct cache_index_dir *index, *next; + struct cache_dir *cache_dir; + + cache_dir = cache_dir_cpu[cpu]; + if (!cache_dir) + return; + index = cache_dir->index; + while (index) { + next = index->next; + kobject_put(&index->kobj); + index = next; + } + kobject_put(cache_dir->kobj); + kfree(cache_dir); + cache_dir_cpu[cpu] = NULL; +} + +static int cache_hotplug(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + int rc = 0; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cache_add_cpu(cpu); + if (rc) + cache_remove_cpu(cpu); + break; + case CPU_DEAD: + cache_remove_cpu(cpu); + break; + } + return rc ? NOTIFY_BAD : NOTIFY_OK; +} + +static int __init cache_init(void) +{ + int cpu; + + if (!test_facility(34)) + return 0; + cache_build_info(); + + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) + cache_add_cpu(cpu); + __hotcpu_notifier(cache_hotplug, 0); + cpu_notifier_register_done(); + return 0; +} +device_initcall(cache_init); diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c deleted file mode 100644 index 914d49444f9..00000000000 --- a/arch/s390/kernel/compat_exec_domain.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Support for 32-bit Linux for S390 personality. - * - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/personality.h> -#include <linux/sched.h> - -static struct exec_domain s390_exec_domain; - -static int __init s390_init (void) -{ - s390_exec_domain.name = "Linux/s390"; - s390_exec_domain.handler = NULL; - s390_exec_domain.pers_low = PER_LINUX32; - s390_exec_domain.pers_high = PER_LINUX32; - s390_exec_domain.signal_map = default_exec_domain.signal_map; - s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap; - register_exec_domain(&s390_exec_domain); - return 0; -} - -__initcall(s390_init); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 50b85d07ddd..ca38139423a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -1,8 +1,6 @@ /* - * arch/s390x/kernel/linux32.c - * * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Gerhard Tonn (ton@de.ibm.com) * Thomas Spatzier (tspat@de.ibm.com) @@ -24,22 +22,13 @@ #include <linux/signal.h> #include <linux/resource.h> #include <linux/times.h> -#include <linux/utsname.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/sem.h> #include <linux/msg.h> #include <linux/shm.h> -#include <linux/slab.h> #include <linux/uio.h> -#include <linux/nfs_fs.h> #include <linux/quota.h> #include <linux/module.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> @@ -59,23 +48,16 @@ #include <linux/ptrace.h> #include <linux/fadvise.h> #include <linux/ipc.h> +#include <linux/slab.h> #include <asm/types.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <net/scm.h> #include <net/sock.h> #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); - /* For this source file, we want overflow handling. */ #undef high2lowuid @@ -104,92 +86,111 @@ long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | #define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) #define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) -asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename, + u16, user, u16, group) { return sys_chown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, + filename, u16, user, u16, group) { return sys_lchown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group) { return sys_fchown(fd, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_setregid16(u16 rgid, u16 egid) +COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) { return sys_setregid(low2highgid(rgid), low2highgid(egid)); } -asmlinkage long sys32_setgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) { return sys_setgid((gid_t)gid); } -asmlinkage long sys32_setreuid16(u16 ruid, u16 euid) +COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) { return sys_setreuid(low2highuid(ruid), low2highuid(euid)); } -asmlinkage long sys32_setuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) { return sys_setuid((uid_t)uid); } -asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) +COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) { return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); + low2highuid(suid)); } -asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid) +COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp, + u16 __user *, euidp, u16 __user *, suidp) { + const struct cred *cred = current_cred(); int retval; + u16 ruid, euid, suid; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); + euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); + suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); + + if (!(retval = put_user(ruid, ruidp)) && + !(retval = put_user(euid, euidp))) + retval = put_user(suid, suidp); return retval; } -asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) +COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid) { return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); + low2highgid(sgid)); } -asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid) +COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, + u16 __user *, egidp, u16 __user *, sgidp) { + const struct cred *cred = current_cred(); int retval; + u16 rgid, egid, sgid; + + rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); + egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); + sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(rgid, rgidp)) && + !(retval = put_user(egid, egidp))) + retval = put_user(sgid, sgidp); return retval; } -asmlinkage long sys32_setfsuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) { return sys_setfsuid((uid_t)uid); } -asmlinkage long sys32_setfsgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) { return sys_setfsgid((gid_t)gid); } static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { - group = (u16)GROUP_AT(group_info, i); + kgid = GROUP_AT(group_info, i); + group = (u16)from_kgid_munged(user_ns, kgid); if (put_user(group, grouplist+i)) return -EFAULT; } @@ -199,43 +200,51 @@ static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) { + struct user_namespace *user_ns = current_user_ns(); int i; u16 group; + kgid_t kgid; for (i = 0; i < group_info->ngroups; i++) { if (get_user(group, grouplist+i)) return -EFAULT; - GROUP_AT(group_info, i) = (gid_t)group; + + kgid = make_kgid(user_ns, (gid_t)group); + if (!gid_valid(kgid)) + return -EINVAL; + + GROUP_AT(group_info, i) = kgid; } return 0; } -asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(cred->group_info); + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(cred->group_info); return i; } -asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist) { struct group_info *group_info; int retval; @@ -260,479 +269,67 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) return retval; } -asmlinkage long sys32_getuid16(void) -{ - return high2lowuid(current->uid); -} - -asmlinkage long sys32_geteuid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getuid16) { - return high2lowuid(current->euid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); } -asmlinkage long sys32_getgid16(void) +COMPAT_SYSCALL_DEFINE0(s390_geteuid16) { - return high2lowgid(current->gid); + return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); } -asmlinkage long sys32_getegid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getgid16) { - return high2lowgid(current->egid); + return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); } -/* 32-bit timeval and related flotsam. */ - -static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i) +COMPAT_SYSCALL_DEFINE0(s390_getegid16) { - return (!access_ok(VERIFY_READ, o, sizeof(*o)) || - (__get_user(o->tv_sec, &i->tv_sec) || - __get_user(o->tv_usec, &i->tv_usec))); + return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } -static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i) -{ - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) || - __put_user(i->tv_usec, &o->tv_usec))); -} - -/* - * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation. - * - * This is really horribly ugly. - */ #ifdef CONFIG_SYSVIPC -asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, + compat_ulong_t, third, compat_uptr_t, ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - - call &= 0xffff; - - switch (call) { - case SEMTIMEDOP: - return compat_sys_semtimedop(first, compat_ptr(ptr), - second, compat_ptr(third)); - case SEMOP: - /* struct sembuf is the same on 32 and 64bit :)) */ - return sys_semtimedop(first, compat_ptr(ptr), - second, NULL); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: - return compat_sys_semctl(first, second, third, - compat_ptr(ptr)); - case MSGSND: - return compat_sys_msgsnd(first, second, third, - compat_ptr(ptr)); - case MSGRCV: - return compat_sys_msgrcv(first, second, 0, third, - 0, compat_ptr(ptr)); - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return compat_sys_msgctl(first, second, compat_ptr(ptr)); - case SHMAT: - return compat_sys_shmat(first, second, third, - 0, compat_ptr(ptr)); - case SHMDT: - return sys_shmdt(compat_ptr(ptr)); - case SHMGET: - return sys_shmget(first, (unsigned)second, third); - case SHMCTL: - return compat_sys_shmctl(first, second, compat_ptr(ptr)); - } - - return -ENOSYS; + return compat_sys_ipc(call, first, second, third, ptr, third); } #endif -asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) -{ - if ((int)high < 0) - return -EINVAL; - else - return sys_truncate(path, (high << 32) | low); -} - -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) -{ - if ((int)high < 0) - return -EINVAL; - else - return sys_ftruncate(fd, (high << 32) | low); -} - -int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) -{ - compat_ino_t ino; - int err; - - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - ino = stat->ino; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) - return -EOVERFLOW; - - err = put_user(old_encode_dev(stat->dev), &statbuf->st_dev); - err |= put_user(stat->ino, &statbuf->st_ino); - err |= put_user(stat->mode, &statbuf->st_mode); - err |= put_user(stat->nlink, &statbuf->st_nlink); - err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid); - err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid); - err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev); - err |= put_user(stat->size, &statbuf->st_size); - err |= put_user(stat->atime.tv_sec, &statbuf->st_atime); - err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec); - err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime); - err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec); - err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime); - err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec); - err |= put_user(stat->blksize, &statbuf->st_blksize); - err |= put_user(stat->blocks, &statbuf->st_blocks); -/* fixme - err |= put_user(0, &statbuf->__unused4[0]); - err |= put_user(0, &statbuf->__unused4[1]); -*/ - return err; -} - -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs (); - - set_fs (KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, - (struct timespec __force __user *) &t); - set_fs (old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - -asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - if (set) { - if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) - return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } - } - set_fs (KERNEL_DS); - ret = sys_rt_sigprocmask(how, - set ? (sigset_t __force __user *) &s : NULL, - oset ? (sigset_t __force __user *) &s : NULL, - sigsetsize); - set_fs (old_fs); - if (ret) return ret; - if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return 0; -} - -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, - size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs (KERNEL_DS); - ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); - set_fs (old_fs); - if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return ret; -} - -asmlinkage long -sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - if (copy_siginfo_from_user32(&info, uinfo)) - return -EFAULT; - set_fs (KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *) &info); - set_fs (old_fs); - return ret; -} - -/* - * sys32_execve() executes a new program after the asm stub has set - * things up for us. This should basically do what I want it to. - */ -asmlinkage long sys32_execve(void) -{ - struct pt_regs *regs = task_pt_regs(current); - char *filename; - unsigned long result; - int rc; - - filename = getname(compat_ptr(regs->orig_gpr2)); - if (IS_ERR(filename)) { - result = PTR_ERR(filename); - goto out; - } - rc = compat_do_execve(filename, compat_ptr(regs->gprs[3]), - compat_ptr(regs->gprs[4]), regs); - if (rc) { - result = rc; - goto out_putname; - } - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - current->thread.fp_regs.fpc=0; - asm volatile("sfpc %0,0" : : "d" (0)); - result = regs->gprs[2]; -out_putname: - putname(filename); -out: - return result; -} - - -#ifdef CONFIG_MODULES - -asmlinkage long -sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs) -{ - return sys_init_module(umod, len, uargs); -} - -asmlinkage long -sys32_delete_module(const char __user *name_user, unsigned int flags) -{ - return sys_delete_module(name_user, flags); -} - -#else /* CONFIG_MODULES */ - -asmlinkage long -sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs) -{ - return -ENOSYS; -} - -asmlinkage long -sys32_delete_module(const char __user *name_user, unsigned int flags) -{ - return -ENOSYS; -} - -#endif /* CONFIG_MODULES */ - -/* Translations due to time_t size differences. Which affects all - sorts of things, like timeval and itimerval. */ - -extern struct timezone sys_tz; - -asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) -{ - if (tv) { - struct timeval ktv; - do_gettimeofday(&ktv); - if (put_tv32(tv, &ktv)) - return -EFAULT; - } - if (tz) { - if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) - return -EFAULT; - } - return 0; -} - -static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) -{ - long usec; - - if (!access_ok(VERIFY_READ, i, sizeof(*i))) - return -EFAULT; - if (__get_user(o->tv_sec, &i->tv_sec)) - return -EFAULT; - if (__get_user(usec, &i->tv_usec)) - return -EFAULT; - o->tv_nsec = usec * 1000; - return 0; -} - -asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz) +COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) { - struct timespec kts; - struct timezone ktz; - - if (tv) { - if (get_ts32(&kts, tv)) - return -EFAULT; - } - if (tz) { - if (copy_from_user(&ktz, tz, sizeof(ktz))) - return -EFAULT; - } - - return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); + return sys_truncate(path, (unsigned long)high << 32 | low); } -/* These are here just in case some old sparc32 binary calls it. */ -asmlinkage long sys32_pause(void) +COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - return -ERESTARTNOHAND; + return sys_ftruncate(fd, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); + return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); -} - -asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) -{ - return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); -} - -asmlinkage long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, size_t count) -{ - mm_segment_t old_fs = get_fs(); - int ret; - off_t of; - - if (offset && get_user(of, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, - offset ? (off_t __force __user *) &of : NULL, count); - set_fs(old_fs); - - if (offset && put_user(of, offset)) - return -EFAULT; - - return ret; + return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage long sys32_sendfile64(int out_fd, int in_fd, - compat_loff_t __user *offset, s32 count) +COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) { - mm_segment_t old_fs = get_fs(); - int ret; - loff_t lof; - - if (offset && get_user(lof, offset)) - return -EFAULT; - - set_fs(KERNEL_DS); - ret = sys_sendfile64(out_fd, in_fd, - offset ? (loff_t __force __user *) &lof : NULL, - count); - set_fs(old_fs); - - if (offset && put_user(lof, offset)) - return -EFAULT; - - return ret; + return sys_readahead(fd, (unsigned long)high << 32 | low, count); } -#ifdef CONFIG_SYSCTL_SYSCALL -struct __sysctl_args32 { - u32 name; - int nlen; - u32 oldval; - u32 oldlenp; - u32 newval; - u32 newlen; - u32 __unused[4]; -}; - -asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args) -{ - struct __sysctl_args32 tmp; - int error; - size_t oldlen; - size_t __user *oldlenp = NULL; - unsigned long addr = (((unsigned long)&args->__unused[0]) + 7) & ~7; - - if (copy_from_user(&tmp, args, sizeof(tmp))) - return -EFAULT; - - if (tmp.oldval && tmp.oldlenp) { - /* Duh, this is ugly and might not work if sysctl_args - is in read-only memory, but do_sysctl does indirectly - a lot of uaccess in both directions and we'd have to - basically copy the whole sysctl.c here, and - glibc's __sysctl uses rw memory for the structure - anyway. */ - if (get_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp)) || - put_user(oldlen, (size_t __user *)addr)) - return -EFAULT; - oldlenp = (size_t __user *)addr; - } - - lock_kernel(); - error = do_sysctl(compat_ptr(tmp.name), tmp.nlen, compat_ptr(tmp.oldval), - oldlenp, compat_ptr(tmp.newval), tmp.newlen); - unlock_kernel(); - if (oldlenp) { - if (!error) { - if (get_user(oldlen, (size_t __user *)addr) || - put_user(oldlen, (u32 __user *)compat_ptr(tmp.oldlenp))) - error = -EFAULT; - } - if (copy_to_user(args->__unused, tmp.__unused, - sizeof(tmp.__unused))) - error = -EFAULT; - } - return error; -} -#endif - struct stat64_emu31 { unsigned long long st_dev; unsigned int __pad1; @@ -769,8 +366,8 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) tmp.__st_ino = (u32)stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = (unsigned int)stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blksize = (u32)stat->blksize; @@ -782,7 +379,7 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); @@ -791,7 +388,7 @@ asmlinkage long sys32_stat64(char __user * filename, struct stat64_emu31 __user return ret; } -asmlinkage long sys32_lstat64(char __user * filename, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -800,7 +397,7 @@ asmlinkage long sys32_lstat64(char __user * filename, struct stat64_emu31 __user return ret; } -asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -809,24 +406,16 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta return ret; } -asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename, - struct stat64_emu31 __user* statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, + struct stat64_emu31 __user *, statbuf, int, flag) { struct kstat stat; - int error = -EINVAL; - - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) - goto out; - - if (flag & AT_SYMLINK_NOFOLLOW) - error = vfs_lstat_fd(dfd, filename, &stat); - else - error = vfs_stat_fd(dfd, filename, &stat); + int error; - if (!error) - error = cp_stat64(statbuf, &stat); -out: - return error; + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); } /* @@ -836,78 +425,36 @@ out: */ struct mmap_arg_struct_emu31 { - u32 addr; - u32 len; - u32 prot; - u32 flags; - u32 fd; - u32 offset; + compat_ulong_t addr; + compat_ulong_t len; + compat_ulong_t prot; + compat_ulong_t flags; + compat_ulong_t fd; + compat_ulong_t offset; }; -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct file * file = NULL; - unsigned long error = -EBADF; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - if (!IS_ERR((void *) error) && error + len >= 0x80000000ULL) { - /* Result is out of bounds. */ - do_munmap(current->mm, addr, len); - error = -ENOMEM; - } - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - - -asmlinkage unsigned long -old32_mmap(struct mmap_arg_struct_emu31 __user *arg) +COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; - int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; + return -EFAULT; if (a.offset & ~PAGE_MASK) - goto out; - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); -out: - return error; + return -EINVAL; + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } -asmlinkage long -sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) +COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; - int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) - goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; + return -EFAULT; + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } -asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -915,7 +462,7 @@ asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) return sys_read(fd, buf, count); } -asmlinkage long sys32_write(unsigned int fd, char __user * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -923,37 +470,19 @@ asmlinkage long sys32_write(unsigned int fd, char __user * buf, size_t count) return sys_write(fd, buf, count); } -asmlinkage long sys32_clone(void) -{ - struct pt_regs *regs = task_pt_regs(current); - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs->gprs[3] & 0xffffffffUL; - newsp = regs->orig_gpr2 & 0x7fffffffUL; - parent_tidptr = compat_ptr(regs->gprs[4]); - child_tidptr = compat_ptr(regs->gprs[5]); - if (!newsp) - newsp = regs->gprs[15]; - return do_fork(clone_flags, newsp, regs, 0, - parent_tidptr, child_tidptr); -} - /* * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} * because the 31 bit values differ from the 64 bit values. */ -asmlinkage long -sys32_fadvise64(int fd, loff_t offset, size_t len, int advise) +COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) { if (advise == 4) advise = POSIX_FADV_DONTNEED; else if (advise == 5) advise = POSIX_FADV_NOREUSE; - return sys_fadvise64(fd, offset, len, advise); + return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise); } struct fadvise64_64_args { @@ -963,8 +492,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -sys32_fadvise64_64(struct fadvise64_64_args __user *args) +COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; @@ -976,3 +504,17 @@ sys32_fadvise64_64(struct fadvise64_64_args __user *args) a.advice = POSIX_FADV_NOREUSE; return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } + +COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, + u32, nhigh, u32, nlow, unsigned int, flags) +{ + return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, + ((u64)nhigh << 32) + nlow, flags); +} + +COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, + u32, lenhigh, u32, lenlow) +{ + return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, + ((u64)lenhigh << 32) + lenlow); +} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index e89f8c0c42a..70d4b7c4bea 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -4,10 +4,6 @@ #include <linux/compat.h> #include <linux/socket.h> #include <linux/syscalls.h> -#include <linux/nfs_fs.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/export.h> /* Macro that masks the high order bit of an 32 bit pointer and converts it*/ /* to a 64 bit pointer */ @@ -21,81 +17,6 @@ struct ipc_kludge_32 { __s32 msgtyp; }; -struct old_sigaction32 { - __u32 sa_handler; /* Really a pointer, but need to deal with 32 bits */ - compat_old_sigset_t sa_mask; /* A 32 bit mask */ - __u32 sa_flags; - __u32 sa_restorer; /* Another 32 bit pointer */ -}; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[((128/sizeof(int)) - 3)]; - - /* kill() */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - } _timer; - - /* POSIX.1b signals */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - pid_t _pid; /* which child */ - uid_t _uid; /* sender's uid */ - int _status;/* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - __u32 _addr; /* faulting insn/memory ref. - pointer */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} compat_siginfo_t; - -/* - * How these fields are to be accessed. - */ -#define si_pid _sifields._kill._pid -#define si_uid _sifields._kill._uid -#define si_status _sifields._sigchld._status -#define si_utime _sifields._sigchld._utime -#define si_stime _sifields._sigchld._stime -#define si_value _sifields._rt._sigval -#define si_int _sifields._rt._sigval.sival_int -#define si_ptr _sifields._rt._sigval.sival_ptr -#define si_addr _sifields._sigfault._addr -#define si_band _sifields._sigpoll._band -#define si_fd _sifields._sigpoll._fd -#define si_tid _sifields._timer._tid -#define si_overrun _sifields._timer._overrun - /* asm/sigcontext.h */ typedef union { @@ -106,6 +27,7 @@ typedef union typedef struct { unsigned int fpc; + unsigned int pad; freg_t32 fprs[__NUM_FPRS]; } _s390_fp_regs32; @@ -140,26 +62,59 @@ struct sigcontext32 }; /* asm/signal.h */ -struct sigaction32 { - __u32 sa_handler; /* pointer */ - __u32 sa_flags; - __u32 sa_restorer; /* pointer */ - compat_sigset_t sa_mask; /* mask last for extensibility */ -}; - -typedef struct { - __u32 ss_sp; /* pointer */ - int ss_flags; - compat_size_t ss_size; -} stack_t32; /* asm/ucontext.h */ struct ucontext32 { __u32 uc_flags; __u32 uc_link; /* pointer */ - stack_t32 uc_stack; + compat_stack_t uc_stack; _sigregs32 uc_mcontext; - compat_sigset_t uc_sigmask; /* mask last for extensibility */ + compat_sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ + unsigned char __unused[128 - sizeof(compat_sigset_t)]; }; +struct stat64_emu31; +struct mmap_arg_struct_emu31; +struct fadvise64_64_args; + +long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group); +long compat_sys_s390_setregid16(u16 rgid, u16 egid); +long compat_sys_s390_setgid16(u16 gid); +long compat_sys_s390_setreuid16(u16 ruid, u16 euid); +long compat_sys_s390_setuid16(u16 uid); +long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid); +long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); +long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid); +long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); +long compat_sys_s390_setfsuid16(u16 uid); +long compat_sys_s390_setfsgid16(u16 gid); +long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_getuid16(void); +long compat_sys_s390_geteuid16(void); +long compat_sys_s390_getgid16(void); +long compat_sys_s390_getegid16(void); +long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); +long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); +long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); +long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); +long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count); +long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count); +long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); +long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); +long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); +long compat_sys_sigreturn(void); +long compat_sys_rt_sigreturn(void); + #endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index 419aef913ee..12b82383351 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -1,62 +1,42 @@ #ifndef _PTRACE32_H #define _PTRACE32_H -#include "compat_linux.h" /* needed for _psw_t32 */ - -typedef struct { - __u32 cr[3]; -} per_cr_words32; - -typedef struct { - __u16 perc_atmid; /* 0x096 */ - __u32 address; /* 0x098 */ - __u8 access_id; /* 0x0a1 */ -} per_lowcore_words32; - -typedef struct { - union { - per_cr_words32 words; - } control_regs; - /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing - */ - unsigned single_step : 1; - unsigned instruction_fetch : 1; - unsigned : 30; - /* - * These addresses are copied into cr10 & cr11 if single - * stepping is switched off - */ - __u32 starting_addr; - __u32 ending_addr; - union { - per_lowcore_words32 words; - } lowcore; -} per_struct32; +#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ +#include "compat_linux.h" /* needed for psw_compat_t */ + +struct compat_per_struct_kernel { + __u32 cr9; /* PER control bits */ + __u32 cr10; /* PER starting address */ + __u32 cr11; /* PER ending address */ + __u32 bits; /* Obsolete software bits */ + __u32 starting_addr; /* User specified start address */ + __u32 ending_addr; /* User specified end address */ + __u16 perc_atmid; /* PER trap ATMID */ + __u32 address; /* PER trap instruction address */ + __u8 access_id; /* PER trap access identification */ +}; -struct user_regs_struct32 +struct compat_user_regs_struct { - _psw_t32 psw; + psw_compat_t psw; u32 gprs[NUM_GPRS]; u32 acrs[NUM_ACRS]; u32 orig_gpr2; + /* nb: there's a 4-byte hole here */ s390_fp_regs fp_regs; /* * These per registers are in here so that gdb can modify them * itself as there is no "official" ptrace interface for hardware * watchpoints. This is the way intel does it. */ - per_struct32 per_info; - u32 ieee_instruction_pointer; - /* Used to give failing instruction back to user for ieee exceptions */ + struct compat_per_struct_kernel per_info; + u32 ieee_instruction_pointer; /* obsolete, always 0 */ }; -struct user32 { +struct compat_user { /* We start with the registers, to mimic the way that "memory" is returned from the ptrace(3,...) function. */ - struct user_regs_struct32 regs; /* Where the registers are actually stored */ + struct compat_user_regs_struct regs; /* The rest of this junk is to help gdb figure out what goes where */ u32 u_tsize; /* Text segment size (pages). */ u32 u_dsize; /* Data segment size (pages). */ @@ -78,6 +58,6 @@ typedef struct __u32 len; __u32 kernel_addr; __u32 process_addr; -} ptrace_area_emu31; +} compat_ptrace_area; #endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a5692c460ba..f204d692036 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/compat_signal.c - * - * Copyright (C) IBM Corp. 2000,2006 + * Copyright IBM Corp. 2000, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * Gerhard Tonn (ton@de.ibm.com) * @@ -27,10 +25,10 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/switch_to.h> #include "compat_linux.h" #include "compat_ptrace.h" - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#include "entry.h" typedef struct { @@ -38,6 +36,7 @@ typedef struct struct sigcontext32 sc; _sigregs32 sregs; int signo; + __u32 gprs_high[NUM_GPRS]; __u8 retcode[S390_SYSCALL_SIZE]; } sigframe32; @@ -47,15 +46,13 @@ typedef struct __u8 retcode[S390_SYSCALL_SIZE]; compat_siginfo_t info; struct ucontext32 uc; + __u32 gprs_high[NUM_GPRS]; } rt_sigframe32; -int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - /* If you change siginfo_t structure, please be sure this code is fixed accordingly. It should never copy any pad contained in the structure @@ -102,7 +99,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) break; } } - return err; + return err ? -EFAULT : 0; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) @@ -110,9 +107,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) int err; u32 tmp; - if (!access_ok (VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); @@ -138,7 +132,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -153,260 +148,134 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; } } - return err; + return err ? -EFAULT : 0; } -asmlinkage long -sys32_sigaction(int sig, const struct old_sigaction32 __user *act, - struct old_sigaction32 __user *oact) +static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) { - struct k_sigaction new_ka, old_ka; - unsigned long sa_handler, sa_restorer; - int ret; - - if (act) { - compat_old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(sa_handler, &act->sa_handler) || - __get_user(sa_restorer, &act->sa_restorer) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - new_ka.sa.sa_handler = (__sighandler_t) sa_handler; - new_ka.sa.sa_restorer = (void (*)(void)) sa_restorer; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - sa_handler = (unsigned long) old_ka.sa.sa_handler; - sa_restorer = (unsigned long) old_ka.sa.sa_restorer; - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(sa_handler, &oact->sa_handler) || - __put_user(sa_restorer, &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; + _sigregs32 user_sregs; + int i; + + user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32); + user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI; + user_sregs.regs.psw.mask |= PSW32_USER_BITS; + user_sregs.regs.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); + for (i = 0; i < NUM_GPRS; i++) + user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; + save_access_regs(current->thread.acrs); + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(user_sregs.regs.acrs)); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); + memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, + sizeof(user_sregs.fpregs)); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) + return -EFAULT; + return 0; } -asmlinkage long -sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, - struct sigaction32 __user *oact, size_t sigsetsize) +static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) { - struct k_sigaction new_ka, old_ka; - unsigned long sa_handler; - int ret; - compat_sigset_t set32; + _sigregs32 user_sregs; + int i; - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; + /* Alwys make any pending restarted system call return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; - if (act) { - ret = get_user(sa_handler, &act->sa_handler); - ret |= __copy_from_user(&set32, &act->sa_mask, - sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - - if (ret) - return -EFAULT; - new_ka.sa.sa_handler = (__sighandler_t) sa_handler; - } + if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) + return -EFAULT; - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } - ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); - ret |= __copy_to_user(&oact->sa_mask, &set32, - sizeof(compat_sigset_t)); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - } + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI)) + return -EINVAL; - return ret; -} + /* Loading the floating-point-control word can fail. Do that first. */ + if (restore_fp_ctl(&user_sregs.fpregs.fpc)) + return -EINVAL; -asmlinkage long -sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss) -{ - struct pt_regs *regs = task_pt_regs(current); - stack_t kss, koss; - unsigned long ss_sp; - int ret, err = 0; - mm_segment_t old_fs = get_fs(); - - if (uss) { - if (!access_ok(VERIFY_READ, uss, sizeof(*uss))) - return -EFAULT; - err |= __get_user(ss_sp, &uss->ss_sp); - err |= __get_user(kss.ss_size, &uss->ss_size); - err |= __get_user(kss.ss_flags, &uss->ss_flags); - if (err) - return -EFAULT; - kss.ss_sp = (void __user *) ss_sp; - } + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 | + (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN); + for (i = 0; i < NUM_GPRS; i++) + regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(current->thread.acrs)); + restore_access_regs(current->thread.acrs); - set_fs (KERNEL_DS); - ret = do_sigaltstack((stack_t __force __user *) (uss ? &kss : NULL), - (stack_t __force __user *) (uoss ? &koss : NULL), - regs->gprs[15]); - set_fs (old_fs); - - if (!ret && uoss) { - if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss))) - return -EFAULT; - ss_sp = (unsigned long) koss.ss_sp; - err |= __put_user(ss_sp, &uoss->ss_sp); - err |= __put_user(koss.ss_size, &uoss->ss_size); - err |= __put_user(koss.ss_flags, &uoss->ss_flags); - if (err) - return -EFAULT; - } - return ret; + memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, + sizeof(current->thread.fp_regs)); + + restore_fp_regs(current->thread.fp_regs.fprs); + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + return 0; } -static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) +static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) { - _s390_regs_common32 regs32; - int err, i; + __u32 gprs_high[NUM_GPRS]; + int i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; for (i = 0; i < NUM_GPRS; i++) - regs32.gprs[i] = (__u32) regs->gprs[i]; - save_access_regs(current->thread.acrs); - memcpy(regs32.acrs, current->thread.acrs, sizeof(regs32.acrs)); - err = __copy_to_user(&sregs->regs, ®s32, sizeof(regs32)); - if (err) - return err; - save_fp_regs(¤t->thread.fp_regs); - /* s390_fp_regs and _s390_fp_regs32 are the same ! */ - return __copy_to_user(&sregs->fpregs, ¤t->thread.fp_regs, - sizeof(_s390_fp_regs32)); + gprs_high[i] = regs->gprs[i] >> 32; + if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high))) + return -EFAULT; + return 0; } -static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) +static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) { - _s390_regs_common32 regs32; - int err, i; - - /* Alwys make any pending restarted system call return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + __u32 gprs_high[NUM_GPRS]; + int i; - err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); - if (err) - return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); - regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); + if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high))) + return -EFAULT; for (i = 0; i < NUM_GPRS; i++) - regs->gprs[i] = (__u64) regs32.gprs[i]; - memcpy(current->thread.acrs, regs32.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); - - err = __copy_from_user(¤t->thread.fp_regs, &sregs->fpregs, - sizeof(_s390_fp_regs32)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; - if (err) - return err; - - restore_fp_regs(¤t->thread.fp_regs); - regs->trap = -1; /* disable syscall checks */ + *(__u32 *)®s->gprs[i] = gprs_high[i]; return 0; } -asmlinkage long sys32_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; - + if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; } -asmlinkage long sys32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; sigset_t set; - stack_t st; - __u32 ss_sp; - int err; - mm_segment_t old_fs = get_fs(); - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; - - err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); - st.ss_sp = compat_ptr(ss_sp); - err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); - err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); - if (err) + if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + goto badframe; + if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; - - set_fs (KERNEL_DS); - do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]); - set_fs (old_fs); - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -428,19 +297,16 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) /* Default to using normal stack */ sp = (unsigned long) A(regs->gprs[15]); + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (! sas_ss_flags(sp)) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -458,7 +324,8 @@ static int setup_frame32(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) @@ -466,17 +333,19 @@ static int setup_frame32(int sig, struct k_sigaction *ka, if (save_sigregs32(regs, &frame->sregs)) goto give_sigsegv; + if (save_sigregs_gprs_high(regs, frame->gprs_high)) + goto give_sigsegv; if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -485,19 +354,28 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -511,20 +389,19 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, { int err = 0; rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (copy_siginfo_to_user32(&frame->info, info)) goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gprs[15]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); err |= save_sigregs32(regs, &frame->uc.uc_mcontext); + err |= save_sigregs_gprs_high(regs, frame->gprs_high); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; @@ -532,24 +409,30 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __force __user *)(frame->retcode))) + goto give_sigsegv; } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64 __force) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; return 0; give_sigsegv: @@ -561,9 +444,8 @@ give_sigsegv: * OK, we're invoking a handler */ -int -handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +void handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { int ret; @@ -572,15 +454,9 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame32(sig, ka, info, oldset, regs); else ret = setup_frame32(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - return ret; + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S deleted file mode 100644 index 743d54f0b8d..00000000000 --- a/arch/s390/kernel/compat_wrapper.S +++ /dev/null @@ -1,1734 +0,0 @@ -/* -* arch/s390/kernel/compat_wrapper.S -* wrapper for 31 bit compatible system calls. -* -* Copyright (C) IBM Corp. 2000,2006 -* Author(s): Gerhard Tonn (ton@de.ibm.com), -* Thomas Spatzier (tspat@de.ibm.com) -*/ - - .globl sys32_exit_wrapper -sys32_exit_wrapper: - lgfr %r2,%r2 # int - jg sys_exit # branch to sys_exit - - .globl sys32_read_wrapper -sys32_read_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys32_read # branch to sys_read - - .globl sys32_write_wrapper -sys32_write_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - jg sys32_write # branch to system call - - .globl sys32_open_wrapper -sys32_open_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_open # branch to system call - - .globl sys32_close_wrapper -sys32_close_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_close # branch to system call - - .globl sys32_creat_wrapper -sys32_creat_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_creat # branch to system call - - .globl sys32_link_wrapper -sys32_link_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_link # branch to system call - - .globl sys32_unlink_wrapper -sys32_unlink_wrapper: - llgtr %r2,%r2 # const char * - jg sys_unlink # branch to system call - - .globl sys32_chdir_wrapper -sys32_chdir_wrapper: - llgtr %r2,%r2 # const char * - jg sys_chdir # branch to system call - - .globl sys32_time_wrapper -sys32_time_wrapper: - llgtr %r2,%r2 # int * - jg compat_sys_time # branch to system call - - .globl sys32_mknod_wrapper -sys32_mknod_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # dev - jg sys_mknod # branch to system call - - .globl sys32_chmod_wrapper -sys32_chmod_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # mode_t - jg sys_chmod # branch to system call - - .globl sys32_lchown16_wrapper -sys32_lchown16_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_lchown16 # branch to system call - - .globl sys32_lseek_wrapper -sys32_lseek_wrapper: - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # off_t - llgfr %r4,%r4 # unsigned int - jg sys_lseek # branch to system call - -#sys32_getpid_wrapper # void - - .globl sys32_mount_wrapper -sys32_mount_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # char * - llgfr %r5,%r5 # unsigned long - llgtr %r6,%r6 # void * - jg compat_sys_mount # branch to system call - - .globl sys32_oldumount_wrapper -sys32_oldumount_wrapper: - llgtr %r2,%r2 # char * - jg sys_oldumount # branch to system call - - .globl sys32_setuid16_wrapper -sys32_setuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setuid16 # branch to system call - -#sys32_getuid16_wrapper # void - - .globl sys32_ptrace_wrapper -sys32_ptrace_wrapper: - lgfr %r2,%r2 # long - lgfr %r3,%r3 # long - llgtr %r4,%r4 # long - llgfr %r5,%r5 # long - jg sys_ptrace # branch to system call - - .globl sys32_alarm_wrapper -sys32_alarm_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_alarm # branch to system call - -#sys32_pause_wrapper # void - - .globl compat_sys_utime_wrapper -compat_sys_utime_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_utimbuf * - jg compat_sys_utime # branch to system call - - .globl sys32_access_wrapper -sys32_access_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_access # branch to system call - - .globl sys32_nice_wrapper -sys32_nice_wrapper: - lgfr %r2,%r2 # int - jg sys_nice # branch to system call - -#sys32_sync_wrapper # void - - .globl sys32_kill_wrapper -sys32_kill_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_kill # branch to system call - - .globl sys32_rename_wrapper -sys32_rename_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_rename # branch to system call - - .globl sys32_mkdir_wrapper -sys32_mkdir_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_mkdir # branch to system call - - .globl sys32_rmdir_wrapper -sys32_rmdir_wrapper: - llgtr %r2,%r2 # const char * - jg sys_rmdir # branch to system call - - .globl sys32_dup_wrapper -sys32_dup_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_dup # branch to system call - - .globl sys32_pipe_wrapper -sys32_pipe_wrapper: - llgtr %r2,%r2 # u32 * - jg sys_pipe # branch to system call - - .globl compat_sys_times_wrapper -compat_sys_times_wrapper: - llgtr %r2,%r2 # struct compat_tms * - jg compat_sys_times # branch to system call - - .globl sys32_brk_wrapper -sys32_brk_wrapper: - llgtr %r2,%r2 # unsigned long - jg sys_brk # branch to system call - - .globl sys32_setgid16_wrapper -sys32_setgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setgid16 # branch to system call - -#sys32_getgid16_wrapper # void - - .globl sys32_signal_wrapper -sys32_signal_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __sighandler_t - jg sys_signal - -#sys32_geteuid16_wrapper # void - -#sys32_getegid16_wrapper # void - - .globl sys32_acct_wrapper -sys32_acct_wrapper: - llgtr %r2,%r2 # char * - jg sys_acct # branch to system call - - .globl sys32_umount_wrapper -sys32_umount_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_umount # branch to system call - - .globl compat_sys_ioctl_wrapper -compat_sys_ioctl_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned int - jg compat_sys_ioctl # branch to system call - - .globl compat_sys_fcntl_wrapper -compat_sys_fcntl_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl # branch to system call - - .globl sys32_setpgid_wrapper -sys32_setpgid_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - jg sys_setpgid # branch to system call - - .globl sys32_umask_wrapper -sys32_umask_wrapper: - lgfr %r2,%r2 # int - jg sys_umask # branch to system call - - .globl sys32_chroot_wrapper -sys32_chroot_wrapper: - llgtr %r2,%r2 # char * - jg sys_chroot # branch to system call - - .globl sys32_ustat_wrapper -sys32_ustat_wrapper: - llgfr %r2,%r2 # dev_t - llgtr %r3,%r3 # struct ustat * - jg sys_ustat - - .globl sys32_dup2_wrapper -sys32_dup2_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_dup2 # branch to system call - -#sys32_getppid_wrapper # void - -#sys32_getpgrp_wrapper # void - -#sys32_setsid_wrapper # void - - .globl sys32_sigaction_wrapper -sys32_sigaction_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct old_sigaction * - llgtr %r4,%r4 # struct old_sigaction32 * - jg sys32_sigaction # branch to system call - - .globl sys32_setreuid16_wrapper -sys32_setreuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - jg sys32_setreuid16 # branch to system call - - .globl sys32_setregid16_wrapper -sys32_setregid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - jg sys32_setregid16 # branch to system call - - .globl sys_sigsuspend_wrapper -sys_sigsuspend_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # old_sigset_t - jg sys_sigsuspend - - .globl compat_sys_sigpending_wrapper -compat_sys_sigpending_wrapper: - llgtr %r2,%r2 # compat_old_sigset_t * - jg compat_sys_sigpending # branch to system call - - .globl sys32_sethostname_wrapper -sys32_sethostname_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_sethostname # branch to system call - - .globl compat_sys_setrlimit_wrapper -compat_sys_setrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_setrlimit # branch to system call - - .globl compat_sys_old_getrlimit_wrapper -compat_sys_old_getrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_old_getrlimit # branch to system call - - .globl compat_sys_getrlimit_wrapper -compat_sys_getrlimit_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_getrlimit # branch to system call - - .globl sys32_mmap2_wrapper -sys32_mmap2_wrapper: - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg sys32_mmap2 # branch to system call - - .globl compat_sys_getrusage_wrapper -compat_sys_getrusage_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct rusage_emu31 * - jg compat_sys_getrusage # branch to system call - - .globl sys32_gettimeofday_wrapper -sys32_gettimeofday_wrapper: - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg sys32_gettimeofday # branch to system call - - .globl sys32_settimeofday_wrapper -sys32_settimeofday_wrapper: - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg sys32_settimeofday # branch to system call - - .globl sys32_getgroups16_wrapper -sys32_getgroups16_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_getgroups16 # branch to system call - - .globl sys32_setgroups16_wrapper -sys32_setgroups16_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_setgroups16 # branch to system call - - .globl sys32_symlink_wrapper -sys32_symlink_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_symlink # branch to system call - - .globl sys32_readlink_wrapper -sys32_readlink_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_readlink # branch to system call - - .globl sys32_uselib_wrapper -sys32_uselib_wrapper: - llgtr %r2,%r2 # const char * - jg sys_uselib # branch to system call - - .globl sys32_swapon_wrapper -sys32_swapon_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_swapon # branch to system call - - .globl sys32_reboot_wrapper -sys32_reboot_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # unsigned int - llgtr %r5,%r5 # void * - jg sys_reboot # branch to system call - - .globl old32_readdir_wrapper -old32_readdir_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_old_readdir # branch to system call - - .globl old32_mmap_wrapper -old32_mmap_wrapper: - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg old32_mmap # branch to system call - - .globl sys32_munmap_wrapper -sys32_munmap_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munmap # branch to system call - - .globl sys32_truncate_wrapper -sys32_truncate_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - jg sys_truncate # branch to system call - - .globl sys32_ftruncate_wrapper -sys32_ftruncate_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - jg sys_ftruncate # branch to system call - - .globl sys32_fchmod_wrapper -sys32_fchmod_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # mode_t - jg sys_fchmod # branch to system call - - .globl sys32_fchown16_wrapper -sys32_fchown16_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # compat_uid_t - llgfr %r4,%r4 # compat_uid_t - jg sys32_fchown16 # branch to system call - - .globl sys32_getpriority_wrapper -sys32_getpriority_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_getpriority # branch to system call - - .globl sys32_setpriority_wrapper -sys32_setpriority_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_setpriority # branch to system call - - .globl compat_sys_statfs_wrapper -compat_sys_statfs_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_statfs # branch to system call - - .globl compat_sys_fstatfs_wrapper -compat_sys_fstatfs_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_fstatfs # branch to system call - - .globl compat_sys_socketcall_wrapper -compat_sys_socketcall_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # u32 * - jg compat_sys_socketcall # branch to system call - - .globl sys32_syslog_wrapper -sys32_syslog_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_syslog # branch to system call - - .globl compat_sys_setitimer_wrapper -compat_sys_setitimer_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct itimerval_emu31 * - llgtr %r4,%r4 # struct itimerval_emu31 * - jg compat_sys_setitimer # branch to system call - - .globl compat_sys_getitimer_wrapper -compat_sys_getitimer_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct itimerval_emu31 * - jg compat_sys_getitimer # branch to system call - - .globl compat_sys_newstat_wrapper -compat_sys_newstat_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newstat # branch to system call - - .globl compat_sys_newlstat_wrapper -compat_sys_newlstat_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newlstat # branch to system call - - .globl compat_sys_newfstat_wrapper -compat_sys_newfstat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newfstat # branch to system call - -#sys32_vhangup_wrapper # void - - .globl compat_sys_wait4_wrapper -compat_sys_wait4_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # unsigned int * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct rusage * - jg compat_sys_wait4 # branch to system call - - .globl sys32_swapoff_wrapper -sys32_swapoff_wrapper: - llgtr %r2,%r2 # const char * - jg sys_swapoff # branch to system call - - .globl compat_sys_sysinfo_wrapper -compat_sys_sysinfo_wrapper: - llgtr %r2,%r2 # struct sysinfo_emu31 * - jg compat_sys_sysinfo # branch to system call - - .globl sys32_ipc_wrapper -sys32_ipc_wrapper: - llgfr %r2,%r2 # uint - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # u32 - jg sys32_ipc # branch to system call - - .globl sys32_fsync_wrapper -sys32_fsync_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fsync # branch to system call - -#sys32_sigreturn_wrapper # done in sigreturn_glue - -#sys32_clone_wrapper # done in clone_glue - - .globl sys32_setdomainname_wrapper -sys32_setdomainname_wrapper: - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_setdomainname # branch to system call - - .globl sys32_newuname_wrapper -sys32_newuname_wrapper: - llgtr %r2,%r2 # struct new_utsname * - jg s390x_newuname # branch to system call - - .globl compat_sys_adjtimex_wrapper -compat_sys_adjtimex_wrapper: - llgtr %r2,%r2 # struct compat_timex * - jg compat_sys_adjtimex # branch to system call - - .globl sys32_mprotect_wrapper -sys32_mprotect_wrapper: - llgtr %r2,%r2 # unsigned long (actually pointer - llgfr %r3,%r3 # size_t - llgfr %r4,%r4 # unsigned long - jg sys_mprotect # branch to system call - - .globl compat_sys_sigprocmask_wrapper -compat_sys_sigprocmask_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_old_sigset_t * - llgtr %r4,%r4 # compat_old_sigset_t * - jg compat_sys_sigprocmask # branch to system call - - .globl sys32_init_module_wrapper -sys32_init_module_wrapper: - llgtr %r2,%r2 # void * - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # char * - jg sys32_init_module # branch to system call - - .globl sys32_delete_module_wrapper -sys32_delete_module_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned int - jg sys32_delete_module # branch to system call - - .globl sys32_quotactl_wrapper -sys32_quotactl_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # qid_t - llgtr %r5,%r5 # caddr_t - jg sys_quotactl # branch to system call - - .globl sys32_getpgid_wrapper -sys32_getpgid_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_getpgid # branch to system call - - .globl sys32_fchdir_wrapper -sys32_fchdir_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fchdir # branch to system call - - .globl sys32_bdflush_wrapper -sys32_bdflush_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # long - jg sys_bdflush # branch to system call - - .globl sys32_sysfs_wrapper -sys32_sysfs_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys_sysfs # branch to system call - - .globl sys32_personality_wrapper -sys32_personality_wrapper: - llgfr %r2,%r2 # unsigned long - jg s390x_personality # branch to system call - - .globl sys32_setfsuid16_wrapper -sys32_setfsuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setfsuid16 # branch to system call - - .globl sys32_setfsgid16_wrapper -sys32_setfsgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setfsgid16 # branch to system call - - .globl sys32_llseek_wrapper -sys32_llseek_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # unsigned int - jg sys_llseek # branch to system call - - .globl sys32_getdents_wrapper -sys32_getdents_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_getdents # branch to system call - - .globl compat_sys_select_wrapper -compat_sys_select_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_fd_set * - llgtr %r4,%r4 # compat_fd_set * - llgtr %r5,%r5 # compat_fd_set * - llgtr %r6,%r6 # struct compat_timeval * - jg compat_sys_select # branch to system call - - .globl sys32_flock_wrapper -sys32_flock_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_flock # branch to system call - - .globl sys32_msync_wrapper -sys32_msync_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_msync # branch to system call - - .globl compat_sys_readv_wrapper -compat_sys_readv_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_readv # branch to system call - - .globl compat_sys_writev_wrapper -compat_sys_writev_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_writev # branch to system call - - .globl sys32_getsid_wrapper -sys32_getsid_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_getsid # branch to system call - - .globl sys32_fdatasync_wrapper -sys32_fdatasync_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_fdatasync # branch to system call - -#sys32_sysctl_wrapper # tbd - - .globl sys32_mlock_wrapper -sys32_mlock_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_mlock # branch to system call - - .globl sys32_munlock_wrapper -sys32_munlock_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munlock # branch to system call - - .globl sys32_mlockall_wrapper -sys32_mlockall_wrapper: - lgfr %r2,%r2 # int - jg sys_mlockall # branch to system call - -#sys32_munlockall_wrapper # void - - .globl sys32_sched_setparam_wrapper -sys32_sched_setparam_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_setparam # branch to system call - - .globl sys32_sched_getparam_wrapper -sys32_sched_getparam_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_getparam # branch to system call - - .globl sys32_sched_setscheduler_wrapper -sys32_sched_setscheduler_wrapper: - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct sched_param * - jg sys_sched_setscheduler # branch to system call - - .globl sys32_sched_getscheduler_wrapper -sys32_sched_getscheduler_wrapper: - lgfr %r2,%r2 # pid_t - jg sys_sched_getscheduler # branch to system call - -#sys32_sched_yield_wrapper # void - - .globl sys32_sched_get_priority_max_wrapper -sys32_sched_get_priority_max_wrapper: - lgfr %r2,%r2 # int - jg sys_sched_get_priority_max # branch to system call - - .globl sys32_sched_get_priority_min_wrapper -sys32_sched_get_priority_min_wrapper: - lgfr %r2,%r2 # int - jg sys_sched_get_priority_min # branch to system call - - .globl sys32_sched_rr_get_interval_wrapper -sys32_sched_rr_get_interval_wrapper: - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct compat_timespec * - jg sys32_sched_rr_get_interval # branch to system call - - .globl compat_sys_nanosleep_wrapper -compat_sys_nanosleep_wrapper: - llgtr %r2,%r2 # struct compat_timespec * - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_nanosleep # branch to system call - - .globl sys32_mremap_wrapper -sys32_mremap_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_mremap # branch to system call - - .globl sys32_setresuid16_wrapper -sys32_setresuid16_wrapper: - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_setresuid16 # branch to system call - - .globl sys32_getresuid16_wrapper -sys32_getresuid16_wrapper: - llgtr %r2,%r2 # __kernel_old_uid_emu31_t * - llgtr %r3,%r3 # __kernel_old_uid_emu31_t * - llgtr %r4,%r4 # __kernel_old_uid_emu31_t * - jg sys32_getresuid16 # branch to system call - - .globl sys32_poll_wrapper -sys32_poll_wrapper: - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # long - jg sys_poll # branch to system call - - .globl compat_sys_nfsservctl_wrapper -compat_sys_nfsservctl_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_nfsctl_arg* - llgtr %r4,%r4 # union compat_nfsctl_res* - jg compat_sys_nfsservctl # branch to system call - - .globl sys32_setresgid16_wrapper -sys32_setresgid16_wrapper: - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_setresgid16 # branch to system call - - .globl sys32_getresgid16_wrapper -sys32_getresgid16_wrapper: - llgtr %r2,%r2 # __kernel_old_gid_emu31_t * - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - llgtr %r4,%r4 # __kernel_old_gid_emu31_t * - jg sys32_getresgid16 # branch to system call - - .globl sys32_prctl_wrapper -sys32_prctl_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_prctl # branch to system call - -#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue - - .globl sys32_rt_sigaction_wrapper -sys32_rt_sigaction_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct sigaction_emu31 * - llgtr %r4,%r4 # const struct sigaction_emu31 * - llgfr %r5,%r5 # size_t - jg sys32_rt_sigaction # branch to system call - - .globl sys32_rt_sigprocmask_wrapper -sys32_rt_sigprocmask_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # old_sigset_emu31 * - llgtr %r4,%r4 # old_sigset_emu31 * - llgfr %r5,%r5 # size_t - jg sys32_rt_sigprocmask # branch to system call - - .globl sys32_rt_sigpending_wrapper -sys32_rt_sigpending_wrapper: - llgtr %r2,%r2 # sigset_emu31 * - llgfr %r3,%r3 # size_t - jg sys32_rt_sigpending # branch to system call - - .globl compat_sys_rt_sigtimedwait_wrapper -compat_sys_rt_sigtimedwait_wrapper: - llgtr %r2,%r2 # const sigset_emu31_t * - llgtr %r3,%r3 # siginfo_emu31_t * - llgtr %r4,%r4 # const struct compat_timespec * - llgfr %r5,%r5 # size_t - jg compat_sys_rt_sigtimedwait # branch to system call - - .globl sys32_rt_sigqueueinfo_wrapper -sys32_rt_sigqueueinfo_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # siginfo_emu31_t * - jg sys32_rt_sigqueueinfo # branch to system call - - .globl compat_sys_rt_sigsuspend_wrapper -compat_sys_rt_sigsuspend_wrapper: - llgtr %r2,%r2 # compat_sigset_t * - llgfr %r3,%r3 # compat_size_t - jg compat_sys_rt_sigsuspend - - .globl sys32_pread64_wrapper -sys32_pread64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pread64 # branch to system call - - .globl sys32_pwrite64_wrapper -sys32_pwrite64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pwrite64 # branch to system call - - .globl sys32_chown16_wrapper -sys32_chown16_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_chown16 # branch to system call - - .globl sys32_getcwd_wrapper -sys32_getcwd_wrapper: - llgtr %r2,%r2 # char * - llgfr %r3,%r3 # unsigned long - jg sys_getcwd # branch to system call - - .globl sys32_capget_wrapper -sys32_capget_wrapper: - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # cap_user_data_t - jg sys_capget # branch to system call - - .globl sys32_capset_wrapper -sys32_capset_wrapper: - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # const cap_user_data_t - jg sys_capset # branch to system call - - .globl sys32_sigaltstack_wrapper -sys32_sigaltstack_wrapper: - llgtr %r2,%r2 # const stack_emu31_t * - llgtr %r3,%r3 # stack_emu31_t * - jg sys32_sigaltstack - - .globl sys32_sendfile_wrapper -sys32_sendfile_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # __kernel_off_emu31_t * - llgfr %r5,%r5 # size_t - jg sys32_sendfile # branch to system call - -#sys32_vfork_wrapper # done in vfork_glue - - .globl sys32_truncate64_wrapper -sys32_truncate64_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_truncate64 # branch to system call - - .globl sys32_ftruncate64_wrapper -sys32_ftruncate64_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_ftruncate64 # branch to system call - - .globl sys32_lchown_wrapper -sys32_lchown_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_lchown # branch to system call - -#sys32_getuid_wrapper # void -#sys32_getgid_wrapper # void -#sys32_geteuid_wrapper # void -#sys32_getegid_wrapper # void - - .globl sys32_setreuid_wrapper -sys32_setreuid_wrapper: - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - jg sys_setreuid # branch to system call - - .globl sys32_setregid_wrapper -sys32_setregid_wrapper: - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - jg sys_setregid # branch to system call - - .globl sys32_getgroups_wrapper -sys32_getgroups_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_getgroups # branch to system call - - .globl sys32_setgroups_wrapper -sys32_setgroups_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_setgroups # branch to system call - - .globl sys32_fchown_wrapper -sys32_fchown_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_fchown # branch to system call - - .globl sys32_setresuid_wrapper -sys32_setresuid_wrapper: - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # uid_t - jg sys_setresuid # branch to system call - - .globl sys32_getresuid_wrapper -sys32_getresuid_wrapper: - llgtr %r2,%r2 # uid_t * - llgtr %r3,%r3 # uid_t * - llgtr %r4,%r4 # uid_t * - jg sys_getresuid # branch to system call - - .globl sys32_setresgid_wrapper -sys32_setresgid_wrapper: - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - llgfr %r4,%r4 # gid_t - jg sys_setresgid # branch to system call - - .globl sys32_getresgid_wrapper -sys32_getresgid_wrapper: - llgtr %r2,%r2 # gid_t * - llgtr %r3,%r3 # gid_t * - llgtr %r4,%r4 # gid_t * - jg sys_getresgid # branch to system call - - .globl sys32_chown_wrapper -sys32_chown_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_chown # branch to system call - - .globl sys32_setuid_wrapper -sys32_setuid_wrapper: - llgfr %r2,%r2 # uid_t - jg sys_setuid # branch to system call - - .globl sys32_setgid_wrapper -sys32_setgid_wrapper: - llgfr %r2,%r2 # gid_t - jg sys_setgid # branch to system call - - .globl sys32_setfsuid_wrapper -sys32_setfsuid_wrapper: - llgfr %r2,%r2 # uid_t - jg sys_setfsuid # branch to system call - - .globl sys32_setfsgid_wrapper -sys32_setfsgid_wrapper: - llgfr %r2,%r2 # gid_t - jg sys_setfsgid # branch to system call - - .globl sys32_pivot_root_wrapper -sys32_pivot_root_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_pivot_root # branch to system call - - .globl sys32_mincore_wrapper -sys32_mincore_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - llgtr %r4,%r4 # unsigned char * - jg sys_mincore # branch to system call - - .globl sys32_madvise_wrapper -sys32_madvise_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_madvise # branch to system call - - .globl sys32_getdents64_wrapper -sys32_getdents64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg sys_getdents64 # branch to system call - - .globl compat_sys_fcntl64_wrapper -compat_sys_fcntl64_wrapper: - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl64 # branch to system call - - .globl sys32_stat64_wrapper -sys32_stat64_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_stat64 # branch to system call - - .globl sys32_lstat64_wrapper -sys32_lstat64_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_lstat64 # branch to system call - - .globl sys32_stime_wrapper -sys32_stime_wrapper: - llgtr %r2,%r2 # long * - jg compat_sys_stime # branch to system call - - .globl sys32_sysctl_wrapper -sys32_sysctl_wrapper: - llgtr %r2,%r2 # struct __sysctl_args32 * - jg sys32_sysctl - - .globl sys32_fstat64_wrapper -sys32_fstat64_wrapper: - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # struct stat64 * - jg sys32_fstat64 # branch to system call - - .globl compat_sys_futex_wrapper -compat_sys_futex_wrapper: - llgtr %r2,%r2 # u32 * - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct compat_timespec * - llgtr %r6,%r6 # u32 * - lgf %r0,164(%r15) # int - stg %r0,160(%r15) - jg compat_sys_futex # branch to system call - - .globl sys32_setxattr_wrapper -sys32_setxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_setxattr - - .globl sys32_lsetxattr_wrapper -sys32_lsetxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_lsetxattr - - .globl sys32_fsetxattr_wrapper -sys32_fsetxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_fsetxattr - - .globl sys32_getxattr_wrapper -sys32_getxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_getxattr - - .globl sys32_lgetxattr_wrapper -sys32_lgetxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_lgetxattr - - .globl sys32_fgetxattr_wrapper -sys32_fgetxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_fgetxattr - - .globl sys32_listxattr_wrapper -sys32_listxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_listxattr - - .globl sys32_llistxattr_wrapper -sys32_llistxattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_llistxattr - - .globl sys32_flistxattr_wrapper -sys32_flistxattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_flistxattr - - .globl sys32_removexattr_wrapper -sys32_removexattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_removexattr - - .globl sys32_lremovexattr_wrapper -sys32_lremovexattr_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_lremovexattr - - .globl sys32_fremovexattr_wrapper -sys32_fremovexattr_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - jg sys_fremovexattr - - .globl sys32_sched_setaffinity_wrapper -sys32_sched_setaffinity_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_setaffinity - - .globl sys32_sched_getaffinity_wrapper -sys32_sched_getaffinity_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_getaffinity - - .globl sys32_exit_group_wrapper -sys32_exit_group_wrapper: - lgfr %r2,%r2 # int - jg sys_exit_group # branch to system call - - .globl sys32_set_tid_address_wrapper -sys32_set_tid_address_wrapper: - llgtr %r2,%r2 # int * - jg sys_set_tid_address # branch to system call - - .globl sys_epoll_create_wrapper -sys_epoll_create_wrapper: - lgfr %r2,%r2 # int - jg sys_epoll_create # branch to system call - - .globl sys_epoll_ctl_wrapper -sys_epoll_ctl_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct epoll_event * - jg sys_epoll_ctl # branch to system call - - .globl sys_epoll_wait_wrapper -sys_epoll_wait_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg sys_epoll_wait # branch to system call - - .globl sys32_lookup_dcookie_wrapper -sys32_lookup_dcookie_wrapper: - sllg %r2,%r2,32 # get high word of 64bit dcookie - or %r2,%r3 # get low word of 64bit dcookie - llgtr %r3,%r4 # char * - llgfr %r4,%r5 # size_t - jg sys_lookup_dcookie - - .globl sys32_fadvise64_wrapper -sys32_fadvise64_wrapper: - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - llgfr %r4,%r5 # size_t (unsigned long) - lgfr %r5,%r6 # int - jg sys32_fadvise64 - - .globl sys32_fadvise64_64_wrapper -sys32_fadvise64_64_wrapper: - llgtr %r2,%r2 # struct fadvise64_64_args * - jg sys32_fadvise64_64 - - .globl sys32_clock_settime_wrapper -sys32_clock_settime_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_settime - - .globl sys32_clock_gettime_wrapper -sys32_clock_gettime_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_gettime - - .globl sys32_clock_getres_wrapper -sys32_clock_getres_wrapper: - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_getres - - .globl sys32_clock_nanosleep_wrapper -sys32_clock_nanosleep_wrapper: - lgfr %r2,%r2 # clockid_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_timespec * - llgtr %r5,%r5 # struct compat_timespec * - jg compat_sys_clock_nanosleep - - .globl sys32_timer_create_wrapper -sys32_timer_create_wrapper: - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_sigevent * - llgtr %r4,%r4 # timer_t * - jg compat_sys_timer_create - - .globl sys32_timer_settime_wrapper -sys32_timer_settime_wrapper: - lgfr %r2,%r2 # timer_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timer_settime - - .globl sys32_timer_gettime_wrapper -sys32_timer_gettime_wrapper: - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timer_gettime - - .globl sys32_timer_getoverrun_wrapper -sys32_timer_getoverrun_wrapper: - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_getoverrun - - .globl sys32_timer_delete_wrapper -sys32_timer_delete_wrapper: - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_delete - - .globl sys32_io_setup_wrapper -sys32_io_setup_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # u32 * - jg compat_sys_io_setup - - .globl sys32_io_destroy_wrapper -sys32_io_destroy_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - jg sys_io_destroy - - .globl sys32_io_getevents_wrapper -sys32_io_getevents_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - lgfr %r4,%r4 # long - llgtr %r5,%r5 # struct io_event * - llgtr %r6,%r6 # struct compat_timespec * - jg compat_sys_io_getevents - - .globl sys32_io_submit_wrapper -sys32_io_submit_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - llgtr %r4,%r4 # struct iocb ** - jg compat_sys_io_submit - - .globl sys32_io_cancel_wrapper -sys32_io_cancel_wrapper: - llgfr %r2,%r2 # (aio_context_t) u32 - llgtr %r3,%r3 # struct iocb * - llgtr %r4,%r4 # struct io_event * - jg sys_io_cancel - - .globl compat_sys_statfs64_wrapper -compat_sys_statfs64_wrapper: - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_statfs64 - - .globl compat_sys_fstatfs64_wrapper -compat_sys_fstatfs64_wrapper: - llgfr %r2,%r2 # unsigned int fd - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_fstatfs64 - - .globl compat_sys_mq_open_wrapper -compat_sys_mq_open_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # mode_t - llgtr %r5,%r5 # struct compat_mq_attr * - jg compat_sys_mq_open - - .globl sys32_mq_unlink_wrapper -sys32_mq_unlink_wrapper: - llgtr %r2,%r2 # const char * - jg sys_mq_unlink - - .globl compat_sys_mq_timedsend_wrapper -compat_sys_mq_timedsend_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedsend - - .globl compat_sys_mq_timedreceive_wrapper -compat_sys_mq_timedreceive_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgtr %r5,%r5 # unsigned int * - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedreceive - - .globl compat_sys_mq_notify_wrapper -compat_sys_mq_notify_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_sigevent * - jg compat_sys_mq_notify - - .globl compat_sys_mq_getsetattr_wrapper -compat_sys_mq_getsetattr_wrapper: - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_mq_attr * - llgtr %r4,%r4 # struct compat_mq_attr * - jg compat_sys_mq_getsetattr - - .globl compat_sys_add_key_wrapper -compat_sys_add_key_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # size_t - llgfr %r6,%r6 # (key_serial_t) u32 - jg sys_add_key - - .globl compat_sys_request_key_wrapper -compat_sys_request_key_wrapper: - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # (key_serial_t) u32 - jg sys_request_key - - .globl sys32_remap_file_pages_wrapper -sys32_remap_file_pages_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_remap_file_pages - - .globl compat_sys_waitid_wrapper -compat_sys_waitid_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # pid_t - llgtr %r4,%r4 # siginfo_emu31_t * - lgfr %r5,%r5 # int - llgtr %r6,%r6 # struct rusage_emu31 * - jg compat_sys_waitid - - .globl compat_sys_kexec_load_wrapper -compat_sys_kexec_load_wrapper: - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # struct kexec_segment * - llgfr %r5,%r5 # unsigned long - jg compat_sys_kexec_load - - .globl sys_ioprio_set_wrapper -sys_ioprio_set_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_ioprio_set - - .globl sys_ioprio_get_wrapper -sys_ioprio_get_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_ioprio_get - - .globl sys_inotify_add_watch_wrapper -sys_inotify_add_watch_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # u32 - jg sys_inotify_add_watch - - .globl sys_inotify_rm_watch_wrapper -sys_inotify_rm_watch_wrapper: - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - jg sys_inotify_rm_watch - - .globl compat_sys_openat_wrapper -compat_sys_openat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg compat_sys_openat - - .globl sys_mkdirat_wrapper -sys_mkdirat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_mkdirat - - .globl sys_mknodat_wrapper -sys_mknodat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgfr %r5,%r5 # unsigned int - jg sys_mknodat - - .globl sys_fchownat_wrapper -sys_fchownat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # uid_t - llgfr %r5,%r5 # gid_t - lgfr %r6,%r6 # int - jg sys_fchownat - - .globl compat_sys_futimesat_wrapper -compat_sys_futimesat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct timeval * - jg compat_sys_futimesat - - .globl sys32_fstatat64_wrapper -sys32_fstatat64_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct stat64 * - lgfr %r5,%r5 # int - jg sys32_fstatat64 - - .globl sys_unlinkat_wrapper -sys_unlinkat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_unlinkat - - .globl sys_renameat_wrapper -sys_renameat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - jg sys_renameat - - .globl sys_linkat_wrapper -sys_linkat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - lgfr %r6,%r6 # int - jg sys_linkat - - .globl sys_symlinkat_wrapper -sys_symlinkat_wrapper: - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgtr %r4,%r4 # const char * - jg sys_symlinkat - - .globl sys_readlinkat_wrapper -sys_readlinkat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # char * - lgfr %r5,%r5 # int - jg sys_readlinkat - - .globl sys_fchmodat_wrapper -sys_fchmodat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # mode_t - jg sys_fchmodat - - .globl sys_faccessat_wrapper -sys_faccessat_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_faccessat - - .globl compat_sys_pselect6_wrapper -compat_sys_pselect6_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # fd_set * - llgtr %r4,%r4 # fd_set * - llgtr %r5,%r5 # fd_set * - llgtr %r6,%r6 # struct timespec * - llgt %r0,164(%r15) # void * - stg %r0,160(%r15) - jg compat_sys_pselect6 - - .globl compat_sys_ppoll_wrapper -compat_sys_ppoll_wrapper: - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # struct timespec * - llgtr %r5,%r5 # const sigset_t * - llgfr %r6,%r6 # size_t - jg compat_sys_ppoll - - .globl sys_unshare_wrapper -sys_unshare_wrapper: - llgfr %r2,%r2 # unsigned long - jg sys_unshare - - .globl compat_sys_set_robust_list_wrapper -compat_sys_set_robust_list_wrapper: - llgtr %r2,%r2 # struct compat_robust_list_head * - llgfr %r3,%r3 # size_t - jg compat_sys_set_robust_list - - .globl compat_sys_get_robust_list_wrapper -compat_sys_get_robust_list_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_uptr_t_t * - llgtr %r4,%r4 # compat_size_t * - jg compat_sys_get_robust_list - - .globl sys_splice_wrapper -sys_splice_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # loff_t * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # size_t - llgf %r0,164(%r15) # unsigned int - stg %r0,160(%r15) - jg sys_splice - - .globl sys_sync_file_range_wrapper -sys_sync_file_range_wrapper: - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - sllg %r4,%r5,32 # get high word of 64bit loff_t - or %r4,%r6 # get low word of 64bit loff_t - llgf %r5,164(%r15) # unsigned int - jg sys_sync_file_range - - .globl sys_tee_wrapper -sys_tee_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - jg sys_tee - - .globl compat_sys_vmsplice_wrapper -compat_sys_vmsplice_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_iovec * - llgfr %r4,%r4 # unsigned int - llgfr %r5,%r5 # unsigned int - jg compat_sys_vmsplice - - .globl sys_getcpu_wrapper -sys_getcpu_wrapper: - llgtr %r2,%r2 # unsigned * - llgtr %r3,%r3 # unsigned * - llgtr %r4,%r4 # struct getcpu_cache * - jg sys_getcpu - - .globl compat_sys_epoll_pwait_wrapper -compat_sys_epoll_pwait_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgtr %r6,%r6 # compat_sigset_t * - llgf %r0,164(%r15) # compat_size_t - stg %r0,160(%r15) - jg compat_sys_epoll_pwait - - .globl compat_sys_utimes_wrapper -compat_sys_utimes_wrapper: - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_timeval * - jg compat_sys_utimes - - .globl compat_sys_utimensat_wrapper -compat_sys_utimensat_wrapper: - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct compat_timespec * - lgfr %r5,%r5 # int - jg compat_sys_utimensat - - .globl compat_sys_signalfd_wrapper -compat_sys_signalfd_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_sigset_t * - llgfr %r4,%r4 # compat_size_t - jg compat_sys_signalfd - - .globl sys_eventfd_wrapper -sys_eventfd_wrapper: - llgfr %r2,%r2 # unsigned int - jg sys_eventfd - - .globl sys_fallocate_wrapper -sys_fallocate_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - sllg %r4,%r4,32 # get high word of 64bit loff_t - lr %r4,%r5 # get low word of 64bit loff_t - sllg %r5,%r6,32 # get high word of 64bit loff_t - l %r5,164(%r15) # get low word of 64bit loff_t - jg sys_fallocate - - .globl sys_timerfd_create_wrapper -sys_timerfd_create_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_timerfd_create - - .globl compat_sys_timerfd_settime_wrapper -compat_sys_timerfd_settime_wrapper: - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timerfd_settime - - .globl compat_sys_timerfd_gettime_wrapper -compat_sys_timerfd_gettime_wrapper: - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timerfd_gettime diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c new file mode 100644 index 00000000000..45cdb37aa6f --- /dev/null +++ b/arch/s390/kernel/compat_wrapper.c @@ -0,0 +1,216 @@ +/* + * Compat system call wrappers. + * + * Copyright IBM Corp. 2014 + */ + +#include <linux/syscalls.h> +#include <linux/compat.h> +#include "entry.h" + +#define COMPAT_SYSCALL_WRAP1(name, ...) \ + COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP2(name, ...) \ + COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP3(name, ...) \ + COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP4(name, ...) \ + COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP5(name, ...) \ + COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP6(name, ...) \ + COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) + +#define __SC_COMPAT_TYPE(t, a) \ + __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a + +#define __SC_COMPAT_CAST(t, a) \ +({ \ + long __ReS = a; \ + \ + BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ + !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ + if (__TYPE_IS_L(t)) \ + __ReS = (s32)a; \ + if (__TYPE_IS_UL(t)) \ + __ReS = (u32)a; \ + if (__TYPE_IS_PTR(t)) \ + __ReS = a & 0x7fffffff; \ + (t)__ReS; \ +}) + +/* + * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by + * compat tasks. These wrappers will only be used for system calls where only + * the system call arguments need sign or zero extension or zeroing of the upper + * 33 bits of pointers. + * Note: since the wrapper function will afterwards call a system call which + * again performs zero and sign extension for all system call arguments with + * a size of less than eight bytes, these compat wrappers only touch those + * system call arguments with a size of eight bytes ((unsigned) long and + * pointers). Zero and sign extension for e.g. int parameters will be done by + * the regular system call wrappers. + */ +#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ + { \ + return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ + } + +COMPAT_SYSCALL_WRAP1(exit, int, error_code); +COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); +COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); +COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); +COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(nice, int, increment); +COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); +COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); +COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); +COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); +COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); +COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); +COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); +COMPAT_SYSCALL_WRAP1(umask, int, mask); +COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); +COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); +COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); +COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); +COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); +COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); +COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); +COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); +COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); +COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); +COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); +COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); +COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); +COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); +COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); +COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); +COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); +COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); +COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); +COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); +COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); +COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); +COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); +COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP1(mlockall, int, flags); +COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); +COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); +COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); +COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); +COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); +COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); +COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); +COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); +COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); +COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); +COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); +COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); +COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); +COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); +COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); +COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); +COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); +COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); +COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); +COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); +COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); +COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); +COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); +COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); +COMPAT_SYSCALL_WRAP1(epoll_create, int, size); +COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); +COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); +COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); +COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); +COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); +COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); +COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); +COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); +COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); +COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); +COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); +COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); +COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); +COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); +COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); +COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); +COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); +COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); +COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); +COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); +COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); +COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); +COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); +COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); +COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); +COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); +COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); +COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); +COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); +COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); +COMPAT_SYSCALL_WRAP1(syncfs, int, fd); +COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); +COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); +COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); +COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); +COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index d8c1131e081..d7b0c4d2788 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -1,12 +1,13 @@ /* - * arch/s390/kernel/cpcmd.c - * * S390 version - * Copyright IBM Corp. 1999,2007 + * Copyright IBM Corp. 1999, 2007 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ +#define KMSG_COMPONENT "cpcmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -15,7 +16,6 @@ #include <linux/string.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> -#include <asm/system.h> #include <asm/io.h> static DEFINE_SPINLOCK(cpcmd_lock); @@ -104,8 +104,8 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - printk(KERN_WARNING - "cpcmd: could not allocate response buffer\n"); + pr_warning("The cpcmd kernel function failed to " + "allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c deleted file mode 100644 index 8cc7c9fa64f..00000000000 --- a/arch/s390/kernel/crash.c +++ /dev/null @@ -1,16 +0,0 @@ -/* - * arch/s390/kernel/crash.c - * - * (C) Copyright IBM Corp. 2005 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - * - */ - -#include <linux/threads.h> -#include <linux/kexec.h> -#include <linux/reboot.h> - -void machine_crash_shutdown(struct pt_regs *regs) -{ -} diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000000..a3b9150e680 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,647 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <linux/memblock.h> +#include <asm/os_info.h> +#include <asm/elf.h> +#include <asm/ipl.h> +#include <asm/sclp.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +static struct memblock_region oldmem_region; + +static struct memblock_type oldmem_type = { + .cnt = 1, + .max = 1, + .total_size = 0, + .regions = &oldmem_region, +}; + +#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ + for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, p_start, \ + p_end, p_nid); \ + i != (u64)ULLONG_MAX; \ + __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, \ + p_start, p_end, p_nid)) + +struct dump_save_areas dump_save_areas; + +/* + * Allocate and add a save area for a CPU + */ +struct save_area *dump_save_area_create(int cpu) +{ + struct save_area **save_areas, *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + return NULL; + if (cpu + 1 > dump_save_areas.count) { + dump_save_areas.count = cpu + 1; + save_areas = krealloc(dump_save_areas.areas, + dump_save_areas.count * sizeof(void *), + GFP_KERNEL | __GFP_ZERO); + if (!save_areas) { + kfree(save_area); + return NULL; + } + dump_save_areas.areas = save_areas; + } + dump_save_areas.areas[cpu] = save_area; + return save_area; +} + +/* + * Return physical address for virtual address + */ +static inline void *load_real_addr(void *addr) +{ + unsigned long real_addr; + + asm volatile( + " lra %0,0(%1)\n" + " jz 0f\n" + " la %0,0\n" + "0:" + : "=a" (real_addr) : "a" (addr) : "cc"); + return (void *)real_addr; +} + +/* + * Copy real to virtual or real memory + */ +static int copy_from_realmem(void *dest, void *src, size_t count) +{ + unsigned long size; + + if (!count) + return 0; + if (!is_vmalloc_or_module_addr(dest)) + return memcpy_real(dest, src, count); + do { + size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); + if (memcpy_real(load_real_addr(dest), src, size)) + return -EFAULT; + count -= size; + dest += size; + src += size; + } while (count); + return 0; +} + +/* + * Pointer to ELF header in new kernel + */ +static void *elfcorehdr_newmem; + +/* + * Copy one page from zfcpdump "oldmem" + * + * For pages below HSA size memory from the HSA is copied. Otherwise + * real memory copy is used. + */ +static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, + unsigned long src, int userbuf) +{ + int rc; + + if (src < sclp_get_hsa_size()) { + rc = memcpy_hsa(buf, src, csize, userbuf); + } else { + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = memcpy_real(buf, (void *) src, csize); + } + return rc ? rc : csize; +} + +/* + * Copy one page from kdump "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, + unsigned long src, int userbuf) + +{ + int rc; + + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); + else + rc = copy_from_realmem(buf, (void *) src, csize); + return (rc == 0) ? rc : csize; +} + +/* + * Copy one page from "oldmem" + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + src = (pfn << PAGE_SHIFT) + offset; + if (OLDMEM_BASE) + return copy_oldmem_page_kdump(buf, csize, src, userbuf); + else + return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); +} + +/* + * Remap "oldmem" for kdump + * + * For the kdump reserved memory this functions performs a swap operation: + * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long size_old; + int rc; + + if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { + size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); + rc = remap_pfn_range(vma, from, + pfn + (OLDMEM_BASE >> PAGE_SHIFT), + size_old, prot); + if (rc || size == size_old) + return rc; + size -= size_old; + from += size_old; + pfn += size_old >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for zfcpdump + * + * We only map available memory above HSA size. Memory below HSA size + * is read on demand using the copy_oldmem_page() function. + */ +static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, + unsigned long from, + unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long hsa_end = sclp_get_hsa_size(); + unsigned long size_hsa; + + if (pfn < hsa_end >> PAGE_SHIFT) { + size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); + if (size == size_hsa) + return 0; + size -= size_hsa; + from += size_hsa; + pfn += size_hsa >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for kdump or zfcpdump + */ +int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + if (OLDMEM_BASE) + return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); + else + return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, + prot); +} + +/* + * Copy memory from old kernel + */ +int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if (OLDMEM_BASE) { + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + } else { + unsigned long hsa_end = sclp_get_hsa_size(); + if ((unsigned long) src < hsa_end) { + copied = min(count, hsa_end - (unsigned long) src); + rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); + if (rc) + return rc; + } + } + return copy_from_realmem(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) + */ +static void *get_vmcoreinfo_old(unsigned long *size) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return NULL; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return NULL; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return NULL; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return NULL; + vmcoreinfo = kzalloc_panic(note.n_descsz); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return NULL; + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (!vmcoreinfo) + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return ptr; + return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; i < dump_save_areas.count; i++) { + if (dump_save_areas.areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + int cnt = 0; + u64 idx; + + for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) + cnt++; + return cnt; +} + +/* + * Initialize ELF loads (new kernel) + */ +static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + phys_addr_t start, end; + u64 idx; + + for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { + phdr->p_filesz = end - start; + phdr->p_type = PT_LOAD; + phdr->p_offset = start; + phdr->p_vaddr = start; + phdr->p_paddr = start; + phdr->p_memsz = end - start; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; i < dump_save_areas.count; i++) { + sa = dump_save_areas.areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = notes_offset; + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + /* If we are not in kdump or zfcpdump mode return */ + if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) + return 0; + /* If elfcorehdr= has been passed via cmdline, we use that one */ + if (elfcorehdr_addr != ELFCORE_ADDR_MAX) + return 0; + /* If we cannot get HSA size for zfcpdump return error */ + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size()) + return -ENODEV; + + /* For kdump, exclude previous crashkernel memory */ + if (OLDMEM_BASE) { + oldmem_region.base = OLDMEM_BASE; + oldmem_region.size = OLDMEM_SIZE; + oldmem_type.total_size = OLDMEM_SIZE; + } + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, hdr_off); + *addr = (unsigned long long) hdr; + elfcorehdr_newmem = hdr; + *size = (unsigned long long) hdr_off; + BUG_ON(elfcorehdr_size > alloc_size); + return 0; +} + +/* + * Free ELF core header (new kernel) + */ +void elfcorehdr_free(unsigned long long addr) +{ + if (!elfcorehdr_newmem) + return; + kfree((void *)(unsigned long)addr); +} + +/* + * Read from ELF header + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; + memcpy(buf, src, count); + *ppos += count; + return count; +} + +/* + * Read from ELF notes data + */ +ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + int rc; + + if (elfcorehdr_newmem) { + memcpy(buf, src, count); + } else { + rc = copy_from_oldmem(buf, src, count); + if (rc) + return rc; + } + *ppos += count; + return count; +} diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 1b2f5ce4532..ee8390da6ea 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1,23 +1,25 @@ /* - * arch/s390/kernel/debug.c * S/390 debug facility * - * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 1999, 2012 + * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) * * Bugreports to: <Linux390@de.ibm.com> */ +#define KMSG_COMPONENT "s390dbf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/ctype.h> +#include <linux/string.h> #include <linux/sysctl.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> @@ -61,8 +63,6 @@ typedef struct } debug_sprintf_entry_t; -extern void tod_to_timeval(uint64_t todval, struct timespec *xtime); - /* internal function prototyes */ static int debug_init(void); @@ -72,8 +72,8 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, size_t user_len, loff_t * offset); static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); -static debug_info_t* debug_info_create(char *name, int pages_per_area, - int nr_areas, int buf_size); +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, @@ -110,6 +110,7 @@ struct debug_view debug_raw_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_raw_view); struct debug_view debug_hex_ascii_view = { "hex_ascii", @@ -119,6 +120,7 @@ struct debug_view debug_hex_ascii_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_hex_ascii_view); static struct debug_view debug_level_view = { "level", @@ -155,9 +157,10 @@ struct debug_view debug_sprintf_view = { NULL, NULL }; +EXPORT_SYMBOL(debug_sprintf_view); /* used by dump analysis tools to determine version of debug feature */ -unsigned int debug_feature_version = __DEBUG_FEATURE_VERSION; +static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; /* static globals */ @@ -166,6 +169,7 @@ static debug_info_t *debug_area_last = NULL; static DEFINE_MUTEX(debug_mutex); static int initialized; +static int debug_critical; static const struct file_operations debug_file_ops = { .owner = THIS_MODULE, @@ -173,6 +177,7 @@ static const struct file_operations debug_file_ops = { .write = debug_input, .open = debug_open, .release = debug_close, + .llseek = no_llseek, }; static struct dentry *debug_debugfs_root_entry; @@ -235,8 +240,8 @@ fail_malloc_areas: */ static debug_info_t* -debug_info_alloc(char *name, int pages_per_area, int nr_areas, int buf_size, - int level, int mode) +debug_info_alloc(const char *name, int pages_per_area, int nr_areas, + int buf_size, int level, int mode) { debug_info_t* rc; @@ -327,7 +332,8 @@ debug_info_free(debug_info_t* db_info){ */ static debug_info_t* -debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size) +debug_info_create(const char *name, int pages_per_area, int nr_areas, + int buf_size, umode_t mode) { debug_info_t* rc; @@ -336,6 +342,8 @@ debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size) if(!rc) goto out; + rc->mode = mode & ~S_IFMT; + /* create root directory */ rc->debugfs_root_entry = debugfs_create_dir(rc->name, debug_debugfs_root_entry); @@ -386,7 +394,7 @@ debug_info_copy(debug_info_t* in, int mode) debug_info_free(rc); } while (1); - if(!rc || (mode == NO_AREAS)) + if (mode == NO_AREAS) goto out; for(i = 0; i < in->nr_areas; i++){ @@ -598,12 +606,12 @@ debug_input(struct file *file, const char __user *user_buf, size_t length, static int debug_open(struct inode *inode, struct file *file) { - int i = 0, rc = 0; + int i, rc = 0; file_private_info_t *p_info; debug_info_t *debug_info, *debug_info_snapshot; mutex_lock(&debug_mutex); - debug_info = file->f_path.dentry->d_inode->i_private; + debug_info = file_inode(file)->i_private; /* find debug view */ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!debug_info->views[i]) @@ -637,8 +645,7 @@ found: p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); if(!p_info){ - if(debug_info_snapshot) - debug_info_free(debug_info_snapshot); + debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } @@ -652,6 +659,7 @@ found: p_info->act_entry_offset = 0; file->private_data = p_info; debug_info_get(debug_info); + nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; @@ -676,23 +684,29 @@ debug_close(struct inode *inode, struct file *file) } /* - * debug_register: - * - creates and initializes debug area for the caller - * - returns handle for debug area + * debug_register_mode: + * - Creates and initializes debug area for the caller + * The mode parameter allows to specify access rights for the s390dbf files + * - Returns handle for debug area */ -debug_info_t* -debug_register (char *name, int pages_per_area, int nr_areas, int buf_size) +debug_info_t *debug_register_mode(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode, + uid_t uid, gid_t gid) { debug_info_t *rc = NULL; - if (!initialized) - BUG(); + /* Since debugfs currently does not support uid/gid other than root, */ + /* we do not allow gid/uid != 0 until we get support for that. */ + if ((uid != 0) || (gid != 0)) + pr_warning("Root becomes the owner of all s390dbf files " + "in sysfs\n"); + BUG_ON(!initialized); mutex_lock(&debug_mutex); /* create new debug_info */ - rc = debug_info_create(name, pages_per_area, nr_areas, buf_size); + rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); if(!rc) goto out; debug_register_view(rc, &debug_level_view); @@ -700,11 +714,26 @@ debug_register (char *name, int pages_per_area, int nr_areas, int buf_size) debug_register_view(rc, &debug_pages_view); out: if (!rc){ - printk(KERN_ERR "debug: debug_register failed for %s\n",name); + pr_err("Registering debug feature %s failed\n", name); } mutex_unlock(&debug_mutex); return rc; } +EXPORT_SYMBOL(debug_register_mode); + +/* + * debug_register: + * - creates and initializes debug area for the caller + * - returns handle for debug area + */ + +debug_info_t *debug_register(const char *name, int pages_per_area, + int nr_areas, int buf_size) +{ + return debug_register_mode(name, pages_per_area, nr_areas, buf_size, + S_IRUSR | S_IWUSR, 0, 0); +} +EXPORT_SYMBOL(debug_register); /* * debug_unregister: @@ -723,6 +752,7 @@ debug_unregister(debug_info_t * id) out: return; } +EXPORT_SYMBOL(debug_unregister); /* * debug_set_size: @@ -740,8 +770,8 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) if(pages_per_area > 0){ new_areas = debug_areas_alloc(pages_per_area, nr_areas); if(!new_areas) { - printk(KERN_WARNING "debug: could not allocate memory "\ - "for pagenumber: %i\n",pages_per_area); + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); rc = -ENOMEM; goto out; } @@ -757,8 +787,7 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) memset(id->active_entries,0,sizeof(int)*id->nr_areas); memset(id->active_pages, 0, sizeof(int)*id->nr_areas); spin_unlock_irqrestore(&id->lock,flags); - printk(KERN_INFO "debug: %s: set new size (%i pages)\n"\ - ,id->name, pages_per_area); + pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area); out: return rc; } @@ -777,17 +806,16 @@ debug_set_level(debug_info_t* id, int new_level) spin_lock_irqsave(&id->lock,flags); if(new_level == DEBUG_OFF_LEVEL){ id->level = DEBUG_OFF_LEVEL; - printk(KERN_INFO "debug: %s: switched off\n",id->name); + pr_info("%s: switched off\n",id->name); } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { - printk(KERN_INFO - "debug: %s: level %i is out of range (%i - %i)\n", + pr_info("%s: level %i is out of range (%i - %i)\n", id->name, new_level, 0, DEBUG_MAX_LEVEL); } else { id->level = new_level; } spin_unlock_irqrestore(&id->lock,flags); } - +EXPORT_SYMBOL(debug_set_level); /* * proceed_active_entry: @@ -839,7 +867,7 @@ static inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, int exception) { - active->id.stck = get_clock(); + active->id.stck = get_tod_clock_fast(); active->id.fields.cpuid = smp_processor_id(); active->caller = __builtin_return_address(0); active->id.fields.exception = exception; @@ -861,11 +889,11 @@ static int debug_active=1; * if debug_active is already off */ static int -s390dbf_procactive(ctl_table *table, int write, struct file *filp, +s390dbf_procactive(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); else return 0; } @@ -873,35 +901,30 @@ s390dbf_procactive(ctl_table *table, int write, struct file *filp, static struct ctl_table s390dbf_table[] = { { - .ctl_name = CTL_S390DBF_STOPPABLE, .procname = "debug_stoppable", .data = &debug_stoppable, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_S390DBF_ACTIVE, .procname = "debug_active", .data = &debug_active, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = &s390dbf_procactive, - .strategy = &sysctl_intvec, + .proc_handler = s390dbf_procactive, }, - { .ctl_name = 0 } + { } }; static struct ctl_table s390dbf_dir_table[] = { { - .ctl_name = CTL_S390DBF, .procname = "s390dbf", .maxlen = 0, .mode = S_IRUGO | S_IXUGO, .child = s390dbf_table, }, - { .ctl_name = 0 } + { } }; static struct ctl_table_header *s390dbf_sysctl_header; @@ -912,7 +935,12 @@ debug_stop_all(void) if (debug_stoppable) debug_active = 0; } +EXPORT_SYMBOL(debug_stop_all); +void debug_set_critical(void) +{ + debug_critical = 1; +} /* * debug_event_common: @@ -927,7 +955,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -936,6 +968,7 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len) return active; } +EXPORT_SYMBOL(debug_event_common); /* * debug_exception_common: @@ -950,7 +983,11 @@ debug_entry_t if (!debug_active || !id->areas) return NULL; - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); memset(DEBUG_DATA(active), 0, id->buf_size); memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); @@ -959,6 +996,7 @@ debug_entry_t return active; } +EXPORT_SYMBOL(debug_exception_common); /* * counts arguments in format string for sprintf view @@ -995,7 +1033,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return NULL; numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active); va_start(ap,string); @@ -1008,6 +1050,7 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return active; } +EXPORT_SYMBOL(debug_sprintf_event); /* * debug_sprintf_exception: @@ -1029,7 +1072,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) numargs=debug_count_numargs(string); - spin_lock_irqsave(&id->lock, flags); + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else + spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active); va_start(ap,string); @@ -1042,26 +1089,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) return active; } - -/* - * debug_init: - * - is called exactly once to initialize the debug feature - */ - -static int -__init debug_init(void) -{ - int rc = 0; - - s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); - mutex_lock(&debug_mutex); - debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL); - printk(KERN_INFO "debug: Initialization complete\n"); - initialized = 1; - mutex_unlock(&debug_mutex); - - return rc; -} +EXPORT_SYMBOL(debug_sprintf_exception); /* * debug_register_view: @@ -1073,20 +1101,21 @@ debug_register_view(debug_info_t * id, struct debug_view *view) int rc = 0; int i; unsigned long flags; - mode_t mode = S_IFREG; + umode_t mode; struct dentry *pde; if (!id) goto out; - if (view->prolog_proc || view->format_proc || view->header_proc) - mode |= S_IRUSR; - if (view->input_proc) - mode |= S_IWUSR; + mode = (id->mode | S_IFREG) & ~S_IXUGO; + if (!(view->prolog_proc || view->format_proc || view->header_proc)) + mode &= ~(S_IRUSR | S_IRGRP | S_IROTH); + if (!view->input_proc) + mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, id , &debug_file_ops); if (!pde){ - printk(KERN_WARNING "debug: debugfs_create_file() failed!"\ - " Cannot register view %s/%s\n", id->name,view->name); + pr_err("Registering view %s/%s failed due to out of " + "memory\n", id->name,view->name); rc = -1; goto out; } @@ -1096,20 +1125,20 @@ debug_register_view(debug_info_t * id, struct debug_view *view) break; } if (i == DEBUG_MAX_VIEWS) { - printk(KERN_WARNING "debug: cannot register view %s/%s\n", - id->name,view->name); - printk(KERN_WARNING - "debug: maximum number of views reached (%i)!\n", i); - debugfs_remove(pde); + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); rc = -1; } else { id->views[i] = view; id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); + if (rc) + debugfs_remove(pde); out: return rc; } +EXPORT_SYMBOL(debug_register_view); /* * debug_unregister_view: @@ -1118,9 +1147,9 @@ out: int debug_unregister_view(debug_info_t * id, struct debug_view *view) { - int rc = 0; - int i; + struct dentry *dentry = NULL; unsigned long flags; + int i, rc = 0; if (!id) goto out; @@ -1132,14 +1161,16 @@ debug_unregister_view(debug_info_t * id, struct debug_view *view) if (i == DEBUG_MAX_VIEWS) rc = -1; else { - debugfs_remove(id->debugfs_entries[i]); + dentry = id->debugfs_entries[i]; id->views[i] = NULL; - rc = 0; + id->debugfs_entries[i] = NULL; } spin_unlock_irqrestore(&id->lock, flags); + debugfs_remove(dentry); out: return rc; } +EXPORT_SYMBOL(debug_unregister_view); static inline char * debug_get_user_string(const char __user *user_buf, size_t user_len) @@ -1166,10 +1197,9 @@ debug_get_uint(char *buf) { int rc; - for(; isspace(*buf); buf++); + buf = skip_spaces(buf); rc = simple_strtoul(buf, &buf, 10); if(*buf){ - printk("debug: no integer specified!\n"); rc = -EINVAL; } return rc; @@ -1281,7 +1311,8 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - printk(KERN_INFO "debug: level `%s` is not valid\n", str); + pr_warning("%s is not a valid level for a debug " + "feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); @@ -1316,19 +1347,12 @@ static void debug_flush(debug_info_t* id, int area) memset(id->areas[i][j], 0, PAGE_SIZE); } } - printk(KERN_INFO "debug: %s: all areas flushed\n",id->name); } else if(area >= 0 && area < id->nr_areas) { id->active_entries[area] = 0; id->active_pages[area] = 0; for(i = 0; i < id->pages_per_area; i++) { memset(id->areas[area][i],0,PAGE_SIZE); } - printk(KERN_INFO "debug: %s: area %i has been flushed\n", - id->name, area); - } else { - printk(KERN_INFO - "debug: %s: area %i cannot be flushed (range: %i - %i)\n", - id->name, area, 0, id->nr_areas-1); } spin_unlock_irqrestore(&id->lock,flags); } @@ -1365,7 +1389,8 @@ debug_input_flush_fn(debug_info_t * id, struct debug_view *view, goto out; } - printk(KERN_INFO "debug: area `%c` is not valid\n", input_buf[0]); + pr_info("Flushing debug data failed because %c is not a valid " + "area\n", input_buf[0]); out: *offset += user_len; @@ -1419,10 +1444,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, rc += sprintf(out_buf + rc, "| "); for (i = 0; i < id->buf_size; i++) { unsigned char c = in_buf[i]; - if (!isprint(c)) - rc += sprintf(out_buf + rc, "."); - else + if (isascii(c) && isprint(c)) rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); } rc += sprintf(out_buf + rc, "\n"); return rc; @@ -1437,17 +1462,13 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { struct timespec time_spec; - unsigned long long time; char *except_str; unsigned long caller; int rc = 0; unsigned int level; level = entry->id.fields.level; - time = entry->id.stck; - /* adjust todclock to 1970 */ - time -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096); - tod_to_timeval(time, &time_spec); + stck_to_timespec(entry->id.stck, &time_spec); if (entry->id.fields.exception) except_str = "*"; @@ -1459,6 +1480,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, except_str, entry->id.fields.cpuid, (void *) caller); return rc; } +EXPORT_SYMBOL(debug_dflt_header_fn); /* * prints debug data sprintf-formated: @@ -1507,33 +1529,16 @@ out: } /* - * clean up module + * debug_init: + * - is called exactly once to initialize the debug feature */ -static void __exit debug_exit(void) +static int __init debug_init(void) { - debugfs_remove(debug_debugfs_root_entry); - unregister_sysctl_table(s390dbf_sysctl_header); - return; + s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table); + mutex_lock(&debug_mutex); + debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); + initialized = 1; + mutex_unlock(&debug_mutex); + return 0; } - -/* - * module definitions - */ postcore_initcall(debug_init); -module_exit(debug_exit); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(debug_register); -EXPORT_SYMBOL(debug_unregister); -EXPORT_SYMBOL(debug_set_level); -EXPORT_SYMBOL(debug_stop_all); -EXPORT_SYMBOL(debug_register_view); -EXPORT_SYMBOL(debug_unregister_view); -EXPORT_SYMBOL(debug_event_common); -EXPORT_SYMBOL(debug_exception_common); -EXPORT_SYMBOL(debug_hex_ascii_view); -EXPORT_SYMBOL(debug_raw_view); -EXPORT_SYMBOL(debug_dflt_header_fn); -EXPORT_SYMBOL(debug_sprintf_view); -EXPORT_SYMBOL(debug_sprintf_exception); -EXPORT_SYMBOL(debug_sprintf_event); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index c032d11da8a..8237fc07ac7 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -9,27 +9,6 @@ #include <asm/diag.h> /* - * Diagnose 10: Release pages - */ -void diag10(unsigned long addr) -{ - if (addr >= 0x7ff00000) - return; - asm volatile( -#ifdef CONFIG_64BIT - " sam31\n" - " diag %0,%0,0x10\n" - "0: sam64\n" -#else - " diag %0,%0,0x10\n" - "0:\n" -#endif - EX_TABLE(0b, 0b) - : : "a" (addr)); -} -EXPORT_SYMBOL(diag10); - -/* * Diagnose 14: Input spool file manipulation */ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index c14a336f630..993efe6a887 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,6 +1,4 @@ /* - * arch/s390/kernel/dis.c - * * Disassemble s390 instructions. * * Copyright IBM Corp. 2007 @@ -15,7 +13,6 @@ #include <linux/timer.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/delay.h> @@ -25,15 +22,15 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <asm/system.h> #include <asm/uaccess.h> +#include <asm/dis.h> #include <asm/io.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/mathemu.h> #include <asm/cpcmd.h> -#include <asm/s390_ext.h> #include <asm/lowcore.h> #include <asm/debug.h> +#include <asm/irq.h> #ifndef CONFIG_64BIT #define ONELONG "%08lx: " @@ -41,17 +38,6 @@ #define ONELONG "%016lx: " #endif /* CONFIG_64BIT */ -#define OPERAND_GPR 0x1 /* Operand printed as %rx */ -#define OPERAND_FPR 0x2 /* Operand printed as %fx */ -#define OPERAND_AR 0x4 /* Operand printed as %ax */ -#define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_DISP 0x10 /* Operand printed as displacement */ -#define OPERAND_BASE 0x20 /* Operand printed as base register */ -#define OPERAND_INDEX 0x40 /* Operand printed as index register */ -#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ - enum { UNUSED, /* Indicates the end of the operand list */ R_8, /* GPR starting at position 8 */ @@ -87,15 +73,29 @@ enum { U4_12, /* 4 bit unsigned value starting at 12 */ U4_16, /* 4 bit unsigned value starting at 16 */ U4_20, /* 4 bit unsigned value starting at 20 */ + U4_24, /* 4 bit unsigned value starting at 24 */ + U4_28, /* 4 bit unsigned value starting at 28 */ + U4_32, /* 4 bit unsigned value starting at 32 */ + U4_36, /* 4 bit unsigned value starting at 36 */ U8_8, /* 8 bit unsigned value starting at 8 */ U8_16, /* 8 bit unsigned value starting at 16 */ + U8_24, /* 8 bit unsigned value starting at 24 */ + U8_32, /* 8 bit unsigned value starting at 32 */ + I8_8, /* 8 bit signed value starting at 8 */ + I8_32, /* 8 bit signed value starting at 32 */ + J12_12, /* PC relative offset at 12 */ I16_16, /* 16 bit signed value starting at 16 */ + I16_32, /* 32 bit signed value starting at 16 */ U16_16, /* 16 bit unsigned value starting at 16 */ + U16_32, /* 32 bit unsigned value starting at 16 */ J16_16, /* PC relative jump offset at 16 */ + J16_32, /* PC relative offset at 16 */ + I24_24, /* 24 bit signed value starting at 24 */ J32_16, /* PC relative long offset at 16 */ I32_16, /* 32 bit signed value starting at 16 */ U32_16, /* 32 bit unsigned value starting at 16 */ M_16, /* 4 bit optional mask starting at 16 */ + M_20, /* 4 bit optional mask starting at 20 */ RO_28, /* optional GPR starting at position 28 */ }; @@ -105,37 +105,47 @@ enum { */ enum { INSTR_INVALID, - INSTR_E, INSTR_RIE_RRP, INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, - INSTR_RIL_UP, INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, + INSTR_E, + INSTR_IE_UU, + INSTR_MII_UPI, + INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, + INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, + INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, + INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, + INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0, - INSTR_RRE_FF, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, INSTR_RRE_RR, - INSTR_RRE_RR_OPT, INSTR_RRF_F0FF, INSTR_RRF_FUFF, INSTR_RRF_M0RR, - INSTR_RRF_R0RR, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, + INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF, + INSTR_RRE_RR, INSTR_RRE_RR_OPT, + INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, + INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR, + INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR, + INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF, + INSTR_RRF_UUFR, INSTR_RRF_UURF, + INSTR_RRR_F0FF, INSTR_RRS_RRRDU, INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, - INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, INSTR_RSI_RRP, - INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, - INSTR_RSY_RURD, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, - INSTR_RS_RRRD, INSTR_RS_RURD, INSTR_RXE_FRRD, INSTR_RXE_RRRD, - INSTR_RXF_FRRDF, INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RX_FRRD, - INSTR_RX_RRRD, INSTR_RX_URRD, INSTR_SIY_URD, INSTR_SI_URD, - INSTR_SSE_RDRD, INSTR_SSF_RRDRD, INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, - INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, + INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, + INSTR_RSI_RRP, + INSTR_RSL_LRDFU, INSTR_RSL_R0RD, + INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, + INSTR_RSY_RDRM, + INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, + INSTR_RS_RURD, + INSTR_RXE_FRRD, INSTR_RXE_RRRD, + INSTR_RXF_FRRDF, + INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, + INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, + INSTR_SIL_RDI, INSTR_SIL_RDU, + INSTR_SIY_IRD, INSTR_SIY_URD, + INSTR_SI_URD, + INSTR_SMI_U0RDP, + INSTR_SSE_RDRD, + INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, + INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, + INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, }; -struct operand { - int bits; /* The number of bits in the operand. */ - int shift; /* The number of bits to shift. */ - int flags; /* One bit syntax flags. */ -}; - -struct insn { - const char name[5]; - unsigned char opfrag; - unsigned char format; -}; - -static const struct operand operands[] = +static const struct s390_operand operands[] = { [UNUSED] = { 0, 0, 0 }, [R_8] = { 4, 8, OPERAND_GPR }, @@ -171,99 +181,287 @@ static const struct operand operands[] = [U4_12] = { 4, 12, 0 }, [U4_16] = { 4, 16, 0 }, [U4_20] = { 4, 20, 0 }, + [U4_24] = { 4, 24, 0 }, + [U4_28] = { 4, 28, 0 }, + [U4_32] = { 4, 32, 0 }, + [U4_36] = { 4, 36, 0 }, [U8_8] = { 8, 8, 0 }, [U8_16] = { 8, 16, 0 }, + [U8_24] = { 8, 24, 0 }, + [U8_32] = { 8, 32, 0 }, + [J12_12] = { 12, 12, OPERAND_PCREL }, [I16_16] = { 16, 16, OPERAND_SIGNED }, [U16_16] = { 16, 16, 0 }, + [U16_32] = { 16, 32, 0 }, [J16_16] = { 16, 16, OPERAND_PCREL }, + [J16_32] = { 16, 32, OPERAND_PCREL }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, + [I24_24] = { 24, 24, OPERAND_SIGNED }, [J32_16] = { 32, 16, OPERAND_PCREL }, [I32_16] = { 32, 16, OPERAND_SIGNED }, [U32_16] = { 32, 16, 0 }, [M_16] = { 4, 16, 0 }, + [M_20] = { 4, 20, 0 }, [RO_28] = { 4, 28, OPERAND_GPR } }; static const unsigned char formats[][7] = { - [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, /* e.g. pr */ - [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, /* e.g. brxhg */ - [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, /* e.g. brasl */ - [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, /* e.g. brcl */ - [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, /* e.g. afi */ - [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, /* e.g. alfi */ - [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, /* e.g. ahi */ - [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, /* e.g. brct */ - [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, /* e.g. tml */ - [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, /* e.g. brc */ - [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, /* e.g. palb */ - [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, /* e.g. tb */ - [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, /* e.g. cpya */ - [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, /* e.g. sar */ - [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, /* e.g. sqer */ - [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, /* e.g. debr */ - [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, /* e.g. ipm */ - [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, /* e.g. ear */ - [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, /* e.g. cefbr */ - [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, /* e.g. lura */ - [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, /* efpc, sfpc */ - [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, /* e.g. madbr */ - [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 },/* e.g. didbr */ - [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 },/* e.g. .insn */ - [INSTR_RRF_R0RR] = { 0xff, R_24,R_28,R_16,0,0,0 }, /* e.g. idte */ - [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, /* e.g. fixr */ - [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, /* e.g. cfebr */ - [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, /* e.g. sske */ - [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, /* e.g. adr */ - [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, /* e.g. spm */ - [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, /* e.g. lr */ - [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, /* e.g. svc */ - [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, /* e.g. bcr */ - [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, /* e.g. lmh */ - [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, /* e.g. lmh */ - [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, /* e.g. icmh */ - [INSTR_RSL_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, /* e.g. tp */ - [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, /* e.g. brxh */ - [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 },/* e.g. stmy */ + [INSTR_E] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 }, + [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 }, + [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 }, + [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, + [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 }, + [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, + [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, + [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 }, + [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, + [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, + [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, + [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 }, + [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 }, + [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 }, + [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 }, + [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 }, + [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 }, + [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 }, + [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 }, + [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 }, + [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 }, + [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 }, + [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 }, + [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 }, + [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 }, + [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 }, + [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 }, + [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 }, + [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 }, + [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 }, + [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 }, + [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 }, + [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 }, + [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 }, + [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 }, + [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, + [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 }, + [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, + [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, + [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 }, + [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, + [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, + [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 }, + [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 }, + [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 }, + [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 }, + [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 }, + [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 }, + [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 }, + [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 }, + [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 }, + [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 }, + [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, + [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 }, + [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 }, + [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, + [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, + [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, - /* e.g. icmh */ - [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 },/* e.g. lamy */ - [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 },/* e.g. lamy */ - [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, /* e.g. lam */ - [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, /* e.g. lctl */ - [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, /* e.g. sll */ - [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, /* e.g. cs */ - [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, /* e.g. icm */ - [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, /* e.g. axbr */ - [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, /* e.g. lg */ + [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, + [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, + [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, + [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 }, + [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, + [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, - /* e.g. madb */ - [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },/* e.g. ly */ - [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },/* e.g. ley */ - [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, /* e.g. ae */ - [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, /* e.g. l */ - [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, /* e.g. bc */ - [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, /* e.g. cli */ - [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, /* e.g. tmy */ - [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, /* e.g. mvsdk */ + [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 }, + [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 }, + [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 }, + [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 }, + [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 }, + [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 }, + [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, + [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 }, + [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, + [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 }, [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, - /* e.g. mvc */ [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, - /* e.g. srp */ [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, - /* e.g. pack */ - [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, - /* e.g. mvck */ [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 }, - /* e.g. plo */ [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 }, - /* e.g. lmd */ - [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, /* e.g. hsch */ - [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, /* e.g. lpsw */ - [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, - /* e.g. mvcos */ + [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, + [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, + [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, }; -static struct insn opcode[] = { +enum { + LONG_INSN_ALGHSIK, + LONG_INSN_ALHHHR, + LONG_INSN_ALHHLR, + LONG_INSN_ALHSIK, + LONG_INSN_ALSIHN, + LONG_INSN_CDFBRA, + LONG_INSN_CDGBRA, + LONG_INSN_CDGTRA, + LONG_INSN_CDLFBR, + LONG_INSN_CDLFTR, + LONG_INSN_CDLGBR, + LONG_INSN_CDLGTR, + LONG_INSN_CEFBRA, + LONG_INSN_CEGBRA, + LONG_INSN_CELFBR, + LONG_INSN_CELGBR, + LONG_INSN_CFDBRA, + LONG_INSN_CFEBRA, + LONG_INSN_CFXBRA, + LONG_INSN_CGDBRA, + LONG_INSN_CGDTRA, + LONG_INSN_CGEBRA, + LONG_INSN_CGXBRA, + LONG_INSN_CGXTRA, + LONG_INSN_CLFDBR, + LONG_INSN_CLFDTR, + LONG_INSN_CLFEBR, + LONG_INSN_CLFHSI, + LONG_INSN_CLFXBR, + LONG_INSN_CLFXTR, + LONG_INSN_CLGDBR, + LONG_INSN_CLGDTR, + LONG_INSN_CLGEBR, + LONG_INSN_CLGFRL, + LONG_INSN_CLGHRL, + LONG_INSN_CLGHSI, + LONG_INSN_CLGXBR, + LONG_INSN_CLGXTR, + LONG_INSN_CLHHSI, + LONG_INSN_CXFBRA, + LONG_INSN_CXGBRA, + LONG_INSN_CXGTRA, + LONG_INSN_CXLFBR, + LONG_INSN_CXLFTR, + LONG_INSN_CXLGBR, + LONG_INSN_CXLGTR, + LONG_INSN_FIDBRA, + LONG_INSN_FIEBRA, + LONG_INSN_FIXBRA, + LONG_INSN_LDXBRA, + LONG_INSN_LEDBRA, + LONG_INSN_LEXBRA, + LONG_INSN_LLGFAT, + LONG_INSN_LLGFRL, + LONG_INSN_LLGHRL, + LONG_INSN_LLGTAT, + LONG_INSN_POPCNT, + LONG_INSN_RIEMIT, + LONG_INSN_RINEXT, + LONG_INSN_RISBGN, + LONG_INSN_RISBHG, + LONG_INSN_RISBLG, + LONG_INSN_SLHHHR, + LONG_INSN_SLHHLR, + LONG_INSN_TABORT, + LONG_INSN_TBEGIN, + LONG_INSN_TBEGINC, + LONG_INSN_PCISTG, + LONG_INSN_MPCIFC, + LONG_INSN_STPCIFC, + LONG_INSN_PCISTB, +}; + +static char *long_insn_name[] = { + [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHHHR] = "alhhhr", + [LONG_INSN_ALHHLR] = "alhhlr", + [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_ALSIHN] = "alsihn", + [LONG_INSN_CDFBRA] = "cdfbra", + [LONG_INSN_CDGBRA] = "cdgbra", + [LONG_INSN_CDGTRA] = "cdgtra", + [LONG_INSN_CDLFBR] = "cdlfbr", + [LONG_INSN_CDLFTR] = "cdlftr", + [LONG_INSN_CDLGBR] = "cdlgbr", + [LONG_INSN_CDLGTR] = "cdlgtr", + [LONG_INSN_CEFBRA] = "cefbra", + [LONG_INSN_CEGBRA] = "cegbra", + [LONG_INSN_CELFBR] = "celfbr", + [LONG_INSN_CELGBR] = "celgbr", + [LONG_INSN_CFDBRA] = "cfdbra", + [LONG_INSN_CFEBRA] = "cfebra", + [LONG_INSN_CFXBRA] = "cfxbra", + [LONG_INSN_CGDBRA] = "cgdbra", + [LONG_INSN_CGDTRA] = "cgdtra", + [LONG_INSN_CGEBRA] = "cgebra", + [LONG_INSN_CGXBRA] = "cgxbra", + [LONG_INSN_CGXTRA] = "cgxtra", + [LONG_INSN_CLFDBR] = "clfdbr", + [LONG_INSN_CLFDTR] = "clfdtr", + [LONG_INSN_CLFEBR] = "clfebr", + [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLFXBR] = "clfxbr", + [LONG_INSN_CLFXTR] = "clfxtr", + [LONG_INSN_CLGDBR] = "clgdbr", + [LONG_INSN_CLGDTR] = "clgdtr", + [LONG_INSN_CLGEBR] = "clgebr", + [LONG_INSN_CLGFRL] = "clgfrl", + [LONG_INSN_CLGHRL] = "clghrl", + [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLGXBR] = "clgxbr", + [LONG_INSN_CLGXTR] = "clgxtr", + [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_CXFBRA] = "cxfbra", + [LONG_INSN_CXGBRA] = "cxgbra", + [LONG_INSN_CXGTRA] = "cxgtra", + [LONG_INSN_CXLFBR] = "cxlfbr", + [LONG_INSN_CXLFTR] = "cxlftr", + [LONG_INSN_CXLGBR] = "cxlgbr", + [LONG_INSN_CXLGTR] = "cxlgtr", + [LONG_INSN_FIDBRA] = "fidbra", + [LONG_INSN_FIEBRA] = "fiebra", + [LONG_INSN_FIXBRA] = "fixbra", + [LONG_INSN_LDXBRA] = "ldxbra", + [LONG_INSN_LEDBRA] = "ledbra", + [LONG_INSN_LEXBRA] = "lexbra", + [LONG_INSN_LLGFAT] = "llgfat", + [LONG_INSN_LLGFRL] = "llgfrl", + [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_LLGTAT] = "llgtat", + [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RIEMIT] = "riemit", + [LONG_INSN_RINEXT] = "rinext", + [LONG_INSN_RISBGN] = "risbgn", + [LONG_INSN_RISBHG] = "risbhg", + [LONG_INSN_RISBLG] = "risblg", + [LONG_INSN_SLHHHR] = "slhhhr", + [LONG_INSN_SLHHLR] = "slhhlr", + [LONG_INSN_TABORT] = "tabort", + [LONG_INSN_TBEGIN] = "tbegin", + [LONG_INSN_TBEGINC] = "tbeginc", + [LONG_INSN_PCISTG] = "pcistg", + [LONG_INSN_MPCIFC] = "mpcifc", + [LONG_INSN_STPCIFC] = "stpcifc", + [LONG_INSN_PCISTB] = "pcistb", +}; + +static struct s390_insn opcode[] = { #ifdef CONFIG_64BIT + { "bprp", 0xc5, INSTR_MII_UPI }, + { "bpp", 0xc7, INSTR_SMI_U0RDP }, + { "trtr", 0xd0, INSTR_SS_L0RDRD }, { "lmd", 0xef, INSTR_SS_RRRDRD3 }, #endif { "spm", 0x04, INSTR_RR_R0 }, @@ -298,7 +496,6 @@ static struct insn opcode[] = { { "lcdr", 0x23, INSTR_RR_FF }, { "hdr", 0x24, INSTR_RR_FF }, { "ldxr", 0x25, INSTR_RR_FF }, - { "lrdr", 0x25, INSTR_RR_FF }, { "mxr", 0x26, INSTR_RR_FF }, { "mxdr", 0x27, INSTR_RR_FF }, { "ldr", 0x28, INSTR_RR_FF }, @@ -315,7 +512,6 @@ static struct insn opcode[] = { { "lcer", 0x33, INSTR_RR_FF }, { "her", 0x34, INSTR_RR_FF }, { "ledr", 0x35, INSTR_RR_FF }, - { "lrer", 0x35, INSTR_RR_FF }, { "axr", 0x36, INSTR_RR_FF }, { "sxr", 0x37, INSTR_RR_FF }, { "ler", 0x38, INSTR_RR_FF }, @@ -323,7 +519,6 @@ static struct insn opcode[] = { { "aer", 0x3a, INSTR_RR_FF }, { "ser", 0x3b, INSTR_RR_FF }, { "mder", 0x3c, INSTR_RR_FF }, - { "mer", 0x3c, INSTR_RR_FF }, { "der", 0x3d, INSTR_RR_FF }, { "aur", 0x3e, INSTR_RR_FF }, { "sur", 0x3f, INSTR_RR_FF }, @@ -374,7 +569,6 @@ static struct insn opcode[] = { { "ae", 0x7a, INSTR_RX_FRRD }, { "se", 0x7b, INSTR_RX_FRRD }, { "mde", 0x7c, INSTR_RX_FRRD }, - { "me", 0x7c, INSTR_RX_FRRD }, { "de", 0x7d, INSTR_RX_FRRD }, { "au", 0x7e, INSTR_RX_FRRD }, { "su", 0x7f, INSTR_RX_FRRD }, @@ -452,8 +646,10 @@ static struct insn opcode[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_01[] = { +static struct s390_insn opcode_01[] = { #ifdef CONFIG_64BIT + { "ptff", 0x04, INSTR_E }, + { "pfpo", 0x0a, INSTR_E }, { "sam64", 0x0e, INSTR_E }, #endif { "pr", 0x01, INSTR_E }, @@ -466,7 +662,7 @@ static struct insn opcode_01[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_a5[] = { +static struct s390_insn opcode_a5[] = { #ifdef CONFIG_64BIT { "iihh", 0x00, INSTR_RI_RU }, { "iihl", 0x01, INSTR_RI_RU }, @@ -488,7 +684,7 @@ static struct insn opcode_a5[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_a7[] = { +static struct s390_insn opcode_a7[] = { #ifdef CONFIG_64BIT { "tmhh", 0x02, INSTR_RI_RU }, { "tmhl", 0x03, INSTR_RI_RU }, @@ -510,16 +706,41 @@ static struct insn opcode_a7[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_b2[] = { +static struct s390_insn opcode_aa[] = { +#ifdef CONFIG_64BIT + { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI }, + { "rion", 0x01, INSTR_RI_RI }, + { "tric", 0x02, INSTR_RI_RI }, + { "rioff", 0x03, INSTR_RI_RI }, + { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_b2[] = { #ifdef CONFIG_64BIT - { "sske", 0x2b, INSTR_RRF_M0RR }, { "stckf", 0x7c, INSTR_S_RD }, - { "cu21", 0xa6, INSTR_RRF_M0RR }, - { "cuutf", 0xa6, INSTR_RRF_M0RR }, - { "cu12", 0xa7, INSTR_RRF_M0RR }, - { "cutfu", 0xa7, INSTR_RRF_M0RR }, + { "lpp", 0x80, INSTR_S_RD }, + { "lcctl", 0x84, INSTR_S_RD }, + { "lpctl", 0x85, INSTR_S_RD }, + { "qsi", 0x86, INSTR_S_RD }, + { "lsctl", 0x87, INSTR_S_RD }, + { "qctri", 0x8e, INSTR_S_RD }, { "stfle", 0xb0, INSTR_S_RD }, { "lpswe", 0xb2, INSTR_S_RD }, + { "srnmb", 0xb8, INSTR_S_RD }, + { "srnmt", 0xb9, INSTR_S_RD }, + { "lfas", 0xbd, INSTR_S_RD }, + { "scctr", 0xe0, INSTR_RRE_RR }, + { "spctr", 0xe1, INSTR_RRE_RR }, + { "ecctr", 0xe4, INSTR_RRE_RR }, + { "epctr", 0xe5, INSTR_RRE_RR }, + { "ppa", 0xe8, INSTR_RRF_U0RR }, + { "etnd", 0xec, INSTR_RRE_R0 }, + { "ecpga", 0xed, INSTR_RRE_RR }, + { "tend", 0xf8, INSTR_S_00 }, + { "niai", 0xfa, INSTR_IE_UU }, + { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD }, #endif { "stidp", 0x02, INSTR_S_RD }, { "sck", 0x04, INSTR_S_RD }, @@ -538,6 +759,7 @@ static struct insn opcode_b2[] = { { "pc", 0x18, INSTR_S_RD }, { "sac", 0x19, INSTR_S_RD }, { "cfc", 0x1a, INSTR_S_RD }, + { "servc", 0x20, INSTR_RRE_RR }, { "ipte", 0x21, INSTR_RRE_RR }, { "ipm", 0x22, INSTR_RRE_R0 }, { "ivsk", 0x23, INSTR_RRE_RR }, @@ -548,9 +770,9 @@ static struct insn opcode_b2[] = { { "pt", 0x28, INSTR_RRE_RR }, { "iske", 0x29, INSTR_RRE_RR }, { "rrbe", 0x2a, INSTR_RRE_RR }, - { "sske", 0x2b, INSTR_RRE_RR }, + { "sske", 0x2b, INSTR_RRF_M0RR }, { "tb", 0x2c, INSTR_RRE_0R }, - { "dxr", 0x2d, INSTR_RRE_F0 }, + { "dxr", 0x2d, INSTR_RRE_FF }, { "pgin", 0x2e, INSTR_RRE_RR }, { "pgout", 0x2f, INSTR_RRE_RR }, { "csch", 0x30, INSTR_S_00 }, @@ -568,8 +790,8 @@ static struct insn opcode_b2[] = { { "schm", 0x3c, INSTR_S_00 }, { "bakr", 0x40, INSTR_RRE_RR }, { "cksm", 0x41, INSTR_RRE_RR }, - { "sqdr", 0x44, INSTR_RRE_F0 }, - { "sqer", 0x45, INSTR_RRE_F0 }, + { "sqdr", 0x44, INSTR_RRE_FF }, + { "sqer", 0x45, INSTR_RRE_FF }, { "stura", 0x46, INSTR_RRE_RR }, { "msta", 0x47, INSTR_RRE_R0 }, { "palb", 0x48, INSTR_RRE_00 }, @@ -590,7 +812,6 @@ static struct insn opcode_b2[] = { { "clst", 0x5d, INSTR_RRE_RR }, { "srst", 0x5e, INSTR_RRE_RR }, { "cmpsc", 0x63, INSTR_RRE_RR }, - { "cmpsc", 0x63, INSTR_RRE_RR }, { "siga", 0x74, INSTR_S_RD }, { "xsch", 0x76, INSTR_S_00 }, { "rp", 0x77, INSTR_S_RD }, @@ -601,14 +822,14 @@ static struct insn opcode_b2[] = { { "stfpc", 0x9c, INSTR_S_RD }, { "lfpc", 0x9d, INSTR_S_RD }, { "tre", 0xa5, INSTR_RRE_RR }, - { "cuutf", 0xa6, INSTR_RRE_RR }, - { "cutfu", 0xa7, INSTR_RRE_RR }, + { "cuutf", 0xa6, INSTR_RRF_M0RR }, + { "cutfu", 0xa7, INSTR_RRF_M0RR }, { "stfl", 0xb1, INSTR_S_RD }, { "trap4", 0xff, INSTR_S_RD }, { "", 0, INSTR_INVALID } }; -static struct insn opcode_b3[] = { +static struct s390_insn opcode_b3[] = { #ifdef CONFIG_64BIT { "maylr", 0x38, INSTR_RRF_F0FF }, { "mylr", 0x39, INSTR_RRF_F0FF }, @@ -616,21 +837,87 @@ static struct insn opcode_b3[] = { { "myr", 0x3b, INSTR_RRF_F0FF }, { "mayhr", 0x3c, INSTR_RRF_F0FF }, { "myhr", 0x3d, INSTR_RRF_F0FF }, - { "cegbr", 0xa4, INSTR_RRE_RR }, - { "cdgbr", 0xa5, INSTR_RRE_RR }, - { "cxgbr", 0xa6, INSTR_RRE_RR }, - { "cgebr", 0xa8, INSTR_RRF_U0RF }, - { "cgdbr", 0xa9, INSTR_RRF_U0RF }, - { "cgxbr", 0xaa, INSTR_RRF_U0RF }, - { "cfer", 0xb8, INSTR_RRF_U0RF }, - { "cfdr", 0xb9, INSTR_RRF_U0RF }, - { "cfxr", 0xba, INSTR_RRF_U0RF }, - { "cegr", 0xc4, INSTR_RRE_RR }, - { "cdgr", 0xc5, INSTR_RRE_RR }, - { "cxgr", 0xc6, INSTR_RRE_RR }, + { "lpdfr", 0x70, INSTR_RRE_FF }, + { "lndfr", 0x71, INSTR_RRE_FF }, + { "cpsdr", 0x72, INSTR_RRF_F0FF2 }, + { "lcdfr", 0x73, INSTR_RRE_FF }, + { "sfasr", 0x85, INSTR_RRE_R0 }, + { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR }, + { "ldgr", 0xc1, INSTR_RRE_FR }, + { "cegr", 0xc4, INSTR_RRE_FR }, + { "cdgr", 0xc5, INSTR_RRE_FR }, + { "cxgr", 0xc6, INSTR_RRE_FR }, { "cger", 0xc8, INSTR_RRF_U0RF }, { "cgdr", 0xc9, INSTR_RRF_U0RF }, { "cgxr", 0xca, INSTR_RRF_U0RF }, + { "lgdr", 0xcd, INSTR_RRE_RF }, + { "mdtra", 0xd0, INSTR_RRF_FUFF2 }, + { "ddtra", 0xd1, INSTR_RRF_FUFF2 }, + { "adtra", 0xd2, INSTR_RRF_FUFF2 }, + { "sdtra", 0xd3, INSTR_RRF_FUFF2 }, + { "ldetr", 0xd4, INSTR_RRF_0UFF }, + { "ledtr", 0xd5, INSTR_RRF_UUFF }, + { "ltdtr", 0xd6, INSTR_RRE_FF }, + { "fidtr", 0xd7, INSTR_RRF_UUFF }, + { "mxtra", 0xd8, INSTR_RRF_FUFF2 }, + { "dxtra", 0xd9, INSTR_RRF_FUFF2 }, + { "axtra", 0xda, INSTR_RRF_FUFF2 }, + { "sxtra", 0xdb, INSTR_RRF_FUFF2 }, + { "lxdtr", 0xdc, INSTR_RRF_0UFF }, + { "ldxtr", 0xdd, INSTR_RRF_UUFF }, + { "ltxtr", 0xde, INSTR_RRE_FF }, + { "fixtr", 0xdf, INSTR_RRF_UUFF }, + { "kdtr", 0xe0, INSTR_RRE_FF }, + { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF }, + { "cudtr", 0xe2, INSTR_RRE_RF }, + { "csdtr", 0xe3, INSTR_RRE_RF }, + { "cdtr", 0xe4, INSTR_RRE_FF }, + { "eedtr", 0xe5, INSTR_RRE_RF }, + { "esdtr", 0xe7, INSTR_RRE_RF }, + { "kxtr", 0xe8, INSTR_RRE_FF }, + { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR }, + { "cuxtr", 0xea, INSTR_RRE_RF }, + { "csxtr", 0xeb, INSTR_RRE_RF }, + { "cxtr", 0xec, INSTR_RRE_FF }, + { "eextr", 0xed, INSTR_RRE_RF }, + { "esxtr", 0xef, INSTR_RRE_RF }, + { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR }, + { "cdutr", 0xf2, INSTR_RRE_FR }, + { "cdstr", 0xf3, INSTR_RRE_FR }, + { "cedtr", 0xf4, INSTR_RRE_FF }, + { "qadtr", 0xf5, INSTR_RRF_FUFF }, + { "iedtr", 0xf6, INSTR_RRF_F0FR }, + { "rrdtr", 0xf7, INSTR_RRF_FFRU }, + { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF }, + { "cxutr", 0xfa, INSTR_RRE_FR }, + { "cxstr", 0xfb, INSTR_RRE_FR }, + { "cextr", 0xfc, INSTR_RRE_FF }, + { "qaxtr", 0xfd, INSTR_RRF_FUFF }, + { "iextr", 0xfe, INSTR_RRF_F0FR }, + { "rrxtr", 0xff, INSTR_RRF_FFRU }, #endif { "lpebr", 0x00, INSTR_RRE_FF }, { "lnebr", 0x01, INSTR_RRE_FF }, @@ -677,10 +964,10 @@ static struct insn opcode_b3[] = { { "lnxbr", 0x41, INSTR_RRE_FF }, { "ltxbr", 0x42, INSTR_RRE_FF }, { "lcxbr", 0x43, INSTR_RRE_FF }, - { "ledbr", 0x44, INSTR_RRE_FF }, - { "ldxbr", 0x45, INSTR_RRE_FF }, - { "lexbr", 0x46, INSTR_RRE_FF }, - { "fixbr", 0x47, INSTR_RRF_U0FF }, + { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF }, + { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF }, { "kxbr", 0x48, INSTR_RRE_FF }, { "cxbr", 0x49, INSTR_RRE_FF }, { "axbr", 0x4a, INSTR_RRE_FF }, @@ -690,24 +977,24 @@ static struct insn opcode_b3[] = { { "tbedr", 0x50, INSTR_RRF_U0FF }, { "tbdr", 0x51, INSTR_RRF_U0FF }, { "diebr", 0x53, INSTR_RRF_FUFF }, - { "fiebr", 0x57, INSTR_RRF_U0FF }, - { "thder", 0x58, INSTR_RRE_RR }, - { "thdr", 0x59, INSTR_RRE_RR }, + { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF }, + { "thder", 0x58, INSTR_RRE_FF }, + { "thdr", 0x59, INSTR_RRE_FF }, { "didbr", 0x5b, INSTR_RRF_FUFF }, - { "fidbr", 0x5f, INSTR_RRF_U0FF }, + { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF }, { "lpxr", 0x60, INSTR_RRE_FF }, { "lnxr", 0x61, INSTR_RRE_FF }, { "ltxr", 0x62, INSTR_RRE_FF }, { "lcxr", 0x63, INSTR_RRE_FF }, - { "lxr", 0x65, INSTR_RRE_RR }, + { "lxr", 0x65, INSTR_RRE_FF }, { "lexr", 0x66, INSTR_RRE_FF }, - { "fixr", 0x67, INSTR_RRF_U0FF }, + { "fixr", 0x67, INSTR_RRE_FF }, { "cxr", 0x69, INSTR_RRE_FF }, - { "lzer", 0x74, INSTR_RRE_R0 }, - { "lzdr", 0x75, INSTR_RRE_R0 }, - { "lzxr", 0x76, INSTR_RRE_R0 }, - { "fier", 0x77, INSTR_RRF_U0FF }, - { "fidr", 0x7f, INSTR_RRF_U0FF }, + { "lzer", 0x74, INSTR_RRE_F0 }, + { "lzdr", 0x75, INSTR_RRE_F0 }, + { "lzxr", 0x76, INSTR_RRE_F0 }, + { "fier", 0x77, INSTR_RRE_FF }, + { "fidr", 0x7f, INSTR_RRE_FF }, { "sfpc", 0x84, INSTR_RRE_RR_OPT }, { "efpc", 0x8c, INSTR_RRE_RR_OPT }, { "cefbr", 0x94, INSTR_RRE_RF }, @@ -716,13 +1003,16 @@ static struct insn opcode_b3[] = { { "cfebr", 0x98, INSTR_RRF_U0RF }, { "cfdbr", 0x99, INSTR_RRF_U0RF }, { "cfxbr", 0x9a, INSTR_RRF_U0RF }, - { "cefr", 0xb4, INSTR_RRE_RF }, - { "cdfr", 0xb5, INSTR_RRE_RF }, - { "cxfr", 0xb6, INSTR_RRE_RF }, + { "cefr", 0xb4, INSTR_RRE_FR }, + { "cdfr", 0xb5, INSTR_RRE_FR }, + { "cxfr", 0xb6, INSTR_RRE_FR }, + { "cfer", 0xb8, INSTR_RRF_U0RF }, + { "cfdr", 0xb9, INSTR_RRF_U0RF }, + { "cfxr", 0xba, INSTR_RRF_U0RF }, { "", 0, INSTR_INVALID } }; -static struct insn opcode_b9[] = { +static struct s390_insn opcode_b9[] = { #ifdef CONFIG_64BIT { "lpgr", 0x00, INSTR_RRE_RR }, { "lngr", 0x01, INSTR_RRE_RR }, @@ -760,7 +1050,23 @@ static struct insn opcode_b9[] = { { "lhr", 0x27, INSTR_RRE_RR }, { "cgfr", 0x30, INSTR_RRE_RR }, { "clgfr", 0x31, INSTR_RRE_RR }, + { "cfdtr", 0x41, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF }, { "bctgr", 0x46, INSTR_RRE_RR }, + { "cfxtr", 0x49, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR }, + { "cdftr", 0x51, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR }, + { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR }, + { "cxftr", 0x59, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF }, + { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR }, + { "cgrt", 0x60, INSTR_RRF_U0RR }, + { "clgrt", 0x61, INSTR_RRF_U0RR }, + { "crt", 0x72, INSTR_RRF_U0RR }, + { "clrt", 0x73, INSTR_RRF_U0RR }, { "ngr", 0x80, INSTR_RRE_RR }, { "ogr", 0x81, INSTR_RRE_RR }, { "xgr", 0x82, INSTR_RRE_RR }, @@ -773,14 +1079,53 @@ static struct insn opcode_b9[] = { { "slbgr", 0x89, INSTR_RRE_RR }, { "cspg", 0x8a, INSTR_RRE_RR }, { "idte", 0x8e, INSTR_RRF_R0RR }, + { "crdte", 0x8f, INSTR_RRF_RMRR }, { "llcr", 0x94, INSTR_RRE_RR }, { "llhr", 0x95, INSTR_RRE_RR }, { "esea", 0x9d, INSTR_RRE_R0 }, + { "ptf", 0xa2, INSTR_RRE_R0 }, { "lptea", 0xaa, INSTR_RRF_RURR }, + { "rrbm", 0xae, INSTR_RRE_RR }, + { "pfmf", 0xaf, INSTR_RRE_RR }, { "cu14", 0xb0, INSTR_RRF_M0RR }, { "cu24", 0xb1, INSTR_RRF_M0RR }, - { "cu41", 0xb2, INSTR_RRF_M0RR }, - { "cu42", 0xb3, INSTR_RRF_M0RR }, + { "cu41", 0xb2, INSTR_RRE_RR }, + { "cu42", 0xb3, INSTR_RRE_RR }, + { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "srstu", 0xbe, INSTR_RRE_RR }, + { "trte", 0xbf, INSTR_RRF_M0RR }, + { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, + { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 }, + { "chhr", 0xcd, INSTR_RRE_RR }, + { "clhhr", 0xcf, INSTR_RRE_RR }, + { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR }, + { "pcilg", 0xd2, INSTR_RRE_RR }, + { "rpcit", 0xd3, INSTR_RRE_RR }, + { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, + { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 }, + { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 }, + { "chlr", 0xdd, INSTR_RRE_RR }, + { "clhlr", 0xdf, INSTR_RRE_RR }, + { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, + { "locgr", 0xe2, INSTR_RRF_M0RR }, + { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, + { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, + { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, + { "agrk", 0xe8, INSTR_RRF_R0RR2 }, + { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, + { "algrk", 0xea, INSTR_RRF_R0RR2 }, + { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, + { "locr", 0xf2, INSTR_RRF_M0RR }, + { "nrk", 0xf4, INSTR_RRF_R0RR2 }, + { "ork", 0xf6, INSTR_RRF_R0RR2 }, + { "xrk", 0xf7, INSTR_RRF_R0RR2 }, + { "ark", 0xf8, INSTR_RRF_R0RR2 }, + { "srk", 0xf9, INSTR_RRF_R0RR2 }, + { "alrk", 0xfa, INSTR_RRF_R0RR2 }, + { "slrk", 0xfb, INSTR_RRF_R0RR2 }, #endif { "kmac", 0x1e, INSTR_RRE_RR }, { "lrvr", 0x1f, INSTR_RRE_RR }, @@ -789,13 +1134,9 @@ static struct insn opcode_b9[] = { { "kimd", 0x3e, INSTR_RRE_RR }, { "klmd", 0x3f, INSTR_RRE_RR }, { "epsw", 0x8d, INSTR_RRE_RR }, - { "trtt", 0x90, INSTR_RRE_RR }, { "trtt", 0x90, INSTR_RRF_M0RR }, - { "trto", 0x91, INSTR_RRE_RR }, { "trto", 0x91, INSTR_RRF_M0RR }, - { "trot", 0x92, INSTR_RRE_RR }, { "trot", 0x92, INSTR_RRF_M0RR }, - { "troo", 0x93, INSTR_RRE_RR }, { "troo", 0x93, INSTR_RRF_M0RR }, { "mlr", 0x96, INSTR_RRE_RR }, { "dlr", 0x97, INSTR_RRE_RR }, @@ -804,7 +1145,7 @@ static struct insn opcode_b9[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_c0[] = { +static struct s390_insn opcode_c0[] = { #ifdef CONFIG_64BIT { "lgfi", 0x01, INSTR_RIL_RI }, { "xihf", 0x06, INSTR_RIL_RU }, @@ -824,8 +1165,10 @@ static struct insn opcode_c0[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_c2[] = { +static struct s390_insn opcode_c2[] = { #ifdef CONFIG_64BIT + { "msgfi", 0x00, INSTR_RIL_RI }, + { "msfi", 0x01, INSTR_RIL_RI }, { "slgfi", 0x04, INSTR_RIL_RU }, { "slfi", 0x05, INSTR_RIL_RU }, { "agfi", 0x08, INSTR_RIL_RI }, @@ -840,14 +1183,65 @@ static struct insn opcode_c2[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_c8[] = { +static struct s390_insn opcode_c4[] = { +#ifdef CONFIG_64BIT + { "llhrl", 0x02, INSTR_RIL_RP }, + { "lghrl", 0x04, INSTR_RIL_RP }, + { "lhrl", 0x05, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, + { "sthrl", 0x07, INSTR_RIL_RP }, + { "lgrl", 0x08, INSTR_RIL_RP }, + { "stgrl", 0x0b, INSTR_RIL_RP }, + { "lgfrl", 0x0c, INSTR_RIL_RP }, + { "lrl", 0x0d, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, + { "strl", 0x0f, INSTR_RIL_RP }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c6[] = { +#ifdef CONFIG_64BIT + { "exrl", 0x00, INSTR_RIL_RP }, + { "pfdrl", 0x02, INSTR_RIL_UP }, + { "cghrl", 0x04, INSTR_RIL_RP }, + { "chrl", 0x05, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, + { "clhrl", 0x07, INSTR_RIL_RP }, + { "cgrl", 0x08, INSTR_RIL_RP }, + { "clgrl", 0x0a, INSTR_RIL_RP }, + { "cgfrl", 0x0c, INSTR_RIL_RP }, + { "crl", 0x0d, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, + { "clrl", 0x0f, INSTR_RIL_RP }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_c8[] = { #ifdef CONFIG_64BIT { "mvcos", 0x00, INSTR_SSF_RRDRD }, + { "ectg", 0x01, INSTR_SSF_RRDRD }, + { "csst", 0x02, INSTR_SSF_RRDRD }, + { "lpd", 0x04, INSTR_SSF_RRDRD2 }, + { "lpdg", 0x05, INSTR_SSF_RRDRD2 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct s390_insn opcode_cc[] = { +#ifdef CONFIG_64BIT + { "brcth", 0x06, INSTR_RIL_RP }, + { "aih", 0x08, INSTR_RIL_RI }, + { "alsih", 0x0a, INSTR_RIL_RI }, + { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI }, + { "cih", 0x0d, INSTR_RIL_RI }, + { "clih", 0x0f, INSTR_RIL_RI }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_e3[] = { +static struct s390_insn opcode_e3[] = { #ifdef CONFIG_64BIT { "ltg", 0x02, INSTR_RXY_RRRD }, { "lrag", 0x03, INSTR_RXY_RRRD }, @@ -876,11 +1270,15 @@ static struct insn opcode_e3[] = { { "cg", 0x20, INSTR_RXY_RRRD }, { "clg", 0x21, INSTR_RXY_RRRD }, { "stg", 0x24, INSTR_RXY_RRRD }, + { "ntstg", 0x25, INSTR_RXY_RRRD }, { "cvdy", 0x26, INSTR_RXY_RRRD }, { "cvdg", 0x2e, INSTR_RXY_RRRD }, { "strvg", 0x2f, INSTR_RXY_RRRD }, { "cgf", 0x30, INSTR_RXY_RRRD }, { "clgf", 0x31, INSTR_RXY_RRRD }, + { "ltgf", 0x32, INSTR_RXY_RRRD }, + { "cgh", 0x34, INSTR_RXY_RRRD }, + { "pfd", 0x36, INSTR_RXY_URRD }, { "strvh", 0x3f, INSTR_RXY_RRRD }, { "bctg", 0x46, INSTR_RXY_RRRD }, { "sty", 0x50, INSTR_RXY_RRRD }, @@ -893,21 +1291,25 @@ static struct insn opcode_e3[] = { { "cy", 0x59, INSTR_RXY_RRRD }, { "ay", 0x5a, INSTR_RXY_RRRD }, { "sy", 0x5b, INSTR_RXY_RRRD }, + { "mfy", 0x5c, INSTR_RXY_RRRD }, { "aly", 0x5e, INSTR_RXY_RRRD }, { "sly", 0x5f, INSTR_RXY_RRRD }, { "sthy", 0x70, INSTR_RXY_RRRD }, { "lay", 0x71, INSTR_RXY_RRRD }, { "stcy", 0x72, INSTR_RXY_RRRD }, { "icy", 0x73, INSTR_RXY_RRRD }, + { "laey", 0x75, INSTR_RXY_RRRD }, { "lb", 0x76, INSTR_RXY_RRRD }, { "lgb", 0x77, INSTR_RXY_RRRD }, { "lhy", 0x78, INSTR_RXY_RRRD }, { "chy", 0x79, INSTR_RXY_RRRD }, { "ahy", 0x7a, INSTR_RXY_RRRD }, { "shy", 0x7b, INSTR_RXY_RRRD }, + { "mhy", 0x7c, INSTR_RXY_RRRD }, { "ng", 0x80, INSTR_RXY_RRRD }, { "og", 0x81, INSTR_RXY_RRRD }, { "xg", 0x82, INSTR_RXY_RRRD }, + { "lgat", 0x85, INSTR_RXY_RRRD }, { "mlg", 0x86, INSTR_RXY_RRRD }, { "dlg", 0x87, INSTR_RXY_RRRD }, { "alcg", 0x88, INSTR_RXY_RRRD }, @@ -918,6 +1320,22 @@ static struct insn opcode_e3[] = { { "llgh", 0x91, INSTR_RXY_RRRD }, { "llc", 0x94, INSTR_RXY_RRRD }, { "llh", 0x95, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD }, + { "lat", 0x9f, INSTR_RXY_RRRD }, + { "lbh", 0xc0, INSTR_RXY_RRRD }, + { "llch", 0xc2, INSTR_RXY_RRRD }, + { "stch", 0xc3, INSTR_RXY_RRRD }, + { "lhh", 0xc4, INSTR_RXY_RRRD }, + { "llhh", 0xc6, INSTR_RXY_RRRD }, + { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfhat", 0xc8, INSTR_RXY_RRRD }, + { "lfh", 0xca, INSTR_RXY_RRRD }, + { "stfh", 0xcb, INSTR_RXY_RRRD }, + { "chf", 0xcd, INSTR_RXY_RRRD }, + { "clhf", 0xcf, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD }, + { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -929,9 +1347,20 @@ static struct insn opcode_e3[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_e5[] = { +static struct s390_insn opcode_e5[] = { #ifdef CONFIG_64BIT { "strag", 0x02, INSTR_SSE_RDRD }, + { "mvhhi", 0x44, INSTR_SIL_RDI }, + { "mvghi", 0x48, INSTR_SIL_RDI }, + { "mvhi", 0x4c, INSTR_SIL_RDI }, + { "chhsi", 0x54, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, + { "cghsi", 0x58, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, + { "chsi", 0x5c, INSTR_SIL_RDI }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU }, + { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU }, #endif { "lasp", 0x00, INSTR_SSE_RDRD }, { "tprot", 0x01, INSTR_SSE_RDRD }, @@ -940,7 +1369,7 @@ static struct insn opcode_e5[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_eb[] = { +static struct s390_insn opcode_eb[] = { #ifdef CONFIG_64BIT { "lmg", 0x04, INSTR_RSY_RRRD }, { "srag", 0x0a, INSTR_RSY_RRRD }, @@ -952,9 +1381,11 @@ static struct insn opcode_eb[] = { { "rllg", 0x1c, INSTR_RSY_RRRD }, { "clmh", 0x20, INSTR_RSY_RURD }, { "clmy", 0x21, INSTR_RSY_RURD }, + { "clt", 0x23, INSTR_RSY_RURD }, { "stmg", 0x24, INSTR_RSY_RRRD }, { "stctg", 0x25, INSTR_RSY_CCRD }, { "stmh", 0x26, INSTR_RSY_RRRD }, + { "clgt", 0x2b, INSTR_RSY_RURD }, { "stcmh", 0x2c, INSTR_RSY_RURD }, { "stcmy", 0x2d, INSTR_RSY_RURD }, { "lctlg", 0x2f, INSTR_RSY_CCRD }, @@ -963,13 +1394,17 @@ static struct insn opcode_eb[] = { { "cdsg", 0x3e, INSTR_RSY_RRRD }, { "bxhg", 0x44, INSTR_RSY_RRRD }, { "bxleg", 0x45, INSTR_RSY_RRRD }, + { "ecag", 0x4c, INSTR_RSY_RRRD }, { "tmy", 0x51, INSTR_SIY_URD }, { "mviy", 0x52, INSTR_SIY_URD }, { "niy", 0x54, INSTR_SIY_URD }, { "cliy", 0x55, INSTR_SIY_URD }, { "oiy", 0x56, INSTR_SIY_URD }, { "xiy", 0x57, INSTR_SIY_URD }, - { "icmh", 0x80, INSTR_RSE_RURD }, + { "asi", 0x6a, INSTR_SIY_IRD }, + { "alsi", 0x6e, INSTR_SIY_IRD }, + { "agsi", 0x7a, INSTR_SIY_IRD }, + { "algsi", 0x7e, INSTR_SIY_IRD }, { "icmh", 0x80, INSTR_RSY_RURD }, { "icmy", 0x81, INSTR_RSY_RURD }, { "clclu", 0x8f, INSTR_RSY_RRRD }, @@ -978,6 +1413,29 @@ static struct insn opcode_eb[] = { { "lmy", 0x98, INSTR_RSY_RRRD }, { "lamy", 0x9a, INSTR_RSY_AARD }, { "stamy", 0x9b, INSTR_RSY_AARD }, + { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD }, + { "sic", 0xd1, INSTR_RSY_RRRD }, + { "srak", 0xdc, INSTR_RSY_RRRD }, + { "slak", 0xdd, INSTR_RSY_RRRD }, + { "srlk", 0xde, INSTR_RSY_RRRD }, + { "sllk", 0xdf, INSTR_RSY_RRRD }, + { "locg", 0xe2, INSTR_RSY_RDRM }, + { "stocg", 0xe3, INSTR_RSY_RDRM }, + { "lang", 0xe4, INSTR_RSY_RRRD }, + { "laog", 0xe6, INSTR_RSY_RRRD }, + { "laxg", 0xe7, INSTR_RSY_RRRD }, + { "laag", 0xe8, INSTR_RSY_RRRD }, + { "laalg", 0xea, INSTR_RSY_RRRD }, + { "loc", 0xf2, INSTR_RSY_RDRM }, + { "stoc", 0xf3, INSTR_RSY_RDRM }, + { "lan", 0xf4, INSTR_RSY_RRRD }, + { "lao", 0xf6, INSTR_RSY_RRRD }, + { "lax", 0xf7, INSTR_RSY_RRRD }, + { "laa", 0xf8, INSTR_RSY_RRRD }, + { "laal", 0xfa, INSTR_RSY_RRRD }, + { "lric", 0x60, INSTR_RSY_RDRM }, + { "stric", 0x61, INSTR_RSY_RDRM }, + { "mric", 0x62, INSTR_RSY_RDRM }, #endif { "rll", 0x1d, INSTR_RSY_RRRD }, { "mvclu", 0x8e, INSTR_RSY_RRRD }, @@ -985,15 +1443,46 @@ static struct insn opcode_eb[] = { { "", 0, INSTR_INVALID } }; -static struct insn opcode_ec[] = { +static struct s390_insn opcode_ec[] = { #ifdef CONFIG_64BIT { "brxhg", 0x44, INSTR_RIE_RRP }, { "brxlg", 0x45, INSTR_RIE_RRP }, + { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { "rnsbg", 0x54, INSTR_RIE_RRUUU }, + { "risbg", 0x55, INSTR_RIE_RRUUU }, + { "rosbg", 0x56, INSTR_RIE_RRUUU }, + { "rxsbg", 0x57, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "cgrj", 0x64, INSTR_RIE_RRPU }, + { "clgrj", 0x65, INSTR_RIE_RRPU }, + { "cgit", 0x70, INSTR_RIE_R0IU }, + { "clgit", 0x71, INSTR_RIE_R0UU }, + { "cit", 0x72, INSTR_RIE_R0IU }, + { "clfit", 0x73, INSTR_RIE_R0UU }, + { "crj", 0x76, INSTR_RIE_RRPU }, + { "clrj", 0x77, INSTR_RIE_RRPU }, + { "cgij", 0x7c, INSTR_RIE_RUPI }, + { "clgij", 0x7d, INSTR_RIE_RUPU }, + { "cij", 0x7e, INSTR_RIE_RUPI }, + { "clij", 0x7f, INSTR_RIE_RUPU }, + { "ahik", 0xd8, INSTR_RIE_RRI0 }, + { "aghik", 0xd9, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, + { "cgrb", 0xe4, INSTR_RRS_RRRDU }, + { "clgrb", 0xe5, INSTR_RRS_RRRDU }, + { "crb", 0xf6, INSTR_RRS_RRRDU }, + { "clrb", 0xf7, INSTR_RRS_RRRDU }, + { "cgib", 0xfc, INSTR_RIS_RURDI }, + { "clgib", 0xfd, INSTR_RIS_RURDU }, + { "cib", 0xfe, INSTR_RIS_RURDI }, + { "clib", 0xff, INSTR_RIS_RURDU }, #endif { "", 0, INSTR_INVALID } }; -static struct insn opcode_ed[] = { +static struct s390_insn opcode_ed[] = { #ifdef CONFIG_64BIT { "mayl", 0x38, INSTR_RXF_FRRDF }, { "myl", 0x39, INSTR_RXF_FRRDF }, @@ -1001,10 +1490,24 @@ static struct insn opcode_ed[] = { { "my", 0x3b, INSTR_RXF_FRRDF }, { "mayh", 0x3c, INSTR_RXF_FRRDF }, { "myh", 0x3d, INSTR_RXF_FRRDF }, + { "sldt", 0x40, INSTR_RXF_FRRDF }, + { "srdt", 0x41, INSTR_RXF_FRRDF }, + { "slxt", 0x48, INSTR_RXF_FRRDF }, + { "srxt", 0x49, INSTR_RXF_FRRDF }, + { "tdcet", 0x50, INSTR_RXE_FRRD }, + { "tdget", 0x51, INSTR_RXE_FRRD }, + { "tdcdt", 0x54, INSTR_RXE_FRRD }, + { "tdgdt", 0x55, INSTR_RXE_FRRD }, + { "tdcxt", 0x58, INSTR_RXE_FRRD }, + { "tdgxt", 0x59, INSTR_RXE_FRRD }, { "ley", 0x64, INSTR_RXY_FRRD }, { "ldy", 0x65, INSTR_RXY_FRRD }, { "stey", 0x66, INSTR_RXY_FRRD }, { "stdy", 0x67, INSTR_RXY_FRRD }, + { "czdt", 0xa8, INSTR_RSL_LRDFU }, + { "czxt", 0xa9, INSTR_RSL_LRDFU }, + { "cdzt", 0xaa, INSTR_RSL_LRDFU }, + { "cxzt", 0xab, INSTR_RSL_LRDFU }, #endif { "ldeb", 0x04, INSTR_RXE_FRRD }, { "lxdb", 0x05, INSTR_RXE_FRRD }, @@ -1038,6 +1541,7 @@ static struct insn opcode_ed[] = { { "mae", 0x2e, INSTR_RXF_FRRDF }, { "mse", 0x2f, INSTR_RXF_FRRDF }, { "sqe", 0x34, INSTR_RXE_FRRD }, + { "sqd", 0x35, INSTR_RXE_FRRD }, { "mee", 0x37, INSTR_RXE_FRRD }, { "mad", 0x3e, INSTR_RXF_FRRDF }, { "msd", 0x3f, INSTR_RXF_FRRDF }, @@ -1046,7 +1550,7 @@ static struct insn opcode_ed[] = { /* Extracts an operand value from an instruction. */ static unsigned int extract_operand(unsigned char *code, - const struct operand *operand) + const struct s390_operand *operand) { unsigned int val; int bits; @@ -1082,16 +1586,11 @@ static unsigned int extract_operand(unsigned char *code, return val; } -static inline int insn_length(unsigned char code) -{ - return ((((int) code + 64) >> 7) + 1) << 1; -} - -static struct insn *find_insn(unsigned char *code) +struct s390_insn *find_insn(unsigned char *code) { unsigned char opfrag = code[1]; unsigned char opmask; - struct insn *table; + struct s390_insn *table; switch (code[0]) { case 0x01: @@ -1103,6 +1602,9 @@ static struct insn *find_insn(unsigned char *code) case 0xa7: table = opcode_a7; break; + case 0xaa: + table = opcode_aa; + break; case 0xb2: table = opcode_b2; break; @@ -1118,9 +1620,18 @@ static struct insn *find_insn(unsigned char *code) case 0xc2: table = opcode_c2; break; + case 0xc4: + table = opcode_c4; + break; + case 0xc6: + table = opcode_c6; + break; case 0xc8: table = opcode_c8; break; + case 0xcc: + table = opcode_cc; + break; case 0xe3: table = opcode_e3; opfrag = code[5]; @@ -1154,11 +1665,39 @@ static struct insn *find_insn(unsigned char *code) return NULL; } +/** + * insn_to_mnemonic - decode an s390 instruction + * @instruction: instruction to decode + * @buf: buffer to fill with mnemonic + * @len: length of buffer + * + * Decode the instruction at @instruction and store the corresponding + * mnemonic into @buf of length @len. + * @buf is left unchanged if the instruction could not be decoded. + * Returns: + * %0 on success, %-ENOENT if the instruction was not found. + */ +int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len) +{ + struct s390_insn *insn; + + insn = find_insn(instruction); + if (!insn) + return -ENOENT; + if (insn->name[0] == '\0') + snprintf(buf, len, "%s", + long_insn_name[(int) insn->name[1]]); + else + snprintf(buf, len, "%.5s", insn->name); + return 0; +} +EXPORT_SYMBOL_GPL(insn_to_mnemonic); + static int print_insn(char *buffer, unsigned char *code, unsigned long addr) { - struct insn *insn; + struct s390_insn *insn; const unsigned char *ops; - const struct operand *operand; + const struct s390_operand *operand; unsigned int value; char separator; char *ptr; @@ -1167,7 +1706,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr = buffer; insn = find_insn(code); if (insn) { - ptr += sprintf(ptr, "%.5s\t", insn->name); + if (insn->name[0] == '\0') + ptr += sprintf(ptr, "%s\t", + long_insn_name[(int) insn->name[1]]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; for (ops = formats[insn->format] + 1, i = 0; @@ -1213,7 +1756,7 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) void show_code(struct pt_regs *regs) { - char *mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl"; + char *mode = user_mode(regs) ? "User" : "Krnl"; unsigned char code[64]; char buffer[64], *ptr; mm_segment_t old_fs; @@ -1222,7 +1765,7 @@ void show_code(struct pt_regs *regs) /* Get a snapshot of the 64 bytes surrounding the fault address. */ old_fs = get_fs(); - set_fs((regs->psw.mask & PSW_MASK_PSTATE) ? USER_DS : KERNEL_DS); + set_fs(user_mode(regs) ? USER_DS : KERNEL_DS); for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { addr = regs->psw.addr - 34 + start; if (__copy_from_user(code + start - 2, @@ -1258,10 +1801,15 @@ void show_code(struct pt_regs *regs) ptr += sprintf(ptr, "%s Code:", mode); hops = 0; while (start < end && hops < 8) { - *ptr++ = (start == 32) ? '>' : ' '; + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; addr = regs->psw.addr + start - 32; ptr += sprintf(ptr, ONELONG, addr); - opsize = insn_length(code[start]); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -1278,3 +1826,28 @@ void show_code(struct pt_regs *regs) } printk("\n"); } + +void print_fn_code(unsigned char *code, unsigned long len) +{ + char buffer[64], *ptr; + int opsize, i; + + while (len) { + ptr = buffer; + opsize = insn_length(*code); + if (opsize > len) + break; + ptr += sprintf(ptr, "%p: ", code); + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[i]); + *ptr++ = '\t'; + if (i < 4) + *ptr++ = '\t'; + ptr += print_insn(ptr, code, (unsigned long) code); + *ptr++ = '\n'; + *ptr++ = 0; + printk(buffer); + code += opsize; + len -= opsize; + } +} diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c new file mode 100644 index 00000000000..acb412442e5 --- /dev/null +++ b/arch/s390/kernel/dumpstack.c @@ -0,0 +1,217 @@ +/* + * Stack dumping functions + * + * Copyright IBM Corp. 1999, 2013 + */ + +#include <linux/kallsyms.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/utsname.h> +#include <linux/export.h> +#include <linux/kdebug.h> +#include <linux/ptrace.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/debug.h> +#include <asm/dis.h> +#include <asm/ipl.h> + +#ifndef CONFIG_64BIT +#define LONG "%08lx " +#define FOURLONG "%08lx %08lx %08lx %08lx\n" +static int kstack_depth_to_print = 12; +#else /* CONFIG_64BIT */ +#define LONG "%016lx " +#define FOURLONG "%016lx %016lx %016lx %016lx\n" +static int kstack_depth_to_print = 20; +#endif /* CONFIG_64BIT */ + +/* + * For show_trace we have tree different stack to consider: + * - the panic stack which is used if the kernel stack has overflown + * - the asynchronous interrupt stack (cpu related) + * - the synchronous kernel stack (process related) + * The stack trace can start at any of the three stack and can potentially + * touch all of them. The order is: panic stack, async stack, sync stack. + */ +static unsigned long +__show_trace(unsigned long sp, unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long addr; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk("([<%016lx>] %pSR)\n", addr, (void *)addr); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + addr = sf->gprs[8] & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + addr = regs->psw.addr & PSW_ADDR_INSN; + printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + low = sp; + sp = regs->gprs[15]; + } +} + +static void show_trace(struct task_struct *task, unsigned long *stack) +{ + const unsigned long frame_size = + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + register unsigned long __r15 asm ("15"); + unsigned long sp; + + sp = (unsigned long) stack; + if (!sp) + sp = task ? task->thread.ksp : __r15; + printk("Call Trace:\n"); +#ifdef CONFIG_CHECK_STACK + sp = __show_trace(sp, + S390_lowcore.panic_stack + frame_size - 4096, + S390_lowcore.panic_stack + frame_size); +#endif + sp = __show_trace(sp, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); + if (task) + __show_trace(sp, (unsigned long) task_stack_page(task), + (unsigned long) task_stack_page(task) + THREAD_SIZE); + else + __show_trace(sp, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); + if (!task) + task = current; + debug_show_held_locks(task); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + register unsigned long *__r15 asm ("15"); + unsigned long *stack; + int i; + + if (!sp) + stack = task ? (unsigned long *) task->thread.ksp : __r15; + else + stack = sp; + + for (i = 0; i < kstack_depth_to_print; i++) { + if (((addr_t) stack & (THREAD_SIZE-1)) == 0) + break; + if ((i * sizeof(long) % 32) == 0) + printk("%s ", i == 0 ? "" : "\n"); + printk(LONG, *stack++); + } + printk("\n"); + show_trace(task, sp); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ +#ifdef CONFIG_64BIT + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); +#endif +} + +static inline int mask_bits(struct pt_regs *regs, unsigned long bits) +{ + return (regs->psw.mask & bits) / ((~bits + 1) & bits); +} + +void show_registers(struct pt_regs *regs) +{ + char *mode; + + mode = user_mode(regs) ? "User" : "Krnl"; + printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + if (!user_mode(regs)) + printk(" (%pSR)", (void *)regs->psw.addr); + printk("\n"); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), + mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), + mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), + mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), + mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), + mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); +#ifdef CONFIG_64BIT + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); +#endif + printk("\n%s GPRS: " FOURLONG, mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" " FOURLONG, + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" " FOURLONG, + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" " FOURLONG, + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!user_mode(regs)) + show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + do_exit(SIGSEGV); +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 01832c44063..0dff972a169 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -1,36 +1,91 @@ /* - * arch/s390/kernel/early.c - * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007, 2009 * Author(s): Hongjie Yang <hongjie@us.ibm.com>, * Heiko Carstens <heiko.carstens@de.ibm.com> */ +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/ctype.h> +#include <linux/ftrace.h> #include <linux/lockdep.h> #include <linux/module.h> #include <linux/pfn.h> #include <linux/uaccess.h> +#include <linux/kernel.h> +#include <asm/ebcdic.h> #include <asm/ipl.h> #include <asm/lowcore.h> #include <asm/processor.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/sclp.h> +#include <asm/facility.h> +#include "entry.h" /* * Create a Kernel NSS if the SAVESYS= parameter is defined */ -#define DEFSYS_CMD_SIZE 96 +#define DEFSYS_CMD_SIZE 128 #define SAVESYS_CMD_SIZE 32 char kernel_nss_name[NSS_NAME_SIZE + 1]; +static void __init setup_boot_command_line(void); + +/* + * Get the TOD clock running. + */ +static void __init reset_tod_clock(void) +{ + u64 time; + + if (store_tod_clock(&time) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) + disabled_wait(0); + + sched_clock_base_cc = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = sched_clock_base_cc; +} + #ifdef CONFIG_SHARED_KERNEL +int __init savesys_ipl_nss(char *cmd, const int cmdlen); + +asm( + " .section .init.text,\"ax\",@progbits\n" + " .align 4\n" + " .type savesys_ipl_nss, @function\n" + "savesys_ipl_nss:\n" +#ifdef CONFIG_64BIT + " stmg 6,15,48(15)\n" + " lgr 14,3\n" + " sam31\n" + " diag 2,14,0x8\n" + " sam64\n" + " lgr 2,14\n" + " lmg 6,15,48(15)\n" +#else + " stm 6,15,24(15)\n" + " lr 14,3\n" + " diag 2,14,0x8\n" + " lr 2,14\n" + " lm 6,15,24(15)\n" +#endif + " br 14\n" + " .size savesys_ipl_nss, .-savesys_ipl_nss\n" + " .previous\n"); + +static __initdata char upper_command_line[COMMAND_LINE_SIZE]; + static noinline __init void create_kernel_nss(void) { unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; @@ -38,8 +93,9 @@ static noinline __init void create_kernel_nss(void) unsigned int sinitrd_pfn, einitrd_pfn; #endif int response; + int hlen; + size_t len; char *savesys_ptr; - char upper_command_line[COMMAND_LINE_SIZE]; char defsys_cmd[DEFSYS_CMD_SIZE]; char savesys_cmd[SAVESYS_CMD_SIZE]; @@ -48,8 +104,8 @@ static noinline __init void create_kernel_nss(void) return; /* Convert COMMAND_LINE to upper case */ - for (i = 0; i < strlen(COMMAND_LINE); i++) - upper_command_line[i] = toupper(COMMAND_LINE[i]); + for (i = 0; i < strlen(boot_command_line); i++) + upper_command_line[i] = toupper(boot_command_line[i]); savesys_ptr = strstr(upper_command_line, "SAVESYS="); @@ -68,38 +124,66 @@ static noinline __init void create_kernel_nss(void) end_pfn = PFN_UP(__pa(&_end)); min_size = end_pfn << 2; - sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", - kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1, - eshared_pfn, end_pfn); + hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE, + "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X", + kernel_nss_name, stext_pfn - 1, stext_pfn, + eshared_pfn - 1, eshared_pfn, end_pfn); #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { sinitrd_pfn = PFN_DOWN(__pa(INITRD_START)); einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE)); min_size = einitrd_pfn << 2; - sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd, - sinitrd_pfn, einitrd_pfn); + hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn); } #endif - sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK", defsys_cmd, min_size); - sprintf(savesys_cmd, "SAVESYS %s \n IPL %s", - kernel_nss_name, kernel_nss_name); + snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen, + " EW MINSIZE=%.7iK PARMREGS=0-13", min_size); + defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0'; + snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s", + kernel_nss_name, kernel_nss_name); + savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0'; __cpcmd(defsys_cmd, NULL, 0, &response); if (response != 0) { + pr_err("Defining the Linux kernel NSS failed with rc=%d\n", + response); kernel_nss_name[0] = '\0'; return; } - __cpcmd(savesys_cmd, NULL, 0, &response); + len = strlen(savesys_cmd); + ASCEBC(savesys_cmd, len); + response = savesys_ipl_nss(savesys_cmd, len); - if (response != strlen(savesys_cmd)) { + /* On success: response is equal to the command size, + * max SAVESYS_CMD_SIZE + * On error: response contains the numeric portion of cp error message. + * for SAVESYS it will be >= 263 + * for missing privilege class, it will be 1 + */ + if (response > SAVESYS_CMD_SIZE || response == 1) { + pr_err("Saving the Linux kernel NSS failed with rc=%d\n", + response); kernel_nss_name[0] = '\0'; return; } + /* re-initialize cputime accounting. */ + sched_clock_base_cc = get_tod_clock(); + S390_lowcore.last_update_clock = sched_clock_base_cc; + S390_lowcore.last_update_timer = 0x7fffffffffffffffULL; + S390_lowcore.user_timer = 0; + S390_lowcore.system_timer = 0; + asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer)); + + /* re-setup boot command line with new ipl vm parms */ + ipl_update_parameters(); + setup_boot_command_line(); + ipl_flags = IPL_NSS_VALID; } @@ -122,145 +206,261 @@ static noinline __init void clear_bss_section(void) */ static noinline __init void init_kernel_storage_key(void) { +#if PAGE_DEFAULT_KEY unsigned long end_pfn, init_pfn; end_pfn = PFN_UP(__pa(&_end)); for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); +#endif } +static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + static noinline __init void detect_machine_type(void) { - struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data; + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - get_cpu_id(&S390_lowcore.cpu_data.cpu_id); - - /* Running under z/VM ? */ - if (cpuinfo->cpu_id.version == 0xff) - machine_flags |= 1; + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; - /* Running on a P/390 ? */ - if (cpuinfo->cpu_id.machine == 0x7490) - machine_flags |= 4; + /* Running under KVM? If not we assume z/VM */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; + else + S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +static __init void setup_topology(void) +{ #ifdef CONFIG_64BIT -static noinline __init int memory_fast_detect(void) + int max_mnest; + + if (!test_facility(11)) + return; + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) + break; + } + topology_max_mnest = max_mnest; +#endif +} + +static void early_pgm_check_handler(void) { - unsigned long val0 = 0; - unsigned long val1 = 0xc; - int ret = -ENOSYS; + const struct exception_table_entry *fixup; + unsigned long cr0, cr0_new; + unsigned long addr; + + addr = S390_lowcore.program_old_psw.addr; + fixup = search_exception_tables(addr & PSW_ADDR_INSN); + if (!fixup) + disabled_wait(0); + /* Disable low address protection before storing into lowcore. */ + __ctl_store(cr0, 0, 0); + cr0_new = cr0 & ~(1UL << 28); + __ctl_load(cr0_new, 0, 0); + S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; + __ctl_load(cr0, 0, 0); +} - if (ipl_flags & IPL_NSS_VALID) - return -ENOSYS; +static noinline __init void setup_lowcore_early(void) +{ + psw_t psw; + + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; + S390_lowcore.external_new_psw = psw; + psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + S390_lowcore.program_new_psw = psw; + s390_base_pgm_handler_fn = early_pgm_check_handler; +} + +static noinline __init void setup_facility_list(void) +{ + stfle(S390_lowcore.stfle_fac_list, + ARRAY_SIZE(S390_lowcore.stfle_fac_list)); +} + +static __init void detect_mvpg(void) +{ +#ifndef CONFIG_64BIT + int rc; asm volatile( - " diag %1,%2,0x260\n" - "0: lhi %0,0\n" + " la 0,0\n" + " mvpg %2,%2\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "+d" (ret), "+d" (val0), "+d" (val1) : : "cc"); + : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; +#endif +} - if (ret || val0 != val1) - return -ENOSYS; +static __init void detect_ieee(void) +{ +#ifndef CONFIG_64BIT + int rc, tmp; - memory_chunk[0].size = val0 + 1; - return 0; + asm volatile( + " efpc %1,0\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; +#endif } -#else -static inline int memory_fast_detect(void) + +static __init void detect_csp(void) { - return -ENOSYS; -} +#ifndef CONFIG_64BIT + int rc; + + asm volatile( + " la 0,0\n" + " la 1,0\n" + " la 2,4\n" + " csp 0,2\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; #endif +} -static inline __init unsigned long __tprot(unsigned long addr) +static __init void detect_diag9c(void) { - int cc = -1; + unsigned int cpu_address; + int rc; + cpu_address = stap(); asm volatile( - " tprot 0(%1),0\n" - "0: ipm %0\n" - " srl %0,28\n" + " diag %2,0,0x9c\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "+d" (cc) : "a" (addr) : "cc"); - return (unsigned long)cc; + : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; } -/* Checking memory in 128KB increments. */ -#define CHUNK_INCR (1UL << 17) -#define ADDR2G (1UL << 31) +static __init void detect_diag44(void) +{ +#ifdef CONFIG_64BIT + int rc; -static noinline __init void find_memory_chunks(unsigned long memsize) + asm volatile( + " diag 0,0,0x44\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; +#endif +} + +static __init void detect_machine_facilities(void) { - unsigned long addr = 0, old_addr = 0; - unsigned long old_cc = CHUNK_READ_WRITE; - unsigned long cc; - int chunk = 0; - - while (chunk < MEMORY_CHUNKS) { - cc = __tprot(addr); - while (cc == old_cc) { - addr += CHUNK_INCR; - if (memsize && addr >= memsize) - break; -#ifndef CONFIG_64BIT - if (addr == ADDR2G) - break; +#ifdef CONFIG_64BIT + if (test_facility(8)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; + __ctl_set_bit(0, 23); + } + if (test_facility(78)) + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; + if (test_facility(3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; + if (test_facility(40)) + S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; + if (test_facility(50) && test_facility(73)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + if (test_facility(66)) + S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; + if (test_facility(51)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; +#endif +} + +static __init void rescue_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); + /* + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. + */ + if (!INITRD_START || !INITRD_SIZE) + return; + if (INITRD_START >= min_initrd_addr) + return; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; #endif - cc = __tprot(addr); - } +} - if (old_addr != addr && - (old_cc == CHUNK_READ_WRITE || old_cc == CHUNK_READ_ONLY)) { - memory_chunk[chunk].addr = old_addr; - memory_chunk[chunk].size = addr - old_addr; - memory_chunk[chunk].type = old_cc; - chunk++; - } +/* Set up boot command line */ +static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) +{ + char *parm, *delim; + size_t rc, len; - old_addr = addr; - old_cc = cc; + len = strlen(boot_command_line); -#ifndef CONFIG_64BIT - if (addr == ADDR2G) - break; -#endif - /* - * Finish memory detection at the first hole - * if storage size is unknown. - */ - if (cc == -1UL && !memsize) - break; - if (memsize && addr >= memsize) - break; + delim = boot_command_line + len; /* '\0' character position */ + parm = boot_command_line + len + 1; /* append right after '\0' */ + + rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); + if (rc) { + if (*parm == '=') + memmove(boot_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ } } -static __init void early_pgm_check_handler(void) +static inline int has_ebcdic_char(const char *str) { - unsigned long addr; - const struct exception_table_entry *fixup; + int i; - addr = S390_lowcore.program_old_psw.addr; - fixup = search_exception_tables(addr & PSW_ADDR_INSN); - if (!fixup) - disabled_wait(0); - S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; } -static noinline __init void setup_lowcore_early(void) +static void __init setup_boot_command_line(void) { - psw_t psw; - - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; - psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; - S390_lowcore.external_new_psw = psw; - psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; - S390_lowcore.program_new_psw = psw; - s390_base_pgm_handler_fn = early_pgm_check_handler; + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); + /* copy arch command line */ + strlcpy(boot_command_line, strstrip(COMMAND_LINE), + ARCH_COMMAND_LINE_SIZE); + + /* append IPL PARM data to the boot command line */ + if (MACHINE_IS_VM) + append_to_cmdline(append_ipl_vmparm); + + append_to_cmdline(append_ipl_scpdata); } /* @@ -269,29 +469,29 @@ static noinline __init void setup_lowcore_early(void) */ void __init startup_init(void) { - unsigned long long memsize; - + reset_tod_clock(); ipl_save_parameters(); + rescue_initrd(); clear_bss_section(); init_kernel_storage_key(); lockdep_init(); lockdep_off(); + setup_lowcore_early(); + setup_facility_list(); detect_machine_type(); + ipl_update_parameters(); + setup_boot_command_line(); create_kernel_nss(); - sort_main_extable(); - setup_lowcore_early(); - sclp_read_info_early(); - sclp_facilities_detect(); - memsize = sclp_memory_detect(); -#ifndef CONFIG_64BIT - /* - * Can't deal with more than 2G in 31 bit addressing mode, so - * limit the value in order to avoid strange side effects. - */ - if (memsize > ADDR2G) - memsize = ADDR2G; + detect_mvpg(); + detect_ieee(); + detect_csp(); + detect_diag9c(); + detect_diag44(); + detect_machine_facilities(); + setup_topology(); + sclp_early_detect(); +#ifdef CONFIG_DYNAMIC_FTRACE + S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif - if (memory_fast_detect() < 0) - find_memory_chunks((unsigned long) memsize); lockdep_on(); } diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c index cc0dc609d73..b971c6be629 100644 --- a/arch/s390/kernel/ebcdic.c +++ b/arch/s390/kernel/ebcdic.c @@ -1,10 +1,9 @@ /* - * arch/s390/kernel/ebcdic.c * ECBDIC -> ASCII, ASCII -> ECBDIC, * upper to lower case (EBCDIC) conversion tables. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Martin Peschke <peschke@fh-brandenburg.de> */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6766e37fe8e..70203265196 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1,205 +1,144 @@ /* - * arch/s390/kernel/entry.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sys.h> -#include <linux/linkage.h> #include <linux/init.h> +#include <linux/linkage.h> +#include <asm/processor.h> #include <asm/cache.h> -#include <asm/lowcore.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/page.h> - -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 12 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 20 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 28 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 36 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 44 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE - -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING) +#include <asm/sigp.h> +#include <asm/irq.h> + +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 4 +__PT_R2 = __PT_GPRS + 8 +__PT_R3 = __PT_GPRS + 12 +__PT_R4 = __PT_GPRS + 16 +__PT_R5 = __PT_GPRS + 20 +__PT_R6 = __PT_GPRS + 24 +__PT_R7 = __PT_GPRS + 28 +__PT_R8 = __PT_GPRS + 32 +__PT_R9 = __PT_GPRS + 36 +__PT_R10 = __PT_GPRS + 40 +__PT_R11 = __PT_GPRS + 44 +__PT_R12 = __PT_GPRS + 48 +__PT_R13 = __PT_GPRS + 524 +__PT_R14 = __PT_GPRS + 56 +__PT_R15 = __PT_GPRS + 60 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE + +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) #define BASED(name) name-system_call(%r13) -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - l %r1,BASED(.Ltrace_irq_on) - basr %r14,%r1 +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + l %r1,BASED(.Lhardirqs_on) + basr %r14,%r1 # call trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF - l %r1,BASED(.Ltrace_irq_off) - basr %r14,%r1 - .endm - - .macro TRACE_IRQS_CHECK - tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - l %r1,BASED(.Ltrace_irq_on) - basr %r14,%r1 - j 1f -0: l %r1,BASED(.Ltrace_irq_off) - basr %r14,%r1 -1: - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + l %r1,BASED(.Lhardirqs_off) + basr %r14,%r1 # call trace_hardirqs_off_caller #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 l %r1,BASED(.Llockdep_sys_exit) - basr %r14,%r1 -0: - .endm -#else -#define LOCKDEP_SYS_EXIT -#endif - -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ - - .macro STORE_TIMER lc_offset -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - stpt \lc_offset + basr %r14,%r1 # call lockdep_sys_exit #endif .endm -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lm %r10,%r11,\lc_from - sl %r10,\lc_to - sl %r11,\lc_to+4 - bc 3,BASED(0f) - sl %r10,BASED(.Lc_1) -0: al %r10,\lc_sum - al %r11,\lc_sum+4 - bc 12,BASED(1f) - al %r10,BASED(.Lc_1) -1: stm %r10,%r11,\lc_sum - .endm + .macro CHECK_STACK stacksize,savearea +#ifdef CONFIG_CHECK_STACK + tml %r15,\stacksize - CONFIG_STACK_GUARD + la %r14,\savearea + jz stack_overflow #endif - - .macro SAVE_ALL_BASE savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 .endm - .macro SAVE_ALL_SVC psworg,savearea - la %r12,\psworg - l %r15,__LC_KERNEL_STACK # problem state -> load ksp + .macro SWITCH_ASYNC savearea,stack,shift + tmh %r8,0x0001 # interrupting from user ? + jnz 1f + lr %r14,%r9 + sl %r14,BASED(.Lcritical_start) + cl %r14,BASED(.Lcritical_length) + jhe 0f + la %r11,\savearea # inside critical section, do cleanup + bras %r14,cleanup_critical + tmh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: l %r14,\stack # are we already on the target stack? + slr %r14,%r15 + sra %r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: l %r15,\stack # load target stack +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro SAVE_ALL_SYNC psworg,savearea - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - bz BASED(2f) # skip stack setup save - l %r15,__LC_KERNEL_STACK # problem state -> load ksp -#ifdef CONFIG_CHECK_STACK - b BASED(3f) -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bz BASED(stack_overflow) -3: -#endif -2: + .macro ADD64 high,low,timer + al \high,\timer + al \low,4+\timer + brc 12,.+8 + ahi \high,1 .endm - .macro SAVE_ALL_ASYNC psworg,savearea - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - bnz BASED(1f) # from user -> load async stack - clc \psworg+4(4),BASED(.Lcritical_end) - bhe BASED(0f) - clc \psworg+4(4),BASED(.Lcritical_start) - bl BASED(0f) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 - tm 1(%r12),0x01 # retest problem state after cleanup - bnz BASED(1f) -0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ? - slr %r14,%r15 - sra %r14,STACK_SHIFT - be BASED(2f) -1: l %r15,__LC_ASYNC_STACK -#ifdef CONFIG_CHECK_STACK - b BASED(3f) -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bz BASED(stack_overflow) -3: -#endif -2: + .macro SUB64 high,low,timer + sl \high,\timer + sl \low,4+\timer + brc 3,.+8 + ahi \high,-1 .endm - .macro CREATE_STACK_FRAME psworg,savearea - s %r15,BASED(.Lc_spsize) # make room for registers & psw - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - la %r12,\psworg - st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,12,__LC_SVC_ILC - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_ILC(%r15) - mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack - la %r12,0 - st %r12,__SF_BACKCHAIN(%r15) # clear back chain + .macro UPDATE_VTIME high,low,enter_timer + lm \high,\low,__LC_EXIT_TIMER + SUB64 \high,\low,\enter_timer + ADD64 \high,\low,__LC_USER_TIMER + stm \high,\low,__LC_USER_TIMER + lm \high,\low,__LC_LAST_UPDATE_TIMER + SUB64 \high,\low,__LC_EXIT_TIMER + ADD64 \high,\low,__LC_SYSTEM_TIMER + stm \high,\low,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(8),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - STORE_TIMER __LC_EXIT_TIMER - lpsw \psworg # back to caller + .macro REENABLE_IRQS + st %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm + .section .kprobes.text, "ax" + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -207,34 +146,20 @@ STACK_SIZE = 1 << STACK_SHIFT * Returns: * gpr2 = prev */ - .globl __switch_to -__switch_to: - basr %r1,0 -__switch_to_base: - tm __THREAD_per(%r3),0xe8 # new process is using per ? - bz __switch_to_noper-__switch_to_base(%r1) # if not we're fine - stctl %c9,%c11,__SF_EMPTY(%r15) # We are using per stuff - clc __THREAD_per(12,%r3),__SF_EMPTY(%r15) - be __switch_to_noper-__switch_to_base(%r1) # we got away w/o bashing TLB's - lctl %c9,%c11,__THREAD_per(%r3) # Nope we didn't -__switch_to_noper: +ENTRY(__switch_to) + stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + st %r15,__THREAD_ksp(%r2) # store kernel stack of prev l %r4,__THREAD_info(%r2) # get thread_info of prev - tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - bz __switch_to_no_mcck-__switch_to_base(%r1) - ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - l %r4,__THREAD_info(%r3) # get thread_info of next - oi __TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next -__switch_to_no_mcck: - stm %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task - st %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp - l %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp - lm %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task - st %r3,__LC_CURRENT # __LC_CURRENT = current task struct - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - l %r3,__THREAD_info(%r3) # load thread_info from task struct - st %r3,__LC_THREAD_INFO - ahi %r3,STACK_SIZE - st %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack + l %r5,__THREAD_info(%r3) # get thread_info of next + lr %r15,%r5 + ahi %r15,STACK_INIT # end of kernel stack of next + st %r3,__LC_CURRENT # store task struct of next + st %r5,__LC_THREAD_INFO # store thread info of next + st %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + l %r15,__THREAD_ksp(%r3) # load kernel stack of next + lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -243,494 +168,473 @@ __critical_start: * are executed with interrupts enabled. */ - .globl system_call -system_call: - STORE_TIMER __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_BASE __LC_SAVE_AREA - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - lh %r7,0x8a # get svc number from lowcore -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +ENTRY(system_call) + stpt __LC_SYNC_ENTER_TIMER +sysc_stm: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lhi %r14,_PIF_SYSCALL +sysc_per: + l %r15,__LC_KERNEL_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + st %r14,__PT_FLAGS(%r11) sysc_do_svc: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - sla %r7,2 # *4 and test for svc 0 - bnz BASED(sysc_nr_ok) # svc number > 0 + l %r10,__TI_sysc_table(%r12) # 31 bit system call table + lh %r8,__PT_INT_CODE+2(%r11) + sla %r8,2 # shift and test for svc0 + jnz sysc_nr_ok # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) - bnl BASED(sysc_nr_ok) - lr %r7,%r1 # copy svc number to %r7 - sla %r7,2 # *4 + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + lr %r8,%r1 + sla %r8,2 sysc_nr_ok: - mvc SP_ARGS(4,%r15),SP_R7(%r15) -sysc_do_restart: - l %r8,BASED(.Lsysc_table) - tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - l %r8,0(%r7,%r8) # get system call addr. - bnz BASED(sysc_tracesys) - basr %r14,%r8 # call sys_xxxx - st %r2,SP_R2(%r15) # store return value (change R2 on stack) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + st %r2,__PT_ORIG_GPR2(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r9,0(%r8,%r10) # get system call addr. + tm __TI_flags+3(%r12),_TIF_TRACE + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + st %r2,__PT_R2(%r11) # store return value sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - tm __TI_flags+3(%r9),_TIF_WORK_SVC - bnz BASED(sysc_work) # there is work to do (signals etc.) -sysc_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(sysc_restore_trace_psw) - lpsw 0(%r1) -sysc_restore_trace: - TRACE_IRQS_CHECK LOCKDEP_SYS_EXIT -#endif -sysc_leave: - RESTORE_ALL __LC_RETURN_PSW,1 +sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore + tm __PT_FLAGS+3(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+3(%r12),_TIF_WORK + jnz sysc_work # check for thread work + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz sysc_work +sysc_restore: + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW sysc_done: -#ifdef CONFIG_TRACE_IRQFLAGS - .align 8 - .globl sysc_restore_trace_psw -sysc_restore_trace_psw: - .long 0, sysc_restore_trace + 0x80000000 -#endif - -# -# recheck if there is more work to do -# -sysc_work_loop: - tm __TI_flags+3(%r9),_TIF_WORK_SVC - bz BASED(sysc_restore) # there is no work to do # # One of the work bits is on. Find out which one. # sysc_work: - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING - bo BASED(sysc_mcck_pending) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bo BASED(sysc_reschedule) - tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) - bnz BASED(sysc_sigpending) - tm __TI_flags+3(%r9),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r9),_TIF_SINGLE_STEP - bo BASED(sysc_singlestep) - b BASED(sysc_restore) -sysc_work_done: + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING + jo sysc_mcck_pending + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jo sysc_reschedule + tm __PT_FLAGS+3(%r11),_PIF_PER_TRAP + jo sysc_singlestep + tm __TI_flags+3(%r12),_TIF_SIGPENDING + jo sysc_sigpending + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME + jo sysc_notify_resume + tm __LC_CPU_FLAGS+3,_CIF_ASCE + jo sysc_uaccess + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule # sysc_reschedule: l %r1,BASED(.Lschedule) - la %r14,BASED(sysc_work_loop) - br %r1 # call scheduler + la %r14,BASED(sysc_return) + br %r1 # call schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) - la %r14,BASED(sysc_work_loop) + l %r1,BASED(.Lhandle_mcck) + la %r14,BASED(sysc_return) br %r1 # TIF bit will be cleared by handler # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _CIF_ASCE is set, load user space asce # -sysc_sigpending: - ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Ldo_signal) - basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r9),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r9),_TIF_SINGLE_STEP - bo BASED(sysc_singlestep) - b BASED(sysc_work_loop) +sysc_uaccess: + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE + lctl %c1,%c1,__LC_USER_ASCE # load primary asce + j sysc_return # -# _TIF_RESTART_SVC is set, set up registers and restart svc +# _TIF_SIGPENDING is set, call do_signal # -sysc_restart: - ni __TI_flags+3(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - sla %r7,2 - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument - lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_restart) # restart svc +sysc_sigpending: + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_signal) + basr %r14,%r1 # call do_signal + tm __PT_FLAGS+3(%r11),_PIF_SYSCALL + jno sysc_return + lm %r2,%r7,__PT_R2(%r11) # load svc arguments + l %r10,__TI_sysc_table(%r12) # 31 bit system call table + xr %r8,%r8 # svc 0 returns -ENOSYS + clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) + jnl sysc_nr_ok # invalid svc number -> do svc 0 + lh %r8,__PT_INT_CODE+2(%r11) # load new svc number + sla %r8,2 + j sysc_nr_ok # restart svc + +# +# _TIF_NOTIFY_RESUME is set, call do_notify_resume +# +sysc_notify_resume: + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_notify_resume) + la %r14,BASED(sysc_return) + br %r1 # call do_notify_resume # -# _TIF_SINGLE_STEP is set, call do_single_step +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_return) # load adr. of system return - br %r1 # branch to do_single_step + ni __PT_FLAGS+3(%r11),255-_PIF_PER_TRAP + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_per_trap) + la %r14,BASED(sysc_return) + br %r1 # call do_per_trap # -# call trace before and after sys_call +# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before +# and after the system call # sysc_tracesys: - l %r1,BASED(.Ltrace) - la %r2,SP_PTREGS(%r15) # load pt_regs + l %r1,BASED(.Ltrace_enter) + lr %r2,%r11 # pass pointer to pt_regs la %r3,0 - srl %r7,2 - st %r7,SP_R2(%r15) - basr %r14,%r1 - clc SP_R2(4,%r15),BASED(.Lnr_syscalls) - bnl BASED(sysc_tracenogo) - l %r8,BASED(.Lsysc_table) - l %r7,SP_R2(%r15) # strace might have changed the - sll %r7,2 # system call - l %r8,0(%r7,%r8) + xr %r0,%r0 + icm %r0,3,__PT_INT_CODE+2(%r11) + st %r0,__PT_R2(%r11) + basr %r14,%r1 # call do_syscall_trace_enter + cl %r2,BASED(.Lnr_syscalls) + jnl sysc_tracenogo + lr %r8,%r2 + sll %r8,2 + l %r9,0(%r8,%r10) sysc_tracego: - lm %r3,%r6,SP_R3(%r15) - l %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - st %r2,SP_R2(%r15) # store return value + lm %r3,%r7,__PT_R3(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + st %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - bz BASED(sysc_return) - l %r1,BASED(.Ltrace) - la %r2,SP_PTREGS(%r15) # load pt_regs - la %r3,1 + tm __TI_flags+3(%r12),_TIF_TRACE + jz sysc_return + l %r1,BASED(.Ltrace_exit) + lr %r2,%r11 # pass pointer to pt_regs la %r14,BASED(sysc_return) - br %r1 + br %r1 # call do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # - .globl ret_from_fork -ret_from_fork: +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + l %r12,__LC_THREAD_INFO l %r13,__LC_SVC_NEW_PSW+4 - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - bo BASED(0f) - st %r15,SP_R15(%r15) # store stack pointer for new kthread -0: l %r1,BASED(.Lschedtail) - basr %r14,%r1 + l %r1,BASED(.Lschedule_tail) + basr %r14,%r1 # call schedule_tail TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_return) - -# -# kernel_execve function needs to deal with pt_regs that is not -# at the usual place -# - .globl kernel_execve -kernel_execve: - stm %r12,%r15,48(%r15) - lr %r14,%r15 - l %r13,__LC_SVC_NEW_PSW+4 - s %r15,BASED(.Lc_spsize) - st %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) - xc 0(__PT_SIZE,%r12),0(%r12) - l %r1,BASED(.Ldo_execve) - lr %r5,%r12 - basr %r14,%r1 - ltr %r2,%r2 - be BASED(0f) - a %r15,BASED(.Lc_spsize) - lm %r12,%r15,48(%r15) - br %r14 - # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts - l %r15,__LC_KERNEL_STACK # load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - l %r9,__LC_THREAD_INFO - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - l %r1,BASED(.Lexecve_tail) - basr %r14,%r1 - b BASED(sysc_return) + ssm __LC_SVC_NEW_PSW # reenable interrupts + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jne sysc_tracenogo + # it's a kernel thread + lm %r9,%r10,__PT_R9(%r11) # load gprs +ENTRY(kernel_thread_starter) + la %r2,0(%r10) + basr %r14,%r9 + j sysc_tracenogo /* * Program check handler routine */ - .globl pgm_check_handler -pgm_check_handler: -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ - STORE_TIMER __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - bnz BASED(pgm_per) # got per exception -> special case - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF - l %r3,__LC_PGM_ILC # load program interruption code - la %r8,0x7f - nr %r8,%r3 -pgm_do_call: - l %r7,BASED(.Ljump_table) - sll %r8,2 - l %r7,0(%r8,%r7) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - la %r14,BASED(sysc_return) - br %r7 # branch to interrupt-handler - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - bnz BASED(pgm_per_std) # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(8),__LC_SVC_NEW_PSW - be BASED(pgm_svcper) -# no interesting special case, ignore PER event - lm %r12,%r15,__LC_SAVE_AREA - lpsw 0x28 +ENTRY(pgm_check_handler) + stpt __LC_SYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_PGM_OLD_PSW + tmh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + l %r15,__LC_KERNEL_STACK +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + l %r1,__TI_task(%r12) + tmh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP + mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE + mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ljump_table) + la %r10,0x7f + n %r10,__PT_INT_CODE(%r11) + je sysc_return + sll %r10,2 + l %r1,0(%r10,%r1) # load address of handler routine + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # branch to interrupt-handler + j sysc_return # -# Normal per exception +# PER event in supervisor state, must be kprobes # -pgm_per_std: - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime2) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF - l %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - tm SP_PSW+1(%r15),0x01 # kernel per event ? - bz BASED(kernel_per) - l %r3,__LC_PGM_ILC # load program interruption code - la %r8,0x7f - nr %r8,%r3 # clear per-event-bit and ilc - be BASED(sysc_return) # only per or per+check ? - b BASED(pgm_do_call) +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_per_trap) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_per_trap + j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# single stepped system call # pgm_svcper: - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif - lh %r7,0x8a # get svc number from lowcore - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF - l %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_do_svc) - -# -# per was called from kernel, must be kprobes -# -kernel_per: - mvi SP_TRAP+1(%r15),0x28 # set trap indication to pgm check - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_restore)# load adr. of system return - br %r1 # branch to do_single_step + mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lhi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine */ - .globl io_int_handler -io_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER +ENTRY(io_int_handler) stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+16 - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(io_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + stpt __LC_ASYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_IO_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz io_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +io_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to standard irq handler + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: + basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - bno BASED(io_preempt) # no -> check for preemptive scheduling -#else - bno BASED(io_restore) # no-> skip resched & signal -#endif - tm __TI_flags+3(%r9),_TIF_WORK_INT - bnz BASED(io_work) # there is work to do (signals etc.) -io_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(io_restore_trace_psw) - lpsw 0(%r1) -io_restore_trace: - TRACE_IRQS_CHECK LOCKDEP_SYS_EXIT -#endif -io_leave: - RESTORE_ALL __LC_RETURN_PSW,0 + TRACE_IRQS_ON +io_tif: + tm __TI_flags+3(%r12),_TIF_WORK + jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz io_work +io_restore: + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW io_done: -#ifdef CONFIG_TRACE_IRQFLAGS - .align 8 - .globl io_restore_trace_psw -io_restore_trace_psw: - .long 0, io_restore_trace + 0x80000000 -#endif - +# +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK work +# 2) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. +# +io_work: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT -io_preempt: - icm %r0,15,__TI_precount(%r9) - bnz BASED(io_restore) - l %r1,SP_R15(%r15) - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + # check for preemptive scheduling + icm %r0,15,__TI_precount(%r12) + jnz io_restore # preemption disabled + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jno io_restore + # switch to kernel stack + l %r1,__PT_R15(%r11) + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 -io_resume_loop: - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bno BASED(io_restore) - l %r1,BASED(.Lpreempt_schedule_irq) - la %r14,BASED(io_resume_loop) - br %r1 # call schedule + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + l %r1,BASED(.Lpreempt_irq) + basr %r14,%r1 # call preempt_schedule_irq + j io_return +#else + j io_restore #endif # -# switch to kernel stack, then check the TIF bits +# Need to do work before returning to userspace, switch to kernel stack # -io_work: +io_work_user: l %r1,__LC_KERNEL_STACK - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 + # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGMASK, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING # -io_work_loop: - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING - bo BASED(io_mcck_pending) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bo BASED(io_reschedule) - tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) - bnz BASED(io_sigpending) - b BASED(io_restore) -io_work_done: +io_work_tif: + tm __LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING + jo io_mcck_pending + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED + jo io_reschedule + tm __TI_flags+3(%r12),_TIF_SIGPENDING + jo io_sigpending + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME + jo io_notify_resume + tm __LC_CPU_FLAGS+3,_CIF_ASCE + jo io_uaccess + j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Lhandle_mcck) basr %r14,%r1 # TIF bit will be cleared by handler - b BASED(io_work_loop) + TRACE_IRQS_OFF + j io_return + +# +# _CIF_ASCE is set, load user space asce +# +io_uaccess: + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE + lctl %c1,%c1,__LC_USER_ASCE # load primary asce + j io_return # # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Lschedule) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts basr %r14,%r1 # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - tm __TI_flags+3(%r9),_TIF_WORK_INT - bz BASED(io_restore) # there is no work to do - b BASED(io_work_loop) + j io_return # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _TIF_SIGPENDING is set, call do_signal # io_sigpending: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs + # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Ldo_signal) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_work_loop) + j io_return + +# +# _TIF_SIGPENDING is set, call do_signal +# +io_notify_resume: + # TRACE_IRQS_ON already done at io_return + l %r1,BASED(.Ldo_notify_resume) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return /* * External interrupt handler routine */ - .globl ext_int_handler -ext_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER +ENTRY(ext_int_handler) stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+16 - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(ext_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + stpt __LC_ASYNC_ENTER_TIMER + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_EXT_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +ext_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - lh %r3,__LC_EXT_INT_CODE # get interruption code - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 - b BASED(io_return) + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ + j io_return + +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + st %r3,__SF_EMPTY(%r15) + basr %r1,0 + la %r1,psw_idle_lpsw+4-.(%r1) + st %r1,__SF_EMPTY+4(%r15) + oi __SF_EMPTY+4(%r15),0x80 + stck __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) +psw_idle_lpsw: + lpsw __SF_EMPTY(%r15) + br %r14 +psw_idle_end: __critical_end: @@ -738,127 +642,108 @@ __critical_end: * Machine check handler routines */ - .globl mcck_int_handler -mcck_int_handler: +ENTRY(mcck_int_handler) + stck __LC_MCCK_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+32 - la %r12,__LC_MCK_OLD_PSW + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? - bo BASED(mcck_int_main) # yes -> rest of mcck code invalid -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_SAVE_AREA+52(8),__LC_ASYNC_ENTER_TIMER - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA + jo mcck_panic # yes -> rest of mcck code invalid + la %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - bo BASED(1f) + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - bl BASED(0f) + jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - bl BASED(0f) + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - bl BASED(0f) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) - mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14) -1: -#endif - tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - bno BASED(mcck_int_main) # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - bnz BASED(mcck_int_main) # from user -> load async stack - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) - bhe BASED(mcck_int_main) - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) - bl BASED(mcck_int_main) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 -mcck_int_main: - l %r14,__LC_PANIC_STACK # are we already on the panic stack? - slr %r14,%r15 - sra %r14,PAGE_SHIFT - be BASED(0f) - l %r15,__LC_PANIC_STACK # load panic stack -0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - bno BASED(mcck_no_vtime) # no -> skip cleanup critical - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(mcck_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -mcck_no_vtime: -#endif - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(mcck_return) +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER +mcck_skip: + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 + stm %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_machine_check) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno mcck_return l %r1,__LC_KERNEL_STACK # switch to kernel stack - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r15) lr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING - bno BASED(mcck_return) + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING + jno mcck_return TRACE_IRQS_OFF - l %r1,BASED(.Ls390_handle_mcck) - basr %r14,%r1 # call machine check handler + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # call s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+52 + mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? - bno BASED(0f) - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 + jno 0f + lm %r0,%r15,__PT_R0(%r11) stpt __LC_EXIT_TIMER - lpsw __LC_RETURN_MCCK_PSW # back to caller -0: -#endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 - lpsw __LC_RETURN_MCCK_PSW # back to caller + lpsw __LC_RETURN_MCCK_PSW +0: lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_MCCK_PSW - RESTORE_ALL __LC_RETURN_MCCK_PSW,0 - -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT - .globl restart_int_handler -restart_int_handler: - l %r15,__LC_SAVE_AREA+60 # load ksp - lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs - lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - basr %r14,0 - l %r14,restart_addr-.(%r14) - br %r14 # branch to start_secondary -restart_addr: - .long start_secondary - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - lpsw restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000 -restart_go: -#endif +mcck_panic: + l %r14,__LC_PANIC_STACK + slr %r14,%r15 + sra %r14,PAGE_SHIFT + jz 0f + l %r15,__LC_PANIC_STACK + j mcck_skip +0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + st %r15,__LC_SAVE_AREA_RESTART + l %r15,__LC_RESTART_STACK + ahi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stm %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw + ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + l %r1,__LC_RESTART_FN # load fn, parm & source cpu + l %r2,__LC_RESTART_DATA + l %r3,__LC_RESTART_SOURCE + ltr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + lh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b + + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK /* @@ -868,237 +753,214 @@ restart_go: */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - sl %r15,BASED(.Lc_spsize) - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - ch %r12,BASED(.L0x020) # old psw addr == __LC_SVC_OLD_PSW ? - be BASED(0f) - ch %r12,BASED(.L0x028) # old psw addr == __LC_PGM_OLD_PSW ? - be BASED(0f) - la %r1,__LC_SAVE_AREA+16 -0: mvc SP_R12(16,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear back chain - l %r1,BASED(1f) # branch to kernel_stack_overflow - la %r2,SP_PTREGS(%r15) # load pt_regs - br %r1 + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(32,%r11),0(%r14) + l %r1,BASED(1f) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + lr %r2,%r11 # pass pointer to pt_regs + br %r1 # branch to kernel_stack_overflow 1: .long kernel_stack_overflow #endif -cleanup_table_system_call: - .long system_call + 0x80000000, sysc_do_svc + 0x80000000 -cleanup_table_sysc_return: - .long sysc_return + 0x80000000, sysc_leave + 0x80000000 -cleanup_table_sysc_leave: - .long sysc_leave + 0x80000000, sysc_done + 0x80000000 -cleanup_table_sysc_work_loop: - .long sysc_work_loop + 0x80000000, sysc_work_done + 0x80000000 -cleanup_table_io_return: - .long io_return + 0x80000000, io_leave + 0x80000000 -cleanup_table_io_leave: - .long io_leave + 0x80000000, io_done + 0x80000000 -cleanup_table_io_work_loop: - .long io_work_loop + 0x80000000, io_work_done + 0x80000000 +cleanup_table: + .long system_call + 0x80000000 + .long sysc_do_svc + 0x80000000 + .long sysc_tif + 0x80000000 + .long sysc_restore + 0x80000000 + .long sysc_done + 0x80000000 + .long io_tif + 0x80000000 + .long io_restore + 0x80000000 + .long io_done + 0x80000000 + .long psw_idle + 0x80000000 + .long psw_idle_end + 0x80000000 cleanup_critical: - clc 4(4,%r12),BASED(cleanup_table_system_call) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_system_call+4) - bl BASED(cleanup_system_call) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_return) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_return+4) - bl BASED(cleanup_sysc_return) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_leave) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_leave+4) - bl BASED(cleanup_sysc_leave) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop+4) - bl BASED(cleanup_sysc_return) -0: - clc 4(4,%r12),BASED(cleanup_table_io_return) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_return+4) - bl BASED(cleanup_io_return) -0: - clc 4(4,%r12),BASED(cleanup_table_io_leave) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_leave+4) - bl BASED(cleanup_io_leave) -0: - clc 4(4,%r12),BASED(cleanup_table_io_work_loop) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_work_loop+4) - bl BASED(cleanup_io_return) -0: - br %r14 + cl %r9,BASED(cleanup_table) # system_call + jl 0f + cl %r9,BASED(cleanup_table+4) # sysc_do_svc + jl cleanup_system_call + cl %r9,BASED(cleanup_table+8) # sysc_tif + jl 0f + cl %r9,BASED(cleanup_table+12) # sysc_restore + jl cleanup_sysc_tif + cl %r9,BASED(cleanup_table+16) # sysc_done + jl cleanup_sysc_restore + cl %r9,BASED(cleanup_table+20) # io_tif + jl 0f + cl %r9,BASED(cleanup_table+24) # io_restore + jl cleanup_io_tif + cl %r9,BASED(cleanup_table+28) # io_done + jl cleanup_io_restore + cl %r9,BASED(cleanup_table+32) # psw_idle + jl 0f + cl %r9,BASED(cleanup_table+36) # psw_idle_end + jl cleanup_idle +0: br %r14 cleanup_system_call: - mvc __LC_RETURN_PSW(8),0(%r12) - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) - la %r12,__LC_SAVE_AREA+16 - b BASED(1f) -0: la %r12,__LC_SAVE_AREA+32 -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) - bh BASED(0f) + # check if stpt has been executed + cl %r9,BASED(cleanup_system_call_insn) + jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) - bhe BASED(cleanup_vtime) -#endif - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) - bh BASED(0f) - mvc __LC_SAVE_AREA(16),0(%r12) -0: st %r13,4(%r12) - st %r12,__LC_SAVE_AREA+48 # argh - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - l %r12,__LC_SAVE_AREA+48 # argh - st %r15,12(%r12) - lh %r7,0x8a -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -cleanup_vtime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) - bhe BASED(cleanup_stime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16) - bh BASED(cleanup_update) - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stm has been executed + cl %r9,BASED(cleanup_system_call_insn+4) + jh 0f + mvc __LC_SAVE_AREA_SYNC(32),0(%r11) +0: # set up saved registers r12, and r13 + st %r12,16(%r11) # r12 thread-info pointer + st %r13,20(%r11) # r13 literal-pool pointer + # check if the user time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+8) + jh 0f + l %r10,__LC_EXIT_TIMER + l %r15,__LC_EXIT_TIMER+4 + SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER + ADD64 %r10,%r15,__LC_USER_TIMER + st %r10,__LC_USER_TIMER + st %r15,__LC_USER_TIMER+4 +0: # check if the system time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+12) + jh 0f + l %r10,__LC_LAST_UPDATE_TIMER + l %r15,__LC_LAST_UPDATE_TIMER+4 + SUB64 %r10,%r15,__LC_EXIT_TIMER + ADD64 %r10,%r15,__LC_SYSTEM_TIMER + st %r10,__LC_SYSTEM_TIMER + st %r15,__LC_SYSTEM_TIMER+4 +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) - la %r12,__LC_RETURN_PSW + # set up saved register 11 + l %r15,__LC_KERNEL_STACK + la %r9,STACK_FRAME_OVERHEAD(%r15) + st %r9,12(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(32,%r9),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(4,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+3(%r9),_PIF_SYSCALL + # setup saved register 15 + st %r15,28(%r11) # r15 stack pointer + # set new psw address and exit + l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 br %r14 cleanup_system_call_insn: - .long sysc_saveall + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .long system_call + 0x80000000 - .long sysc_vtime + 0x80000000 - .long sysc_stime + 0x80000000 - .long sysc_update + 0x80000000 -#endif + .long sysc_stm + 0x80000000 + .long sysc_vtime + 0x80000000 + 36 + .long sysc_vtime + 0x80000000 + 76 -cleanup_sysc_return: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_return) - la %r12,__LC_RETURN_PSW +cleanup_sysc_tif: + l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 br %r14 -cleanup_sysc_leave: - clc 4(4,%r12),BASED(cleanup_sysc_leave_insn) - be BASED(2f) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_sysc_leave_insn+4) - be BASED(2f) -#endif - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - bne BASED(0f) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - b BASED(1f) -0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15) -1: lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_sysc_restore: + cl %r9,BASED(cleanup_sysc_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave_insn: +cleanup_sysc_restore_insn: .long sysc_done - 4 + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long sysc_done - 8 + 0x80000000 -#endif -cleanup_io_return: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_work_loop) - la %r12,__LC_RETURN_PSW +cleanup_io_tif: + l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 br %r14 -cleanup_io_leave: - clc 4(4,%r12),BASED(cleanup_io_leave_insn) - be BASED(2f) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_io_leave_insn+4) - be BASED(2f) -#endif - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - bne BASED(0f) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - b BASED(1f) -0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15) -1: lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_io_restore: + cl %r9,BASED(cleanup_io_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_io_leave_insn: +cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .long io_done - 8 + 0x80000000 -#endif + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER +0: # check if stck has been executed + cl %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) + mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r3) +1: # account system time going idle + lm %r9,%r10,__LC_STEAL_TIMER + ADD64 %r9,%r10,__CLOCK_IDLE_ENTER(%r2) + SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK + stm %r9,%r10,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + lm %r9,%r10,__LC_SYSTEM_TIMER + ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER + SUB64 %r9,%r10,__TIMER_IDLE_ENTER(%r2) + stm %r9,%r10,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + # prepare return psw + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits + l %r9,24(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .long psw_idle_lpsw + 0x80000000 +cleanup_idle_wait: + .long 0xfcfdffff /* * Integer constants */ - .align 4 -.Lc_spsize: .long SP_SIZE -.Lc_overhead: .long STACK_FRAME_OVERHEAD -.Lnr_syscalls: .long NR_syscalls -.L0x018: .short 0x018 -.L0x020: .short 0x020 -.L0x028: .short 0x028 -.L0x030: .short 0x030 -.L0x038: .short 0x038 -.Lc_1: .long 1 + .align 4 +.Lnr_syscalls: + .long NR_syscalls +.Lvtimer_max: + .quad 0x7fffffffffffffff /* * Symbol constants */ -.Ls390_mcck: .long s390_do_machine_check -.Ls390_handle_mcck: - .long s390_handle_mcck -.Lmck_old_psw: .long __LC_MCK_OLD_PSW -.Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint -.Ldo_signal: .long do_signal -.Lhandle_per: .long do_single_step -.Ldo_execve: .long do_execve -.Lexecve_tail: .long execve_tail -.Ljump_table: .long pgm_check_table -.Lschedule: .long schedule +.Ldo_machine_check: .long s390_do_machine_check +.Lhandle_mcck: .long s390_handle_mcck +.Ldo_IRQ: .long do_IRQ +.Ldo_signal: .long do_signal +.Ldo_notify_resume: .long do_notify_resume +.Ldo_per_trap: .long do_per_trap +.Ljump_table: .long pgm_check_table +.Lschedule: .long schedule #ifdef CONFIG_PREEMPT -.Lpreempt_schedule_irq: - .long preempt_schedule_irq +.Lpreempt_irq: .long preempt_schedule_irq #endif -.Ltrace: .long syscall_trace -.Lschedtail: .long schedule_tail -.Lsysc_table: .long sys_call_table +.Ltrace_enter: .long do_syscall_trace_enter +.Ltrace_exit: .long do_syscall_trace_exit +.Lschedule_tail: .long schedule_tail +.Lsysc_per: .long sysc_per + 0x80000000 #ifdef CONFIG_TRACE_IRQFLAGS -.Ltrace_irq_on: .long trace_hardirqs_on -.Ltrace_irq_off: - .long trace_hardirqs_off -.Llockdep_sys_exit: - .long lockdep_sys_exit +.Lhardirqs_on: .long trace_hardirqs_on_caller +.Lhardirqs_off: .long trace_hardirqs_off_caller +#endif +#ifdef CONFIG_LOCKDEP +.Llockdep_sys_exit: .long lockdep_sys_exit #endif -.Lcritical_start: - .long __critical_start + 0x80000000 -.Lcritical_end: - .long __critical_end + 0x80000000 -.Lcleanup_critical: - .long cleanup_critical +.Lcritical_start: .long __critical_start + 0x80000000 +.Lcritical_length: .long __critical_end - __critical_start .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esa + .globl sys_call_table sys_call_table: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h new file mode 100644 index 00000000000..6ac78192455 --- /dev/null +++ b/arch/s390/kernel/entry.h @@ -0,0 +1,73 @@ +#ifndef _ENTRY_H +#define _ENTRY_H + +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/ptrace.h> +#include <asm/cputime.h> + +extern void *restart_stack; +extern unsigned long suspend_zero_pages; + +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void restart_call_handler(void); +void psw_idle(struct s390_idle_data *, unsigned long); + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); + +void addressing_exception(struct pt_regs *regs); +void data_exception(struct pt_regs *regs); +void default_trap_handler(struct pt_regs *regs); +void divide_exception(struct pt_regs *regs); +void execute_exception(struct pt_regs *regs); +void hfp_divide_exception(struct pt_regs *regs); +void hfp_overflow_exception(struct pt_regs *regs); +void hfp_significance_exception(struct pt_regs *regs); +void hfp_sqrt_exception(struct pt_regs *regs); +void hfp_underflow_exception(struct pt_regs *regs); +void illegal_op(struct pt_regs *regs); +void operand_exception(struct pt_regs *regs); +void overflow_exception(struct pt_regs *regs); +void privileged_op(struct pt_regs *regs); +void space_switch_exception(struct pt_regs *regs); +void special_op_exception(struct pt_regs *regs); +void specification_exception(struct pt_regs *regs); +void transaction_exception(struct pt_regs *regs); +void translation_exception(struct pt_regs *regs); + +void do_per_trap(struct pt_regs *regs); +void syscall_trace(struct pt_regs *regs, int entryexit); +void kernel_stack_overflow(struct pt_regs * regs); +void do_signal(struct pt_regs *regs); +void handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); + +void __init init_IRQ(void); +void do_IRQ(struct pt_regs *regs, int irq); +void do_restart(void); +void __init startup_init(void); +void die(struct pt_regs *regs, const char *str); +int setup_profiling_timer(unsigned int multiplier); +void __init time_init(void); +int pfn_is_nosave(unsigned long); +void s390_early_resume(void); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); + +struct s390_mmap_arg_struct; +struct fadvise64_64_args; +struct old_sigaction; + +long sys_s390_personality(unsigned int personality); +long sys_s390_runtime_instr(int command, int signum); + +#endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index efde6e178f6..f2e674c702e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1,193 +1,173 @@ /* - * arch/s390/kernel/entry64.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2012 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sys.h> -#include <linux/linkage.h> #include <linux/init.h> +#include <linux/linkage.h> +#include <asm/processor.h> #include <asm/cache.h> -#include <asm/lowcore.h> #include <asm/errno.h> #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/page.h> +#include <asm/sigp.h> +#include <asm/irq.h> -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 64 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 72 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 80 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 88 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 96 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 8 +__PT_R2 = __PT_GPRS + 16 +__PT_R3 = __PT_GPRS + 24 +__PT_R4 = __PT_GPRS + 32 +__PT_R5 = __PT_GPRS + 40 +__PT_R6 = __PT_GPRS + 48 +__PT_R7 = __PT_GPRS + 56 +__PT_R8 = __PT_GPRS + 64 +__PT_R9 = __PT_GPRS + 72 +__PT_R10 = __PT_GPRS + 80 +__PT_R11 = __PT_GPRS + 88 +__PT_R12 = __PT_GPRS + 96 +__PT_R13 = __PT_GPRS + 104 +__PT_R14 = __PT_GPRS + 112 +__PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT +STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) #define BASED(name) name-system_call(%r13) -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - brasl %r14,trace_hardirqs_on +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + brasl %r14,trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF - brasl %r14,trace_hardirqs_off - .endm - - .macro TRACE_IRQS_CHECK - tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - brasl %r14,trace_hardirqs_on - j 1f -0: brasl %r14,trace_hardirqs_off -1: - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK +#ifdef CONFIG_TRACE_IRQFLAGS + basr %r2,%r0 + brasl %r14,trace_hardirqs_off_caller #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 brasl %r14,lockdep_sys_exit -0: - .endm -#else -#define LOCKDEP_SYS_EXIT -#endif - - .macro STORE_TIMER lc_offset -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - stpt \lc_offset #endif .endm -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lg %r10,\lc_from - slg %r10,\lc_to - alg %r10,\lc_sum - stg %r10,\lc_sum - .endm + .macro LPP newpp +#if IS_ENABLED(CONFIG_KVM) + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP + jz .+8 + .insn s,0xb2800000,\newpp #endif - -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ - - .macro SAVE_ALL_BASE savearea - stmg %r12,%r15,\savearea - larl %r13,system_call .endm - .macro SAVE_ALL_SVC psworg,savearea - la %r12,\psworg - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp + .macro HANDLE_SIE_INTERCEPT scratch,reason +#if IS_ENABLED(CONFIG_KVM) + tmhh %r8,0x0001 # interrupting from user ? + jnz .+62 + lgr \scratch,%r9 + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 + # Some program interrupts are suppressing (e.g. protection). + # We must also check the instruction after SIE in that case. + # do_protection_exception will rewind to rewind_pad + jh .+42 + .else + jhe .+42 + .endif + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason +#endif .endm - .macro SAVE_ALL_SYNC psworg,savearea - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - jz 2f # skip stack setup save - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - j 3f -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + tml %r15,\stacksize - CONFIG_STACK_GUARD + lghi %r14,\savearea jz stack_overflow -3: #endif -2: .endm - .macro SAVE_ALL_ASYNC psworg,savearea - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - jnz 1f # from user -> load kernel stack - clc \psworg+8(8),BASED(.Lcritical_end) + .macro SWITCH_ASYNC savearea,stack,shift + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f + lgr %r14,%r9 + slg %r14,BASED(.Lcritical_start) + clg %r14,BASED(.Lcritical_length) jhe 0f - clc \psworg+8(8),BASED(.Lcritical_start) - jl 0f + lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical - tm 1(%r12),0x01 # retest problem state after cleanup + tmhh %r8,0x0001 # retest problem state after cleanup jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ? +0: lg %r14,\stack # are we already on the target stack? slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT - jz 2f -1: lg %r15,__LC_ASYNC_STACK # load async stack -#ifdef CONFIG_CHECK_STACK - j 3f -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jz stack_overflow -3: -#endif -2: + srag %r14,%r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: lg %r15,\stack # load target stack +2: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro CREATE_STACK_FRAME psworg,savearea - aghi %r15,-SP_SIZE # make room for registers & psw - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - la %r12,\psworg - stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,12,__LC_SVC_ILC - stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_ILC(%r15) - mvc SP_R12(32,%r15),\savearea # move %r12-%r15 to stack - la %r12,0 - stg %r12,__SF_BACKCHAIN(%r15) + .macro UPDATE_VTIME scratch,enter_timer + lg \scratch,__LC_EXIT_TIMER + slg \scratch,\enter_timer + alg \scratch,__LC_USER_TIMER + stg \scratch,__LC_USER_TIMER + lg \scratch,__LC_LAST_UPDATE_TIMER + slg \scratch,__LC_EXIT_TIMER + alg \scratch,__LC_SYSTEM_TIMER + stg \scratch,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(16),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - STORE_TIMER __LC_EXIT_TIMER - lpswe \psworg # back to caller + .macro LAST_BREAK scratch + srag \scratch,%r10,23 + jz .+10 + stg %r10,__TI_last_break(%r12) + .endm + + .macro REENABLE_IRQS + stg %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm + .macro STCK savearea +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + .insn s,0xb27c0000,\savearea # store clock fast +#else + .insn s,0xb2050000,\savearea # store clock +#endif + .endm + + .section .kprobes.text, "ax" + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -195,32 +175,20 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ * Returns: * gpr2 = prev */ - .globl __switch_to -__switch_to: - tm __THREAD_per+4(%r3),0xe8 # is the new process using per ? - jz __switch_to_noper # if not we're fine - stctg %c9,%c11,__SF_EMPTY(%r15)# We are using per stuff - clc __THREAD_per(24,%r3),__SF_EMPTY(%r15) - je __switch_to_noper # we got away without bashing TLB's - lctlg %c9,%c11,__THREAD_per(%r3) # Nope we didn't -__switch_to_noper: - lg %r4,__THREAD_info(%r2) # get thread_info of prev - tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? - jz __switch_to_no_mcck - ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - lg %r4,__THREAD_info(%r3) # get thread_info of next - oi __TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next -__switch_to_no_mcck: - stmg %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp - lg %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp - lmg %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task - stg %r3,__LC_CURRENT # __LC_CURRENT = current task struct - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - lg %r3,__THREAD_info(%r3) # load thread_info from task struct - stg %r3,__LC_THREAD_INFO - aghi %r3,STACK_SIZE - stg %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack +ENTRY(__switch_to) + stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev + lg %r4,__THREAD_info(%r2) # get thread_info of prev + lg %r5,__THREAD_info(%r3) # get thread_info of next + lgr %r15,%r5 + aghi %r15,STACK_INIT # end of kernel stack of next + stg %r3,__LC_CURRENT # store task struct of next + stg %r5,__LC_THREAD_INFO # store thread info of next + stg %r15,__LC_KERNEL_STACK # store end of kernel stack + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r3) # load kernel stack of next + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -229,605 +197,589 @@ __critical_start: * are executed with interrupts enabled. */ - .globl system_call -system_call: - STORE_TIMER __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_BASE __LC_SAVE_AREA - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +ENTRY(system_call) + stpt __LC_SYNC_ENTER_TIMER +sysc_stmg: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + lghi %r14,_PIF_SYSCALL +sysc_per: + lg %r15,__LC_KERNEL_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif + UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r13 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) sysc_do_svc: - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - slag %r7,%r7,2 # *4 and test for svc 0 + lg %r10,__TI_sysc_table(%r12) # address of system call table + llgh %r8,__PT_INT_CODE+2(%r11) + slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 - cl %r1,BASED(.Lnr_syscalls) + llgfr %r1,%r1 # clear high word in r1 + cghi %r1,NR_syscalls jnl sysc_nr_ok - lgfr %r7,%r1 # clear high word in r1 - slag %r7,%r7,2 # svc 0: system call number in %r1 + sth %r1,__PT_INT_CODE+2(%r11) + slag %r8,%r1,2 sysc_nr_ok: - mvc SP_ARGS(8,%r15),SP_R7(%r15) -sysc_do_restart: - larl %r10,sys_call_table -#ifdef CONFIG_COMPAT - tm __TI_flags+5(%r9),(_TIF_31BIT>>16) # running in 31 bit mode ? - jno sysc_noemu - larl %r10,sys_call_table_emu # use 31 bit emulation system calls -sysc_noemu: -#endif - tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) - lgf %r8,0(%r7,%r10) # load address of system call routine + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stg %r2,__PT_ORIG_GPR2(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lgf %r9,0(%r8,%r10) # get system call add. + tm __TI_flags+7(%r12),_TIF_TRACE jnz sysc_tracesys - basr %r14,%r8 # call sys_xxxx - stg %r2,SP_R2(%r15) # store return value (change R2 on stack) + basr %r14,%r9 # call sys_xxxx + stg %r2,__PT_R2(%r11) # store return value sysc_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? + LOCKDEP_SYS_EXIT +sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno sysc_restore - tm __TI_flags+7(%r9),_TIF_WORK_SVC - jnz sysc_work # there is work to do (signals etc.) + tm __PT_FLAGS+7(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+7(%r12),_TIF_WORK + jnz sysc_work # check for work + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz sysc_work sysc_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - larl %r1,sysc_restore_trace_psw - lpswe 0(%r1) -sysc_restore_trace: - TRACE_IRQS_CHECK - LOCKDEP_SYS_EXIT -#endif -sysc_leave: - RESTORE_ALL __LC_RETURN_PSW,1 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW sysc_done: -#ifdef CONFIG_TRACE_IRQFLAGS - .align 8 - .globl sysc_restore_trace_psw -sysc_restore_trace_psw: - .quad 0, sysc_restore_trace -#endif - -# -# recheck if there is more work to do -# -sysc_work_loop: - tm __TI_flags+7(%r9),_TIF_WORK_SVC - jz sysc_restore # there is no work to do # # One of the work bits is on. Find out which one. # sysc_work: - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo sysc_mcck_pending - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule - tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) - jnz sysc_sigpending - tm __TI_flags+7(%r9),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r9),_TIF_SINGLE_STEP + tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP jo sysc_singlestep - j sysc_restore -sysc_work_done: + tm __TI_flags+7(%r12),_TIF_SIGPENDING + jo sysc_sigpending + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + jo sysc_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_ASCE + jo sysc_uaccess + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule # sysc_reschedule: - larl %r14,sysc_work_loop - jg schedule # return point is sysc_return + larl %r14,sysc_return + jg schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - larl %r14,sysc_work_loop + larl %r14,sysc_return jg s390_handle_mcck # TIF bit will be cleared by handler # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _CIF_ASCE is set, load user space asce +# +sysc_uaccess: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + j sysc_return + +# +# _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: - ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r9),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r9),_TIF_SINGLE_STEP - jo sysc_singlestep - j sysc_work_loop + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + tm __PT_FLAGS+7(%r11),_PIF_SYSCALL + jno sysc_return + lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lg %r10,__TI_sysc_table(%r12) # address of system call table + lghi %r8,0 # svc 0 returns -ENOSYS + llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r8,%r1,2 + j sysc_nr_ok # restart svc # -# _TIF_RESTART_SVC is set, set up registers and restart svc +# _TIF_NOTIFY_RESUME is set, call do_notify_resume # -sysc_restart: - ni __TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - slag %r7,%r7,2 # *4 - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - j sysc_do_restart # restart svc +sysc_notify_resume: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_notify_resume # -# _TIF_SINGLE_STEP is set, call do_single_step +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - lhi %r0,__LC_PGM_OLD_PSW - sth %r0,SP_TRAP(%r15) # set trap indication to pgm check - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return # load adr. of system return - jg do_single_step # branch to do_sigtrap + ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_per_trap # -# call syscall_trace before and after system call -# special linkage: %r12 contains the return address for trace_svc +# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before +# and after the system call # sysc_tracesys: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs la %r3,0 - srl %r7,2 - stg %r7,SP_R2(%r15) - brasl %r14,syscall_trace + llgh %r0,__PT_INT_CODE+2(%r11) + stg %r0,__PT_R2(%r11) + brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls - clg %r0,SP_R2(%r15) + clgr %r0,%r2 jnh sysc_tracenogo - lg %r7,SP_R2(%r15) # strace might have changed the - sll %r7,2 # system call - lgf %r8,0(%r7,%r10) + sllg %r8,%r2,2 + lgf %r9,0(%r8,%r10) sysc_tracego: - lmg %r3,%r6,SP_R3(%r15) - lg %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - stg %r2,SP_R2(%r15) # store return value + lmg %r3,%r7,__PT_R3(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lg %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + stg %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) + tm __TI_flags+7(%r12),_TIF_TRACE jz sysc_return - la %r2,SP_PTREGS(%r15) # load pt_regs - la %r3,1 - larl %r14,sysc_return # return point is sysc_return - jg syscall_trace + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # - .globl ret_from_fork -ret_from_fork: - lg %r13,__LC_SVC_NEW_PSW+8 - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - jo 0f - stg %r15,SP_R15(%r15) # store stack pointer for new kthread -0: brasl %r14,schedule_tail +ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + lg %r12,__LC_THREAD_INFO + brasl %r14,schedule_tail TRACE_IRQS_ON - stosm 24(%r15),0x03 # reenable interrupts - j sysc_return - -# -# kernel_execve function needs to deal with pt_regs that is not -# at the usual place -# - .globl kernel_execve -kernel_execve: - stmg %r12,%r15,96(%r15) - lgr %r14,%r15 - aghi %r15,-SP_SIZE - stg %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) - xc 0(__PT_SIZE,%r12),0(%r12) - lgr %r5,%r12 - brasl %r14,do_execve - ltgfr %r2,%r2 - je 0f - aghi %r15,SP_SIZE - lmg %r12,%r15,96(%r15) - br %r14 - # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts - lg %r15,__LC_KERNEL_STACK # load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r13,__LC_SVC_NEW_PSW+8 - lg %r9,__LC_THREAD_INFO - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - brasl %r14,execve_tail - j sysc_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jne sysc_tracenogo + # it's a kernel thread + lmg %r9,%r10,__PT_R9(%r11) # load gprs +ENTRY(kernel_thread_starter) + la %r2,0(%r10) + basr %r14,%r9 + j sysc_tracenogo /* * Program check handler routine */ - .globl pgm_check_handler -pgm_check_handler: -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ - STORE_TIMER __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - jnz pgm_per # got per exception -> special case - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF - lgf %r3,__LC_PGM_ILC # load program interruption code - lghi %r8,0x7f - ngr %r8,%r3 -pgm_do_call: - sll %r8,3 +ENTRY(pgm_check_handler) + stpt __LC_SYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_PGM_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,1 + tmhh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r14 + lg %r15,__LC_KERNEL_STACK + lg %r14,__TI_task(%r12) + lghi %r13,__LC_PGM_TDB + tm __LC_PGM_ILC+2,0x02 # check for transaction abort + jz 2f + mvc __THREAD_trap_tdb(256,%r14),0(%r13) +2: la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + stg %r10,__PT_ARGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f + tmhh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP + mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE + mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID +0: REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return - br %r1 # branch to interrupt-handler - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - jnz pgm_per_std # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW - je pgm_svcper -# no interesting special case, ignore PER event - lmg %r12,%r15,__LC_SAVE_AREA - lpswe __LC_PGM_OLD_PSW - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime2 - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF - lg %r1,__TI_task(%r9) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - jz kernel_per - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - lgf %r3,__LC_PGM_ILC # load program interruption code - lghi %r8,0x7f - ngr %r8,%r3 # clear per-event-bit and ilc + llgh %r10,__PT_INT_CODE+2(%r11) + nill %r10,0x007f + sll %r10,2 je sysc_return - j pgm_do_call + lgf %r1,0(%r10,%r1) # load address of handler routine + lgr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # branch to interrupt-handler + j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# PER event in supervisor state, must be kprobes # -pgm_svcper: - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif - llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - lg %r1,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - j sysc_do_svc +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_per_trap + j sysc_return # -# per was called from kernel, must be kprobes +# single stepped system call # -kernel_per: - lhi %r0,__LC_PGM_OLD_PSW - sth %r0,SP_TRAP(%r15) # set trap indication to pgm check - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_restore # load adr. of system ret, no work - jg do_single_step # branch to do_single_step +pgm_svcper: + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,sysc_per + stg %r14,__LC_RETURN_PSW+8 + lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP + lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine */ - .globl io_int_handler -io_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER - stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 - CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz io_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct +ENTRY(io_int_handler) + STCK __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_IO_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,2 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user? + jz io_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +io_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_IRQ # call standard irq handler + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: + lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lghi %r3,THIN_INTERRUPT +io_call: + brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: - tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifdef CONFIG_PREEMPT - jno io_preempt # no -> check for preemptive scheduling -#else - jno io_restore # no-> skip resched & signal -#endif - tm __TI_flags+7(%r9),_TIF_WORK_INT + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: + tm __TI_flags+7(%r12),_TIF_WORK jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz io_work io_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - larl %r1,io_restore_trace_psw - lpswe 0(%r1) -io_restore_trace: - TRACE_IRQS_CHECK - LOCKDEP_SYS_EXIT -#endif -io_leave: - RESTORE_ALL __LC_RETURN_PSW,0 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW io_done: -#ifdef CONFIG_TRACE_IRQFLAGS - .align 8 - .globl io_restore_trace_psw -io_restore_trace_psw: - .quad 0, io_restore_trace -#endif - +# +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK work +# 2) if we return to kernel code and kvm is enabled check if we need to +# modify the psw to leave SIE +# 3) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. +# +io_work: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT -io_preempt: - icm %r0,15,__TI_precount(%r9) - jnz io_restore + # check for preemptive scheduling + icm %r0,15,__TI_precount(%r12) + jnz io_restore # preemption is disabled + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jno io_restore # switch to kernel stack - lg %r1,SP_R15(%r15) - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lg %r1,__PT_R15(%r11) + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 -io_resume_loop: - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED - jno io_restore - larl %r14,io_resume_loop - jg preempt_schedule_irq + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + brasl %r14,preempt_schedule_irq + j io_return +#else + j io_restore #endif # -# switch to kernel stack, then check TIF bits +# Need to do work before returning to userspace, switch to kernel stack # -io_work: +io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 + # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGPENDING, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING # -io_work_loop: - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING +io_work_tif: + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo io_mcck_pending - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo io_reschedule - tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) - jnz io_sigpending - j io_restore -io_work_done: + tm __TI_flags+7(%r12),_TIF_SIGPENDING + jo io_sigpending + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + jo io_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_ASCE + jo io_uaccess + j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: + # TRACE_IRQS_ON already done at io_return brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler - j io_work_loop + TRACE_IRQS_OFF + j io_return + +# +# _CIF_ASCE is set, load user space asce +# +io_uaccess: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + j io_return # # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts brasl %r14,schedule # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - tm __TI_flags+7(%r9),_TIF_WORK_INT - jz io_restore # there is no work to do - j io_work_loop + j io_return # -# _TIF_SIGPENDING or _TIF_RESTORE_SIGMASK is set, call do_signal +# _TIF_SIGPENDING or is set, call do_signal # io_sigpending: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j io_return + +# +# _TIF_NOTIFY_RESUME or is set, call do_notify_resume +# +io_notify_resume: + # TRACE_IRQS_ON already done at io_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - j io_work_loop + j io_return /* * External interrupt handler routine */ - .globl ext_int_handler -ext_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER - stck __LC_INT_CLOCK - SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 - CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz ext_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct +ENTRY(ext_int_handler) + STCK __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_EXT_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,3 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +ext_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - llgh %r3,__LC_EXT_INT_CODE # get interruption code - brasl %r14,do_extint + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return +/* + * Load idle PSW. The second "half" of this function is in cleanup_idle. + */ +ENTRY(psw_idle) + stg %r3,__SF_EMPTY(%r15) + larl %r1,psw_idle_lpsw+4 + stg %r1,__SF_EMPTY+8(%r15) + STCK __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) +psw_idle_lpsw: + lpswe __SF_EMPTY(%r15) + br %r14 +psw_idle_end: + __critical_end: /* * Machine check handler routines */ - .globl mcck_int_handler -mcck_int_handler: +ENTRY(mcck_int_handler) + STCK __LC_MCCK_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+64 - la %r12,__LC_MCK_OLD_PSW + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_MCK_OLD_PSW + HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_int_main # yes -> rest of mcck code invalid -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - la %r14,4095 - mvc __LC_SAVE_AREA+104(8),__LC_ASYNC_ENTER_TIMER - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) + jo mcck_panic # yes -> rest of mcck code invalid + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - jo 1f + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - jl 0f + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - jl 0f +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) - mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14) -1: -#endif - tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_int_main # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - jnz mcck_int_main # from user -> load kernel stack - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) - jhe mcck_int_main - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) - jl mcck_int_main - brasl %r14,cleanup_critical -mcck_int_main: - lg %r14,__LC_PANIC_STACK # are we already on the panic stack? - slgr %r14,%r15 - srag %r14,%r14,PAGE_SHIFT - jz 0f - lg %r15,__LC_PANIC_STACK # load panic stack -0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - jno mcck_no_vtime # no -> no timer update - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz mcck_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -mcck_no_vtime: -#endif - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - la %r2,SP_PTREGS(%r15) # load pt_regs +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER + LAST_BREAK %r14 +mcck_skip: + lghi %r14,__LC_GPREGS_SAVE_AREA+64 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jno mcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW - ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+104 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER -0: -#endif - lpswe __LC_RETURN_MCCK_PSW # back to caller + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +0: lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_MCCK_PSW -/* - * Restart interruption handler, kick starter for additional CPUs - */ -#ifdef CONFIG_SMP - __CPUINIT - .globl restart_int_handler -restart_int_handler: - lg %r15,__LC_SAVE_AREA+120 # load ksp - lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs - lghi %r10,__LC_AREGS_SAVE_AREA - lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on - jg start_secondary - .previous -#else -/* - * If we do not run with SMP enabled, let the new CPU crash ... - */ - .globl restart_int_handler -restart_int_handler: - basr %r1,0 -restart_base: - lpswe restart_crash-restart_base(%r1) - .align 8 -restart_crash: - .long 0x000a0000,0x00000000,0x00000000,0x00000000 -restart_go: -#endif +mcck_panic: + lg %r14,__LC_PANIC_STACK + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip + +# +# PSW restart interrupt handler +# +ENTRY(restart_int_handler) + stg %r15,__LC_SAVE_AREA_RESTART + lg %r15,__LC_RESTART_STACK + aghi %r15,-__PT_SIZE # create pt_regs on stack + xc 0(__PT_SIZE,%r15),0(%r15) + stmg %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw + aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lg %r1,__LC_RESTART_FN # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA + lg %r3,__LC_RESTART_SOURCE + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b + + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK /* @@ -837,201 +789,247 @@ restart_go: */ stack_overflow: lg %r15,__LC_PANIC_STACK # change to panic stack - aghi %r15,-SP_SIZE - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - chi %r12,__LC_SVC_OLD_PSW - je 0f - chi %r12,__LC_PGM_OLD_PSW - je 0f - la %r1,__LC_SAVE_AREA+32 -0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain - la %r2,SP_PTREGS(%r15) # load pt_regs + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow #endif -cleanup_table_system_call: - .quad system_call, sysc_do_svc -cleanup_table_sysc_return: - .quad sysc_return, sysc_leave -cleanup_table_sysc_leave: - .quad sysc_leave, sysc_done -cleanup_table_sysc_work_loop: - .quad sysc_work_loop, sysc_work_done -cleanup_table_io_return: - .quad io_return, io_leave -cleanup_table_io_leave: - .quad io_leave, io_done -cleanup_table_io_work_loop: - .quad io_work_loop, io_work_done + .align 8 +cleanup_table: + .quad system_call + .quad sysc_do_svc + .quad sysc_tif + .quad sysc_restore + .quad sysc_done + .quad io_tif + .quad io_restore + .quad io_done + .quad psw_idle + .quad psw_idle_end cleanup_critical: - clc 8(8,%r12),BASED(cleanup_table_system_call) + clg %r9,BASED(cleanup_table) # system_call jl 0f - clc 8(8,%r12),BASED(cleanup_table_system_call+8) + clg %r9,BASED(cleanup_table+8) # sysc_do_svc jl cleanup_system_call -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_return) + clg %r9,BASED(cleanup_table+16) # sysc_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_return+8) - jl cleanup_sysc_return -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_leave) + clg %r9,BASED(cleanup_table+24) # sysc_restore + jl cleanup_sysc_tif + clg %r9,BASED(cleanup_table+32) # sysc_done + jl cleanup_sysc_restore + clg %r9,BASED(cleanup_table+40) # io_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_leave+8) - jl cleanup_sysc_leave -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop) + clg %r9,BASED(cleanup_table+48) # io_restore + jl cleanup_io_tif + clg %r9,BASED(cleanup_table+56) # io_done + jl cleanup_io_restore + clg %r9,BASED(cleanup_table+64) # psw_idle jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop+8) - jl cleanup_sysc_return -0: - clc 8(8,%r12),BASED(cleanup_table_io_return) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_return+8) - jl cleanup_io_return -0: - clc 8(8,%r12),BASED(cleanup_table_io_leave) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_leave+8) - jl cleanup_io_leave -0: - clc 8(8,%r12),BASED(cleanup_table_io_work_loop) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_work_loop+8) - jl cleanup_io_return -0: - br %r14 + clg %r9,BASED(cleanup_table+72) # psw_idle_end + jl cleanup_idle +0: br %r14 + cleanup_system_call: - mvc __LC_RETURN_PSW(16),0(%r12) - cghi %r12,__LC_MCK_OLD_PSW - je 0f - la %r12,__LC_SAVE_AREA+32 - j 1f -0: la %r12,__LC_SAVE_AREA+64 -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) + # check if stpt has been executed + clg %r9,BASED(cleanup_system_call_insn) jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) - jhe cleanup_vtime -#endif - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stmg has been executed + clg %r9,BASED(cleanup_system_call_insn+8) + jh 0f + mvc __LC_SAVE_AREA_SYNC(64),0(%r11) +0: # check if base register setup + TIF bit load has been done + clg %r9,BASED(cleanup_system_call_insn+16) + jhe 0f + # set up saved registers r10 and r12 + stg %r10,16(%r11) # r10 last break + stg %r12,32(%r11) # r12 thread-info pointer +0: # check if the user time update has been done + clg %r9,BASED(cleanup_system_call_insn+24) jh 0f - mvc __LC_SAVE_AREA(32),0(%r12) -0: stg %r13,8(%r12) - stg %r12,__LC_SAVE_AREA+96 # argh - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - lg %r12,__LC_SAVE_AREA+96 # argh - stg %r15,24(%r12) - llgh %r7,__LC_SVC_INT_CODE -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -cleanup_vtime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) - jhe cleanup_stime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32) - jh cleanup_update - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + lg %r15,__LC_EXIT_TIMER + slg %r15,__LC_SYNC_ENTER_TIMER + alg %r15,__LC_USER_TIMER + stg %r15,__LC_USER_TIMER +0: # check if the system time update has been done + clg %r9,BASED(cleanup_system_call_insn+32) + jh 0f + lg %r15,__LC_LAST_UPDATE_TIMER + slg %r15,__LC_EXIT_TIMER + alg %r15,__LC_SYSTEM_TIMER + stg %r15,__LC_SYSTEM_TIMER +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) - la %r12,__LC_RETURN_PSW + # do LAST_BREAK + lg %r9,16(%r11) + srag %r9,%r9,23 + jz 0f + mvc __TI_last_break(8,%r12),16(%r11) +0: # set up saved register r11 + lg %r15,__LC_KERNEL_STACK + la %r9,STACK_FRAME_OVERHEAD(%r15) + stg %r9,24(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r9) + mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL + # setup saved register r15 + stg %r15,56(%r11) # r15 stack pointer + # set new psw address and exit + larl %r9,sysc_do_svc br %r14 cleanup_system_call_insn: - .quad sysc_saveall -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .quad system_call - .quad sysc_vtime - .quad sysc_stime - .quad sysc_update -#endif + .quad sysc_stmg + .quad sysc_per + .quad sysc_vtime+18 + .quad sysc_vtime+42 -cleanup_sysc_return: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_return) - la %r12,__LC_RETURN_PSW +cleanup_sysc_tif: + larl %r9,sysc_tif br %r14 -cleanup_sysc_leave: - clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f -#endif - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - jne 0f - mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_sysc_restore: + clg %r9,BASED(cleanup_sysc_restore_insn) + je 0f + lg %r9,24(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave_insn: +cleanup_sysc_restore_insn: .quad sysc_done - 4 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad sysc_done - 8 -#endif -cleanup_io_return: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_work_loop) - la %r12,__LC_RETURN_PSW +cleanup_io_tif: + larl %r9,io_tif br %r14 -cleanup_io_leave: - clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f -#endif - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - jne 0f - mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +cleanup_io_restore: + clg %r9,BASED(cleanup_io_restore_insn) + je 0f + lg %r9,24(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 -cleanup_io_leave_insn: +cleanup_io_restore_insn: .quad io_done - 4 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad io_done - 8 -#endif + +cleanup_idle: + # copy interrupt clock & cpu timer + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER + cghi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER +0: # check if stck & stpt have been executed + clg %r9,BASED(cleanup_idle_insn) + jhe 1f + mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) + mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) +1: # account system time going idle + lg %r9,__LC_STEAL_TIMER + alg %r9,__CLOCK_IDLE_ENTER(%r2) + slg %r9,__LC_LAST_UPDATE_CLOCK + stg %r9,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + lg %r9,__LC_SYSTEM_TIMER + alg %r9,__LC_LAST_UPDATE_TIMER + slg %r9,__TIMER_IDLE_ENTER(%r2) + stg %r9,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + # prepare return psw + nihh %r8,0xfcfd # clear irq & wait state bits + lg %r9,48(%r11) # return from psw_idle + br %r14 +cleanup_idle_insn: + .quad psw_idle_lpsw /* * Integer constants */ - .align 4 -.Lconst: -.Lnr_syscalls: .long NR_syscalls -.L0x0130: .short 0x130 -.L0x0140: .short 0x140 -.L0x0150: .short 0x150 -.L0x0160: .short 0x160 -.L0x0170: .short 0x170 + .align 8 .Lcritical_start: - .quad __critical_start -.Lcritical_end: - .quad __critical_end + .quad __critical_start +.Lcritical_length: + .quad __critical_end - __critical_start + + +#if IS_ENABLED(CONFIG_KVM) +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ +ENTRY(sie64a) + stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + stg %r2,__SF_EMPTY(%r15) # save control block pointer + stg %r3,__SF_EMPTY+8(%r15) # save guest register save area + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: + lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done + LPP __SF_EMPTY(%r15) # set guest id + sie 0(%r14) +sie_done: + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions between sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: + lg %r14,__SF_EMPTY+8(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code + br %r14 +sie_fault: + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit + + .align 8 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap + + EX_TABLE(rewind_pad,sie_fault) + EX_TABLE(sie_exit,sie_fault) +#endif .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esame + .globl sys_call_table sys_call_table: #include "syscalls.S" #undef SYSCALL @@ -1039,6 +1037,7 @@ sys_call_table: #ifdef CONFIG_COMPAT #define SYSCALL(esa,esame,emu) .long emu + .globl sys_call_table_emu sys_call_table_emu: #include "syscalls.S" #undef SYSCALL diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c new file mode 100644 index 00000000000..54d6493c4a5 --- /dev/null +++ b/arch/s390/kernel/ftrace.c @@ -0,0 +1,192 @@ +/* + * Dynamic function tracer architecture backend. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/kprobes.h> +#include <trace/syscall.h> +#include <asm/asm-offsets.h> +#include "entry.h" + +#ifdef CONFIG_DYNAMIC_FTRACE + +void ftrace_disable_code(void); +void ftrace_enable_insn(void); + +#ifdef CONFIG_64BIT +/* + * The 64-bit mcount code looks like this: + * stg %r14,8(%r15) # offset 0 + * > larl %r1,<&counter> # offset 6 + * > brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 + * Total length is 24 bytes. The middle two instructions of the mcount + * block get overwritten by ftrace_make_nop / ftrace_make_call. + * The 64-bit enabled ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > lg %r1,__LC_FTRACE_FUNC # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The return points of the mcount/ftrace function have the same offset 18. + * The 64-bit disable ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > jg .+18 # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The jg instruction branches to offset 24 to skip as many instructions + * as possible. + */ +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " jg 0f\n" + " lgr %r0,%r0\n" + " basr %r14,%r1\n" + "0:\n" + " .align 4\n" + "ftrace_enable_insn:\n" + " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); + +#define FTRACE_INSN_SIZE 6 + +#else /* CONFIG_64BIT */ +/* + * The 31-bit mcount code looks like this: + * st %r14,4(%r15) # offset 0 + * > bras %r1,0f # offset 4 + * > .long _mcount # offset 8 + * > .long <&counter> # offset 12 + * > 0: l %r14,0(%r1) # offset 16 + * > l %r1,4(%r1) # offset 20 + * basr %r14,%r14 # offset 24 + * l %r14,4(%r15) # offset 26 + * Total length is 30 bytes. The twenty bytes starting from offset 4 + * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. + * The 31-bit enabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > l %r14,__LC_FTRACE_FUNC # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The return points of the mcount/ftrace function have the same offset 26. + * The 31-bit disabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > j .+26 # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The j instruction branches to offset 30 to skip as many instructions + * as possible. + */ +asm( + " .align 4\n" + "ftrace_disable_code:\n" + " j 1f\n" + " j 0f\n" + " .fill 12,1,0x07\n" + "0: basr %r14,%r14\n" + "1:\n" + " .align 4\n" + "ftrace_enable_insn:\n" + " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); + +#define FTRACE_INSN_SIZE 4 + +#endif /* CONFIG_64BIT */ + + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, + MCOUNT_INSN_SIZE)) + return -EPERM; + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, + FTRACE_INSN_SIZE)) + return -EPERM; + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + return 0; +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Hook the return address and push it in the stack of return addresses + * in current thread info. + */ +unsigned long __kprobes prepare_ftrace_return(unsigned long parent, + unsigned long ip) +{ + struct ftrace_graph_ent trace; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + goto out; + ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE; + trace.func = ip; + trace.depth = current->curr_ret_stack + 1; + /* Only trace if the calling function expects to. */ + if (!ftrace_graph_entry(&trace)) + goto out; + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + goto out; + parent = (unsigned long) return_to_handler; +out: + return parent; +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Patch the kernel code at ftrace_graph_caller location. The instruction + * there is branch relative and save to prepare_ftrace_return. To disable + * the call to prepare_ftrace_return we patch the bras offset to point + * directly after the instructions. To enable the call we calculate + * the original offset to prepare_ftrace_return and put it back. + */ +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned short offset; + + offset = ((void *) prepare_ftrace_return - + (void *) ftrace_graph_caller) / 2; + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + static unsigned short offset = 0x0002; + + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 83477c7dc74..e88d35d7495 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head.S - * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -23,8 +21,8 @@ * */ -#include <asm/setup.h> -#include <asm/lowcore.h> +#include <linux/init.h> +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> @@ -35,126 +33,8 @@ #define ARCH_OFFSET 0 #endif -.section ".text.head","ax" -#ifndef CONFIG_IPL - .org 0 - .long 0x00080000,0x80000000+startup # Just a restart PSW -#else -#ifdef CONFIG_IPL_TAPE -#define IPL_BS 1024 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x27000000,0x60000001 # by ipl to addresses 0-23. - .long 0x02000000,0x20000000+IPL_BS # (a PSW and two CCWs). - .long 0x00000000,0x00000000 # external old psw - .long 0x00000000,0x00000000 # svc old psw - .long 0x00000000,0x00000000 # program check old psw - .long 0x00000000,0x00000000 # machine check old psw - .long 0x00000000,0x00000000 # io old psw - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x00000000,0x00000000 - .long 0x000a0000,0x00000058 # external new psw - .long 0x000a0000,0x00000060 # svc new psw - .long 0x000a0000,0x00000068 # program check new psw - .long 0x000a0000,0x00000070 # machine check new psw - .long 0x00080000,0x80000000+.Lioint # io new psw - - .org 0x100 -# -# subroutine for loading from tape -# Paramters: -# R1 = device number -# R2 = load address -.Lloader: - st %r14,.Lldret - la %r3,.Lorbread # r3 = address of orb - la %r5,.Lirb # r5 = address of irb - st %r2,.Lccwread+4 # initialize CCW data addresses - lctl %c6,%c6,.Lcr6 - slr %r2,%r2 -.Lldlp: - la %r6,3 # 3 retries -.Lssch: - ssch 0(%r3) # load chunk of IPL_BS bytes - bnz .Llderr -.Lw4end: - bas %r14,.Lwait4io - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lrecov - slr %r7,%r7 - icm %r7,3,10(%r5) # get residual count - lcr %r7,%r7 - la %r7,IPL_BS(%r7) # IPL_BS-residual=#bytes read - ar %r2,%r7 # add to total size - tm 8(%r5),0x01 # found a tape mark ? - bnz .Ldone - l %r0,.Lccwread+4 # update CCW data addresses - ar %r0,%r7 - st %r0,.Lccwread+4 - b .Lldlp -.Ldone: - l %r14,.Lldret - br %r14 # r2 contains the total size -.Lrecov: - bas %r14,.Lsense # do the sensing - bct %r6,.Lssch # dec. retry count & branch - b .Llderr -# -# Sense subroutine -# -.Lsense: - st %r14,.Lsnsret - la %r7,.Lorbsense - ssch 0(%r7) # start sense command - bnz .Llderr - bas %r14,.Lwait4io - l %r14,.Lsnsret - tm 8(%r5),0x82 # do we have a problem ? - bnz .Llderr - br %r14 -# -# Wait for interrupt subroutine -# -.Lwait4io: - lpsw .Lwaitpsw -.Lioint: - c %r1,0xb8 # compare subchannel number - bne .Lwait4io - tsch 0(%r5) - slr %r0,%r0 - tm 8(%r5),0x82 # do we have a problem ? - bnz .Lwtexit - tm 8(%r5),0x04 # got device end ? - bz .Lwait4io -.Lwtexit: - br %r14 -.Llderr: - lpsw .Lcrash - - .align 8 -.Lorbread: - .long 0x00000000,0x0080ff00,.Lccwread - .align 8 -.Lorbsense: - .long 0x00000000,0x0080ff00,.Lccwsense - .align 8 -.Lccwread: - .long 0x02200000+IPL_BS,0x00000000 -.Lccwsense: - .long 0x04200001,0x00000000 -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 -.Lldret:.long 0 -.Lsnsret: .long 0 -#endif /* CONFIG_IPL_TAPE */ +__HEAD -#ifdef CONFIG_IPL_VM #define IPL_BS 0x730 .org 0 .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded @@ -172,7 +52,7 @@ .long 0x02000370,0x60000050 # the channel program the PSW .long 0x020003c0,0x60000050 # at location 0 is loaded. .long 0x02000410,0x60000050 # Initial processing starts - .long 0x02000460,0x60000050 # at 0xf0 = iplstart. + .long 0x02000460,0x60000050 # at 0x200 = iplstart. .long 0x020004b0,0x60000050 .long 0x02000500,0x60000050 .long 0x02000550,0x60000050 @@ -182,11 +62,54 @@ .long 0x02000690,0x60000050 .long 0x020006e0,0x20000050 - .org 0xf0 + .org 0x200 +# +# subroutine to set architecture mode +# +.Lsetmode: +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +#endif + br %r14 + +# +# subroutine to wait for end I/O +# +.Lirqwait: +#ifdef CONFIG_64BIT + mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + br %r14 + .align 8 +.Lnewpsw: + .quad 0x0000000080000000,.Lioint +#else + mvc 0x78(8),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + br %r14 + .align 8 +.Lnewpsw: + .long 0x00080000,0x80000000+.Lioint +#endif +.Lwaitpsw: + .long 0x020a0000,0x80000000+.Lioint + # # subroutine for loading cards from the reader # .Lloader: + la %r4,0(%r14) la %r3,.Lorb # r2 = address of orb into r2 la %r5,.Lirb # r4 = address of irb la %r6,.Lccws @@ -203,9 +126,7 @@ ssch 0(%r3) # load chunk of 1600 bytes bnz .Llderr .Lwait4irq: - mvc 0x78(8),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw -.Lioint: + bas %r14,.Lirqwait c %r1,0xb8 # compare subchannel number bne .Lwait4irq tsch 0(%r5) @@ -224,7 +145,7 @@ sr %r0,%r3 # #ccws*80-residual=#bytes read ar %r2,%r0 - br %r14 # r2 contains the total size + br %r4 # r2 contains the total size .Lcont: ahi %r2,0x640 # add 0x640 to total size @@ -248,19 +169,15 @@ .Lloadp:.long 0,0 .align 8 .Lcrash:.long 0x000a0000,0x00000000 -.Lnewpsw: - .long 0x00080000,0x80000000+.Lioint -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint .align 8 .Lccws: .rept 19 .long 0x02600050,0x00000000 .endr .long 0x02200050,0x00000000 -#endif /* CONFIG_IPL_VM */ iplstart: + bas %r14,.Lsetmode # Immediately switch to 64 bit mode lh %r1,0xb8 # test if subchannel number bct %r1,.Lnoload # is valid l %r1,0xb8 # load ipl subchannel number @@ -288,19 +205,7 @@ iplstart: bz .Lagain1 # skip dateset trailer la %r5,0(%r4,%r2) lr %r3,%r2 -.Lidebc: - tm 0(%r5),0x80 # high order bit set ? - bo .Ldocv # yes -> convert from EBCDIC - ahi %r5,-1 - bct %r3,.Lidebc - b .Lnocv -.Ldocv: - l %r3,.Lcvtab - tr 0(256,%r4),0(%r3) # convert parameters to ascii - tr 256(256,%r4),0(%r3) - tr 512(256,%r4),0(%r3) - tr 768(122,%r4),0(%r3) -.Lnocv: la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line + la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line mvc 0(256,%r3),0(%r4) mvc 256(256,%r3),256(%r4) mvc 512(256,%r3),512(%r4) @@ -339,12 +244,11 @@ iplstart: clc 0(3,%r2),.L_eof bz .Lagain2 -#ifdef CONFIG_IPL_VM # # reset files in VM reader # - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running VM ? + stidp .Lcpuid # store cpuid + tm .Lcpuid,0xff # running VM ? bno .Lnoreset la %r2,.Lreset lhi %r3,26 @@ -356,24 +260,14 @@ iplstart: tm 31(%r5),0xff # bits is set in the schib bz .Lnoreset .Lwaitforirq: - mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw -.Lwaitrdrirq: - lpsw .Lrdrwaitpsw -.Lrdrint: + bas %r14,.Lirqwait # wait for IO interrupt c %r1,0xb8 # compare subchannel number - bne .Lwaitrdrirq + bne .Lwaitforirq la %r5,.Lirb tsch 0(%r5) .Lnoreset: b .Lnoload - .align 8 -.Lrdrnewpsw: - .long 0x00080000,0x80000000+.Lrdrint -.Lrdrwaitpsw: - .long 0x020a0000,0x80000000+.Lrdrint -#endif - # # everything loaded, go for it # @@ -381,17 +275,16 @@ iplstart: l %r1,.Lstartup br %r1 -.Linitrd:.long _end + 0x400000 # default address of initrd +.Linitrd:.long _end # default address of initrd .Lparm: .long PARMAREA .Lstartup: .long startup -.Lcvtab:.long _ebcasc # ebcdic to ascii table .Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" .L_eof: .long 0xc5d6c600 /* C'EOF' */ .L_hdr: .long 0xc8c4d900 /* C'HDR' */ - -#endif /* CONFIG_IPL */ + .align 8 +.Lcpuid:.fill 8,1,0 # # SALIPL loader support. Based on a patch by Rob van der Heij. @@ -399,9 +292,9 @@ iplstart: # doesn't need a builtin ipl record. # .org 0x800 - .globl start -start: +ENTRY(start) stm %r0,%r15,0x07b0 # store registers + bas %r14,.Lsetmode # Immediately switch to 64 bit mode basr %r12,%r0 .base: l %r11,.parm @@ -417,13 +310,10 @@ start: .sk8x8: mvc 0(240,%r8),0(%r9) # copy iplparms into buffer .gotr: - l %r10,.tbl # EBCDIC to ASCII table - tr 0(240,%r8),0(%r10) slr %r0,%r0 st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11) st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11) j startup # continue with startup -.tbl: .long _ebcasc # translate table .cmd: .long COMMAND_LINE # address of command line buffer .parm: .long PARMAREA .lowcase: @@ -461,8 +351,151 @@ start: .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff +# +# startup-code at 0x10000, running in absolute addressing mode +# this is called either by the ipl loader or directly by PSW restart +# or linload or SALIPL +# + .org 0x10000 +ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: +#ifdef CONFIG_64BIT + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode +#else + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +#endif + basr %r13,0 # get base +.LPG0: + xc 0x200(256),0x200 # partially clear lowcore + xc 0x300(256),0x300 + xc 0xe00(256),0xe00 + stck __LC_LAST_UPDATE_CLOCK + spt 6f-.LPG0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) + xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST +#ifndef CONFIG_MARCH_G5 + # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} + .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list + tm __LC_STFL_FAC_LIST,0x01 # stfle available ? + jz 0f + la %r0,1 + .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended + # verify if all required facilities are supported by the machine +0: la %r1,__LC_STFL_FAC_LIST + la %r2,3f+8-.LPG0(%r13) + l %r3,0(%r2) +1: l %r0,0(%r1) + n %r0,4(%r2) + cl %r0,4(%r2) + jne 2f + la %r1,4(%r1) + la %r2,4(%r2) + ahi %r3,-1 + jnz 1b + j 4f +2: l %r15,.Lstack-.LPG0(%r13) + ahi %r15,-96 + la %r2,.Lals_string-.LPG0(%r13) + l %r3,.Lsclp_print-.LPG0(%r13) + basr %r14,%r3 + lpsw 3f-.LPG0(%r13) # machine type not good enough, crash +.Lals_string: + .asciz "The Linux kernel requires more recent processor hardware" +.Lsclp_print: + .long _sclp_print_early +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) + .align 16 +3: .long 0x000a0000,0x8badcccc + +# List of facilities that are required. If not all facilities are present +# the kernel will crash. Format is number of facility words with bits set, +# followed by the facility words. + +#if defined(CONFIG_64BIT) +#if defined(CONFIG_MARCH_ZEC12) + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 +#elif defined(CONFIG_MARCH_Z196) + .long 2, 0xc100eff2, 0xf46c0000 +#elif defined(CONFIG_MARCH_Z10) + .long 2, 0xc100eff2, 0xf0680000 +#elif defined(CONFIG_MARCH_Z9_109) + .long 1, 0xc100efc2 +#elif defined(CONFIG_MARCH_Z990) + .long 1, 0xc0002000 +#elif defined(CONFIG_MARCH_Z900) + .long 1, 0xc0000000 +#endif +#else +#if defined(CONFIG_MARCH_ZEC12) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z196) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z10) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z9_109) + .long 1, 0x8100c880 +#elif defined(CONFIG_MARCH_Z990) + .long 1, 0x80002000 +#elif defined(CONFIG_MARCH_Z900) + .long 1, 0x80000000 +#endif +#endif +4: +#endif + #ifdef CONFIG_64BIT -#include "head64.S" + /* Continue with 64bit startup code in head64.S */ + sam64 # switch to 64 bit mode + jg startup_continue #else -#include "head31.S" + /* Continue with 31bit startup code in head31.S */ + l %r13,5f-.LPG0(%r13) + b 0(%r13) + .align 8 +5: .long startup_continue #endif + + .align 8 +6: .long 0x7fffffff,0xffffffff + +#include "head_kdump.S" + +# +# params at 10400 (setup.h) +# + .org PARMAREA + .long 0,0 # IPL_DEVICE + .long 0,0 # INITRD_START + .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE + + .org COMMAND_LINE + .byte "root=/dev/ram0 ro" + .byte 0 + + .org 0x11000 diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index dc364c1419a..6dbe80983a2 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head31.S - * - * Copyright (C) IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -10,113 +8,42 @@ * */ -# -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -startup:basr %r13,0 # get base -.LPG0: l %r13,0f-.LPG0(%r13) - b 0(%r13) -0: .long startup_continue - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .long 0,0 # IPL_DEVICE - .long 0,0 # INITRD_START - .long 0,0 # INITRD_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> - .org 0x11000 - -startup_continue: +__HEAD +ENTRY(startup_continue) basr %r13,0 # get base -.LPG1: mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +.LPG1: + + l %r1,.Lbase_cc-.LPG1(%r13) + mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore - mvc __LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12) # # Setup stack # l %r15,.Linittu-.LPG1(%r13) + st %r15,__LC_THREAD_INFO # cache thread info in lowcore mvc __LC_CURRENT(4),__TI_task(%r15) ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE st %r15,__LC_KERNEL_STACK # set end of kernel stack ahi %r15,-96 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain # # Save ipl parameters, clear bss memory, initialize storage key for kernel pages, # and create a kernel NSS if the SAVESYS= parm is defined # l %r14,.Lstartup_init-.LPG1(%r13) basr %r14,%r14 - - l %r12,.Lmflags-.LPG1(%r13) # get address of machine_flags -# -# find out if we have an IEEE fpu -# - mvc __LC_PGM_NEW_PSW(8),.Lpcfpu-.LPG1(%r13) - efpc %r0,0 # test IEEE extract fpc instruction - oi 3(%r12),2 # set IEEE fpu flag -.Lchkfpu: - -# -# find out if we have the CSP instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpccsp-.LPG1(%r13) - la %r0,0 - lr %r1,%r0 - la %r2,4 - csp %r0,%r2 # Test CSP instruction - oi 3(%r12),8 # set CSP flag -.Lchkcsp: - -# -# find out if we have the MVPG instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpcmvpg-.LPG1(%r13) - sr %r0,%r0 - la %r1,0 - la %r2,0 - mvpg %r1,%r2 # Test CSP instruction - oi 3(%r12),16 # set MVPG flag -.Lchkmvpg: - -# -# find out if we have the IDTE instruction -# - mvc __LC_PGM_NEW_PSW(8),.Lpcidte-.LPG1(%r13) - .long 0xb2b10000 # store facility list - tm 0xc8,0x08 # check bit for clearing-by-ASCE - bno .Lchkidte-.LPG1(%r13) - lhi %r1,2094 - lhi %r2,0 - .long 0xb98e2001 - oi 3(%r12),0x80 # set IDTE flag -.Lchkidte: - -# -# find out if the diag 0x9c is available -# - mvc __LC_PGM_NEW_PSW(8),.Lpcdiag9c-.LPG1(%r13) - stap __LC_CPUID+4 # store cpu address - lh %r1,__LC_CPUID+4 - diag %r1,0,0x9c # test diag 0x9c - oi 2(%r12),1 # set diag9c flag -.Lchkdiag9c: - lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space, # virtual and never return ... .align 8 .Lentry:.long 0x00080000,0x80000000 + _stext -.Lctl: .long 0x04b50002 # cr0: various things +.Lctl: .long 0x04b50000 # cr0: various things .long 0 # cr1: primary space segment table .long .Lduct # cr2: dispatchable unit control table .long 0 # cr3: instruction authorization @@ -132,13 +59,6 @@ startup_continue: .long 0 # cr13: home space segment table .long 0xc0000000 # cr14: machine check handling off .long 0 # cr15: linkage stack operations -.Lpcfpu:.long 0x00080000,0x80000000 + .Lchkfpu -.Lpccsp:.long 0x00080000,0x80000000 + .Lchkcsp -.Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg -.Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte -.Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c -.Lmchunk:.long memory_chunk -.Lmflags:.long machine_flags .Lbss_bgn: .long __bss_start .Lbss_end: .long _end .Lparmaddr: .long PARMAREA @@ -152,23 +72,21 @@ startup_continue: .Lduald:.rept 8 .long 0x80000000,0,0,0 # invalid access-list entries .endr +.Lbase_cc: + .long sched_clock_base_cc - .org 0x12000 - .globl _ehead -_ehead: -#ifdef CONFIG_SHARED_KERNEL - .org 0x100000 -#endif +ENTRY(_ehead) + .org 0x100000 - 0x11000 # head.o ends at 0x11000 # # startup-code, running in absolute addressing mode # - .globl _stext -_stext: basr %r13,0 # get base +ENTRY(_stext) + basr %r13,0 # get base .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 79dccd206a6..d7c00507568 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/head64.S - * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2010 * * Author(s): Hartmut Penner <hp@de.ibm.com> * Martin Schwidefsky <schwidefsky@de.ibm.com> @@ -10,188 +8,43 @@ * */ -# -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -startup:basr %r13,0 # get base -.LPG0: l %r13,0f-.LPG0(%r13) - b 0(%r13) -0: .long startup_continue - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .quad 0 # IPL_DEVICE - .quad 0 # INITRD_START - .quad 0 # INITRD_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - - .org 0x11000 - -startup_continue: - basr %r13,0 # get base -.LPG1: sll %r13,1 # remove high order bit - srl %r13,1 - -#ifdef CONFIG_ZFCPDUMP - - # check if we have been ipled using zfcp dump: - - tm 0xb9,0x01 # test if subchannel is enabled - jno .nodump # subchannel disabled - l %r1,0xb8 - la %r5,.Lipl_schib-.LPG1(%r13) - stsch 0(%r5) # get schib of subchannel - jne .nodump # schib not available - tm 5(%r5),0x01 # devno valid? - jno .nodump - tm 4(%r5),0x80 # qdio capable device? - jno .nodump - l %r2,20(%r0) # address of ipl parameter block - lhi %r3,0 - ic %r3,0x148(%r2) # get opt field - chi %r3,0x20 # load with dump? - jne .nodump - - # store all prefix registers in case of load with dump: - - la %r7,0 # base register for 0 page - la %r8,0 # first cpu - l %r11,.Lpref_arr_ptr-.LPG1(%r13) # address of prefix array - ahi %r11,4 # skip boot cpu - lr %r12,%r11 - ahi %r12,(CONFIG_NR_CPUS*4) # end of prefix array - stap .Lcurrent_cpu+2-.LPG1(%r13) # store current cpu addr -1: - cl %r8,.Lcurrent_cpu-.LPG1(%r13) # is ipl cpu ? - je 4f # if yes get next cpu -2: - lr %r9,%r7 - sigp %r9,%r8,0x9 # stop & store status of cpu - brc 8,3f # accepted - brc 4,4f # status stored: next cpu - brc 2,2b # busy: try again - brc 1,4f # not op: next cpu -3: - mvc 0(4,%r11),264(%r7) # copy prefix register to prefix array - ahi %r11,4 # next element in prefix array - clr %r11,%r12 - je 5f # no more space in prefix array -4: - ahi %r8,1 # next cpu (r8 += 1) - cl %r8,.Llast_cpu-.LPG1(%r13) # is last possible cpu ? - jl 1b # jump if not last cpu -5: - lhi %r1,2 # mode 2 = esame (dump) - j 6f - .align 4 -.Lipl_schib: - .rept 13 - .long 0 - .endr -.nodump: - lhi %r1,1 # mode 1 = esame (normal ipl) -6: -#else - lhi %r1,1 # mode 1 = esame (normal ipl) -#endif /* CONFIG_ZFCPDUMP */ - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - sigp %r1,%r0,0x12 # switch to esame mode - sam64 # switch to 64 bit mode +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> + +__HEAD +ENTRY(startup_continue) + larl %r1,sched_clock_base_cc + mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK + larl %r13,.LPG1 # get base lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore - mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # # Setup stack # larl %r15,init_thread_union + stg %r15,__LC_THREAD_INFO # cache thread info in lowcore lg %r14,__TI_task(%r15) # cache current in lowcore stg %r14,__LC_CURRENT aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE stg %r15,__LC_KERNEL_STACK # set end of kernel stack aghi %r15,-160 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain # # Save ipl parameters, clear bss memory, initialize storage key for kernel pages, # and create a kernel NSS if the SAVESYS= parm is defined # brasl %r14,startup_init - # set program check new psw mask - mvc __LC_PGM_NEW_PSW(8),.Lpcmsk-.LPG1(%r13) - larl %r12,machine_flags -# -# find out if we have the MVPG instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - sgr %r0,%r0 - lghi %r1,0 - lghi %r2,0 - mvpg %r1,%r2 # test MVPG instruction - oi 7(%r12),16 # set MVPG flag -0: - -# -# find out if the diag 0x44 works in 64 bit mode -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - diag 0,0,0x44 # test diag 0x44 - oi 7(%r12),32 # set diag44 flag -0: - -# -# find out if we have the IDTE instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - .long 0xb2b10000 # store facility list - tm 0xc8,0x08 # check bit for clearing-by-ASCE - bno 0f-.LPG1(%r13) - lhi %r1,2048 - lhi %r2,0 - .long 0xb98e2001 - oi 7(%r12),0x80 # set IDTE flag -0: - -# -# find out if the diag 0x9c is available -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - stap __LC_CPUID+4 # store cpu address - lh %r1,__LC_CPUID+4 - diag %r1,0,0x9c # test diag 0x9c - oi 6(%r12),1 # set diag9c flag -0: - -# -# find out if we have the MVCOS instruction -# - la %r1,0f-.LPG1(%r13) # set program check address - stg %r1,__LC_PGM_NEW_PSW+8 - .short 0xc800 # mvcos 0(%r0),0(%r0),%r0 - .short 0x0000 - .short 0x0000 -0: tm 0x8f,0x13 # special-operation exception? - bno 1f-.LPG1(%r13) # if yes, MVCOS is present - oi 6(%r12),2 # set MVCOS flag -1: - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, # virtual and never return ... .align 16 +.LPG1: .Lentry:.quad 0x0000000180000000,_stext -.Lctl: .quad 0x04b50002 # cr0: various things +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table .quad 0 # cr3: instruction authorization @@ -206,45 +59,36 @@ startup_continue: .quad 0 # cr12: tracing off .quad 0 # cr13: home space segment table .quad 0xc0000000 # cr14: machine check handling off - .quad 0 # cr15: linkage stack operations + .quad .Llinkage_stack # cr15: linkage stack operations .Lpcmsk:.quad 0x0000000180000000 .L4malign:.quad 0xffffffffffc00000 .Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 .Lnop: .long 0x07000700 -#ifdef CONFIG_ZFCPDUMP -.Lcurrent_cpu: - .long 0x0 -.Llast_cpu: - .long 0x0000ffff -.Lpref_arr_ptr: - .long zfcpdump_prefix_array -#endif /* CONFIG_ZFCPDUMP */ .Lparmaddr: .quad PARMAREA .align 64 -.Lduct: .long 0,0,0,0,.Lduald,0,0,0 +.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0 .long 0,0,0,0,0,0,0,0 +.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0 .align 128 .Lduald:.rept 8 .long 0x80000000,0,0,0 # invalid access-list entries .endr +.Llinkage_stack: + .long 0,0,0x89000000,0,0,0,0x8a000000,0 - .org 0x12000 - .globl _ehead -_ehead: -#ifdef CONFIG_SHARED_KERNEL - .org 0x100000 -#endif +ENTRY(_ehead) + .org 0x100000 - 0x11000 # head.o ends at 0x11000 # # startup-code, running in absolute addressing mode # - .globl _stext -_stext: basr %r13,0 # get base +ENTRY(_stext) + basr %r13,0 # get base .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000000..085a95eb315 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,108 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <asm/sigp.h> + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +.align 8 +.Lrestart_psw: + .quad 0x0000000080000000,0x0000000000000000 + startup +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c deleted file mode 100644 index d494161b05b..00000000000 --- a/arch/s390/kernel/init_task.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * arch/s390/kernel/init_task.c - * - * S390 version - * - * Derived from "arch/i386/kernel/init_task.c" - */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial thread structure. - * - * We need to make sure that this is 8192-byte aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 375232c46c7..633ca750453 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,8 +1,7 @@ /* - * arch/s390/kernel/ipl.c * ipl/reipl/dump support for Linux on s390. * - * Copyright IBM Corp. 2005,2007 + * Copyright IBM Corp. 2005, 2012 * Author(s): Michael Holzheu <holzheu@de.ibm.com> * Heiko Carstens <heiko.carstens@de.ibm.com> * Volker Sameske <sameske@de.ibm.com> @@ -14,6 +13,10 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/crash_dump.h> +#include <linux/debug_locks.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -22,6 +25,10 @@ #include <asm/ebcdic.h> #include <asm/reset.h> #include <asm/sclp.h> +#include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -41,11 +48,13 @@ * - halt * - power off * - reipl + * - restart */ #define ON_PANIC_STR "on_panic" #define ON_HALT_STR "on_halt" #define ON_POFF_STR "on_poff" #define ON_REIPL_STR "on_reboot" +#define ON_RESTART_STR "on_restart" struct shutdown_action; struct shutdown_trigger { @@ -54,18 +63,20 @@ struct shutdown_trigger { }; /* - * Five shutdown action types are supported: + * The following shutdown action types are supported: */ #define SHUTDOWN_ACTION_IPL_STR "ipl" #define SHUTDOWN_ACTION_REIPL_STR "reipl" #define SHUTDOWN_ACTION_DUMP_STR "dump" #define SHUTDOWN_ACTION_VMCMD_STR "vmcmd" #define SHUTDOWN_ACTION_STOP_STR "stop" +#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl" struct shutdown_action { char *name; void (*fn) (struct shutdown_trigger *trigger); int (*init) (void); + int init_rc; }; static char *ipl_type_str(enum ipl_type type) @@ -121,6 +132,7 @@ enum ipl_method { REIPL_METHOD_FCP_RO_VM, REIPL_METHOD_FCP_DUMP, REIPL_METHOD_NSS, + REIPL_METHOD_NSS_DIAG, REIPL_METHOD_DEFAULT, }; @@ -134,14 +146,16 @@ enum dump_method { static int diag308_set_works = 0; +static struct ipl_parameter_block ipl_block; + static int reipl_capabilities = IPL_TYPE_UNKNOWN; static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT; static struct ipl_parameter_block *reipl_block_fcp; static struct ipl_parameter_block *reipl_block_ccw; - -static char reipl_nss_name[NSS_NAME_SIZE + 1]; +static struct ipl_parameter_block *reipl_block_nss; +static struct ipl_parameter_block *reipl_block_actual; static int dump_capabilities = DUMP_TYPE_NONE; static enum dump_type dump_type = DUMP_TYPE_NONE; @@ -213,7 +227,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ const char *buf, size_t len) \ { \ strncpy(_value, buf, sizeof(_value) - 1); \ - strstrip(_value); \ + strim(_value); \ return len; \ } \ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ @@ -263,6 +277,118 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); +/* VM IPL PARM routines */ +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + int i; + size_t len; + char has_lowercase = 0; + + len = 0; + if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && + (ipb->ipl_info.ccw.vm_parm_len > 0)) { + + len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); + memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); + /* If at least one character is lowercase, we assume mixed + * case; otherwise we convert everything to lowercase. + */ + for (i = 0; i < len; i++) + if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ + (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ + (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ + has_lowercase = 1; + break; + } + if (!has_lowercase) + EBC_TOLOWER(dest, len); + EBCASC(dest, len); + } + dest[len] = 0; + + return len; +} + +size_t append_ipl_vmparm(char *dest, size_t size) +{ + size_t rc; + + rc = 0; + if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) + rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); + else + dest[0] = 0; + return rc; +} + +static ssize_t ipl_vm_parm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char parm[DIAG308_VMPARM_SIZE + 1] = {}; + + append_ipl_vmparm(parm, sizeof(parm)); + return sprintf(page, "%s\n", parm); +} + +static size_t scpdata_length(const char* buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + size_t count; + size_t i; + int has_lowercase; + + count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, + ipb->ipl_info.fcp.scp_data_len)); + if (!count) + goto out; + + has_lowercase = 0; + for (i = 0; i < count; i++) { + if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + count = 0; + goto out; + } + if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) + has_lowercase = 1; + } + + if (has_lowercase) + memcpy(dest, ipb->ipl_info.fcp.scp_data, count); + else + for (i = 0; i < count; i++) + dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); +out: + dest[count] = '\0'; + return count; +} + +size_t append_ipl_scpdata(char *dest, size_t len) +{ + size_t rc; + + rc = 0; + if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) + rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); + else + dest[0] = 0; + return rc; +} + + +static struct kobj_attribute sys_ipl_vm_parm_attr = + __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); + static ssize_t sys_ipl_device_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -282,17 +408,12 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL); -static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr, - char *buf, loff_t off, size_t count) +static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { - unsigned int size = IPL_PARMBLOCK_SIZE; - - if (off > size) - return 0; - if (off + count > size) - count = size - off; - memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count); - return count; + return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START, + IPL_PARMBLOCK_SIZE); } static struct bin_attribute ipl_parameter_attr = { @@ -304,18 +425,14 @@ static struct bin_attribute ipl_parameter_attr = { .read = &ipl_parameter_read, }; -static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *attr, - char *buf, loff_t off, size_t count) +static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) { unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len; void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data; - if (off > size) - return 0; - if (off + count > size) - count = size - off; - memcpy(buf, scp_data + off, count); - return count; + return memory_read_from_buffer(buf, count, &off, scp_data, size); } static struct bin_attribute ipl_scp_data_attr = { @@ -363,22 +480,34 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, return sprintf(page, "#unknown#\n"); memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); - strstrip(loadparm); + strim(loadparm); return sprintf(page, "%s\n", loadparm); } static struct kobj_attribute sys_ipl_ccw_loadparm_attr = __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); -static struct attribute *ipl_ccw_attrs[] = { +static struct attribute *ipl_ccw_attrs_vm[] = { &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_vm_parm_attr.attr, NULL, }; -static struct attribute_group ipl_ccw_attr_group = { - .attrs = ipl_ccw_attrs, +static struct attribute *ipl_ccw_attrs_lpar[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_ccw_attr_group_vm = { + .attrs = ipl_ccw_attrs_vm, +}; + +static struct attribute_group ipl_ccw_attr_group_lpar = { + .attrs = ipl_ccw_attrs_lpar }; /* NSS ipl device attributes */ @@ -388,6 +517,8 @@ DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name); static struct attribute *ipl_nss_attrs[] = { &sys_ipl_type_attr.attr, &sys_ipl_nss_name_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_vm_parm_attr.attr, NULL, }; @@ -430,7 +561,7 @@ out: return rc; } -static void ipl_run(struct shutdown_trigger *trigger) +static void __ipl_run(void *unused) { diag308(DIAG308_IPL, NULL); if (MACHINE_IS_VM) @@ -439,6 +570,11 @@ static void ipl_run(struct shutdown_trigger *trigger) reipl_ccw_dev(&ipl_info.data.ccw.dev_id); } +static void ipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__ipl_run, NULL); +} + static int __init ipl_init(void) { int rc; @@ -450,7 +586,12 @@ static int __init ipl_init(void) } switch (ipl_info.type) { case IPL_TYPE_CCW: - rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group); + if (MACHINE_IS_VM) + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_vm); + else + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_lpar); break; case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: @@ -481,11 +622,141 @@ static struct shutdown_action __refdata ipl_action = { * reipl shutdown action: Reboot Linux on shutdown. */ +/* VM IPL PARM attributes */ +static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + return sprintf(page, "%s\n", vmparm); +} + +static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, + size_t vmparm_max, + const char *buf, size_t len) +{ + int i, ip_len; + + /* ignore trailing newline */ + ip_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + ip_len--; + + if (ip_len > vmparm_max) + return -EINVAL; + + /* parm is used to store kernel options, check for common chars */ + for (i = 0; i < ip_len; i++) + if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))) + return -EINVAL; + + memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); + ipb->ipl_info.ccw.vm_parm_len = ip_len; + if (ip_len > 0) { + ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len); + ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len); + } else { + ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID; + } + + return len; +} + +/* NSS wrapper */ +static ssize_t reipl_nss_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len); +} + +static struct kobj_attribute sys_reipl_nss_vmparm_attr = + __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show, + reipl_nss_vmparm_store); +static struct kobj_attribute sys_reipl_ccw_vmparm_attr = + __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show, + reipl_ccw_vmparm_store); + /* FCP reipl device attributes */ -DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", +static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; + void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t padding; + size_t scpdata_len; + + if (off < 0) + return -EINVAL; + + if (off >= DIAG308_SCPDATA_SIZE) + return -ENOSPC; + + if (count > DIAG308_SCPDATA_SIZE - off) + count = DIAG308_SCPDATA_SIZE - off; + + memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); + scpdata_len = off + count; + + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len; + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; + reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_fcp_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO | S_IWUSR, + }, + .size = PAGE_SIZE, + .read = reipl_fcp_scpdata_read, + .write = reipl_fcp_scpdata_write, +}; + +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", reipl_block_fcp->ipl_info.fcp.bootprog); @@ -504,7 +775,6 @@ static struct attribute *reipl_fcp_attrs[] = { }; static struct attribute_group reipl_fcp_attr_group = { - .name = IPL_FCP_STR, .attrs = reipl_fcp_attrs, }; @@ -513,27 +783,26 @@ static struct attribute_group reipl_fcp_attr_group = { DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n", reipl_block_ccw->ipl_info.ccw.devno); -static void reipl_get_ascii_loadparm(char *loadparm) +static void reipl_get_ascii_loadparm(char *loadparm, + struct ipl_parameter_block *ibp) { - memcpy(loadparm, &reipl_block_ccw->ipl_info.ccw.load_param, - LOADPARM_LEN); + memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN); EBCASC(loadparm, LOADPARM_LEN); loadparm[LOADPARM_LEN] = 0; - strstrip(loadparm); + strim(loadparm); } -static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj, - struct kobj_attribute *attr, char *page) +static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, + char *page) { char buf[LOADPARM_LEN + 1]; - reipl_get_ascii_loadparm(buf); + reipl_get_ascii_loadparm(buf, ipb); return sprintf(page, "%s\n", buf); } -static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) +static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, + const char *buf, size_t len) { int i, lp_len; @@ -552,35 +821,128 @@ static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, return -EINVAL; } /* initialize loadparm with blanks */ - memset(&reipl_block_ccw->ipl_info.ccw.load_param, ' ', LOADPARM_LEN); + memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN); /* copy and convert to ebcdic */ - memcpy(&reipl_block_ccw->ipl_info.ccw.load_param, buf, lp_len); - ASCEBC(reipl_block_ccw->ipl_info.ccw.load_param, LOADPARM_LEN); + memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len); + ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN); return len; } +/* NSS wrapper */ +static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_nss, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_ccw, buf, len); +} + static struct kobj_attribute sys_reipl_ccw_loadparm_attr = - __ATTR(loadparm, 0644, reipl_ccw_loadparm_show, - reipl_ccw_loadparm_store); + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show, + reipl_ccw_loadparm_store); + +static struct attribute *reipl_ccw_attrs_vm[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_vmparm_attr.attr, + NULL, +}; -static struct attribute *reipl_ccw_attrs[] = { +static struct attribute *reipl_ccw_attrs_lpar[] = { &sys_reipl_ccw_device_attr.attr, &sys_reipl_ccw_loadparm_attr.attr, NULL, }; -static struct attribute_group reipl_ccw_attr_group = { +static struct attribute_group reipl_ccw_attr_group_vm = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_vm, +}; + +static struct attribute_group reipl_ccw_attr_group_lpar = { .name = IPL_CCW_STR, - .attrs = reipl_ccw_attrs, + .attrs = reipl_ccw_attrs_lpar, }; /* NSS reipl device attributes */ +static void reipl_get_ascii_nss_name(char *dst, + struct ipl_parameter_block *ipb) +{ + memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + EBCASC(dst, NSS_NAME_SIZE); + dst[NSS_NAME_SIZE] = 0; +} -DEFINE_IPL_ATTR_STR_RW(reipl_nss, name, "%s\n", "%s\n", reipl_nss_name); +static ssize_t reipl_nss_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char nss_name[NSS_NAME_SIZE + 1] = {}; + + reipl_get_ascii_nss_name(nss_name, reipl_block_nss); + return sprintf(page, "%s\n", nss_name); +} + +static ssize_t reipl_nss_name_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int nss_len; + + /* ignore trailing newline */ + nss_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + nss_len--; + + if (nss_len > NSS_NAME_SIZE) + return -EINVAL; + + memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE); + if (nss_len > 0) { + reipl_block_nss->ipl_info.ccw.vm_flags |= + DIAG308_VM_FLAGS_NSS_VALID; + memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len); + ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len); + } else { + reipl_block_nss->ipl_info.ccw.vm_flags &= + ~DIAG308_VM_FLAGS_NSS_VALID; + } + + return len; +} + +static struct kobj_attribute sys_reipl_nss_name_attr = + __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show, + reipl_nss_name_store); + +static struct kobj_attribute sys_reipl_nss_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show, + reipl_nss_loadparm_store); static struct attribute *reipl_nss_attrs[] = { &sys_reipl_nss_name_attr.attr, + &sys_reipl_nss_loadparm_attr.attr, + &sys_reipl_nss_vmparm_attr.attr, NULL, }; @@ -589,6 +951,13 @@ static struct attribute_group reipl_nss_attr_group = { .attrs = reipl_nss_attrs, }; +static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block) +{ + reipl_block_actual = reipl_block; + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block->hdr.len); +} + /* reipl type */ static int reipl_set_type(enum ipl_type type) @@ -604,6 +973,7 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_CCW_VM; else reipl_method = REIPL_METHOD_CCW_CIO; + set_reipl_block_actual(reipl_block_ccw); break; case IPL_TYPE_FCP: if (diag308_set_works) @@ -612,12 +982,17 @@ static int reipl_set_type(enum ipl_type type) reipl_method = REIPL_METHOD_FCP_RO_VM; else reipl_method = REIPL_METHOD_FCP_RO_DIAG; + set_reipl_block_actual(reipl_block_fcp); break; case IPL_TYPE_FCP_DUMP: reipl_method = REIPL_METHOD_FCP_DUMP; break; case IPL_TYPE_NSS: - reipl_method = REIPL_METHOD_NSS; + if (diag308_set_works) + reipl_method = REIPL_METHOD_NSS_DIAG; + else + reipl_method = REIPL_METHOD_NSS; + set_reipl_block_actual(reipl_block_nss); break; case IPL_TYPE_UNKNOWN: reipl_method = REIPL_METHOD_DEFAULT; @@ -654,12 +1029,40 @@ static struct kobj_attribute reipl_type_attr = __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); static struct kset *reipl_kset; +static struct kset *reipl_fcp_kset; + +static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, + const enum ipl_method m) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + char nss_name[NSS_NAME_SIZE + 1] = {}; + size_t pos = 0; + + reipl_get_ascii_loadparm(loadparm, ipb); + reipl_get_ascii_nss_name(nss_name, ipb); + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + + switch (m) { + case REIPL_METHOD_CCW_VM: + pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno); + break; + case REIPL_METHOD_NSS: + pos = sprintf(dst, "IPL %s", nss_name); + break; + default: + break; + } + if (strlen(loadparm) > 0) + pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm); + if (strlen(vmparm) > 0) + sprintf(dst + pos, " PARM %s", vmparm); +} -void reipl_run(struct shutdown_trigger *trigger) +static void __reipl_run(void *unused) { struct ccw_dev_id devid; - static char buf[100]; - char loadparm[LOADPARM_LEN + 1]; + static char buf[128]; switch (reipl_method) { case REIPL_METHOD_CCW_CIO: @@ -668,13 +1071,7 @@ void reipl_run(struct shutdown_trigger *trigger) reipl_ccw_dev(&devid); break; case REIPL_METHOD_CCW_VM: - reipl_get_ascii_loadparm(loadparm); - if (strlen(loadparm) == 0) - sprintf(buf, "IPL %X CLEAR", - reipl_block_ccw->ipl_info.ccw.devno); - else - sprintf(buf, "IPL %X CLEAR LOADPARM '%s'", - reipl_block_ccw->ipl_info.ccw.devno, loadparm); + get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM); __cpcmd(buf, NULL, 0, NULL); break; case REIPL_METHOD_CCW_DIAG: @@ -691,8 +1088,12 @@ void reipl_run(struct shutdown_trigger *trigger) case REIPL_METHOD_FCP_RO_VM: __cpcmd("IPL", NULL, 0, NULL); break; + case REIPL_METHOD_NSS_DIAG: + diag308(DIAG308_SET, reipl_block_nss); + diag308(DIAG308_IPL, NULL); + break; case REIPL_METHOD_NSS: - sprintf(buf, "IPL %s", reipl_nss_name); + get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); __cpcmd(buf, NULL, 0, NULL); break; case REIPL_METHOD_DEFAULT: @@ -701,22 +1102,46 @@ void reipl_run(struct shutdown_trigger *trigger) diag308(DIAG308_IPL, NULL); break; case REIPL_METHOD_FCP_DUMP: - default: break; } disabled_wait((unsigned long) __builtin_return_address(0)); } -static void __init reipl_probe(void) +static void reipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__reipl_run, NULL); +} + +static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) { - void *buffer; + ipb->hdr.len = IPL_PARM_BLK_CCW_LEN; + ipb->hdr.version = IPL_PARM_BLOCK_VERSION; + ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; + ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW; +} - buffer = (void *) get_zeroed_page(GFP_KERNEL); - if (!buffer) - return; - if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK) - diag308_set_works = 1; - free_page((unsigned long)buffer); +static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) +{ + /* LOADPARM */ + /* check if read scp info worked and set loadparm */ + if (sclp_ipl_info.is_valid) + memcpy(ipb->ipl_info.ccw.load_parm, + &sclp_ipl_info.loadparm, LOADPARM_LEN); + else + /* read scp info failed: set empty loadparm (EBCDIC blanks) */ + memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN); + ipb->hdr.flags = DIAG308_FLAGS_LP_VALID; + + /* VM PARM */ + if (MACHINE_IS_VM && diag308_set_works && + (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) { + + ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID; + ipb->ipl_info.ccw.vm_parm_len = + ipl_block.ipl_info.ccw.vm_parm_len; + memcpy(ipb->ipl_info.ccw.vm_parm, + ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE); + } } static int __init reipl_nss_init(void) @@ -725,10 +1150,31 @@ static int __init reipl_nss_init(void) if (!MACHINE_IS_VM) return 0; + + reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nss) + return -ENOMEM; + + if (!diag308_set_works) + sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group); if (rc) return rc; - strncpy(reipl_nss_name, kernel_nss_name, NSS_NAME_SIZE + 1); + + reipl_block_ccw_init(reipl_block_nss); + if (ipl_info.type == IPL_TYPE_NSS) { + memset(reipl_block_nss->ipl_info.ccw.nss_name, + ' ', NSS_NAME_SIZE); + memcpy(reipl_block_nss->ipl_info.ccw.nss_name, + kernel_nss_name, strlen(kernel_nss_name)); + ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE); + reipl_block_nss->ipl_info.ccw.vm_flags |= + DIAG308_VM_FLAGS_NSS_VALID; + + reipl_block_ccw_fill_parms(reipl_block_nss); + } + reipl_capabilities |= IPL_TYPE_NSS; return 0; } @@ -740,28 +1186,27 @@ static int __init reipl_ccw_init(void) reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); if (!reipl_block_ccw) return -ENOMEM; - rc = sysfs_create_group(&reipl_kset->kobj, &reipl_ccw_attr_group); - if (rc) { - free_page((unsigned long)reipl_block_ccw); - return rc; + + if (MACHINE_IS_VM) { + if (!diag308_set_works) + sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, + &reipl_ccw_attr_group_vm); + } else { + if(!diag308_set_works) + sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; + rc = sysfs_create_group(&reipl_kset->kobj, + &reipl_ccw_attr_group_lpar); } - reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN; - reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; - reipl_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN; - reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW; - reipl_block_ccw->hdr.flags = DIAG308_FLAGS_LP_VALID; - /* check if read scp info worked and set loadparm */ - if (sclp_ipl_info.is_valid) - memcpy(reipl_block_ccw->ipl_info.ccw.load_param, - &sclp_ipl_info.loadparm, LOADPARM_LEN); - else - /* read scp info failed: set empty loadparm (EBCDIC blanks) */ - memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40, - LOADPARM_LEN); - if (!MACHINE_IS_VM && !diag308_set_works) - sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO; - if (ipl_info.type == IPL_TYPE_CCW) + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_ccw); + if (ipl_info.type == IPL_TYPE_CCW) { reipl_block_ccw->ipl_info.ccw.devno = ipl_devno; + reipl_block_ccw_fill_parms(reipl_block_ccw); + } + reipl_capabilities |= IPL_TYPE_CCW; return 0; } @@ -770,22 +1215,45 @@ static int __init reipl_fcp_init(void) { int rc; - if ((!diag308_set_works) && (ipl_info.type != IPL_TYPE_FCP)) - return 0; - if ((!diag308_set_works) && (ipl_info.type == IPL_TYPE_FCP)) - make_attrs_ro(reipl_fcp_attrs); + if (!diag308_set_works) { + if (ipl_info.type == IPL_TYPE_FCP) { + make_attrs_ro(reipl_fcp_attrs); + sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO; + } else + return 0; + } reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); if (!reipl_block_fcp) return -ENOMEM; - rc = sysfs_create_group(&reipl_kset->kobj, &reipl_fcp_attr_group); + + /* sysfs: create fcp kset for mixing attr group and bin attrs */ + reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, + &reipl_kset->kobj); + if (!reipl_fcp_kset) { + free_page((unsigned long) reipl_block_fcp); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); if (rc) { - free_page((unsigned long)reipl_block_fcp); + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); return rc; } - if (ipl_info.type == IPL_TYPE_FCP) { + + rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_scp_data_attr); + if (rc) { + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; + } + + if (ipl_info.type == IPL_TYPE_FCP) memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); - } else { + else { reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; @@ -796,6 +1264,29 @@ static int __init reipl_fcp_init(void) return 0; } +static int __init reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } +out: + return reipl_set_type(reipl_type); +} + static int __init reipl_init(void) { int rc; @@ -817,10 +1308,7 @@ static int __init reipl_init(void) rc = reipl_nss_init(); if (rc) return rc; - rc = reipl_set_type(ipl_info.type); - if (rc) - return rc; - return 0; + return reipl_type_init(); } static struct shutdown_action __refdata reipl_action = { @@ -835,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = { /* FCP dump device attributes */ -DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", dump_block_fcp->ipl_info.fcp.bootprog); @@ -926,38 +1414,50 @@ static struct kobj_attribute dump_type_attr = static struct kset *dump_kset; -static void dump_run(struct shutdown_trigger *trigger) +static void diag308_dump(void *dump_block) +{ + diag308(DIAG308_SET, dump_block); + while (1) { + if (diag308(DIAG308_DUMP, NULL) != 0x302) + break; + udelay_simple(USEC_PER_SEC); + } +} + +static void __dump_run(void *unused) { struct ccw_dev_id devid; static char buf[100]; switch (dump_method) { case DUMP_METHOD_CCW_CIO: - smp_send_stop(); devid.devno = dump_block_ccw->ipl_info.ccw.devno; devid.ssid = 0; reipl_ccw_dev(&devid); break; case DUMP_METHOD_CCW_VM: - smp_send_stop(); sprintf(buf, "STORE STATUS"); __cpcmd(buf, NULL, 0, NULL); sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno); __cpcmd(buf, NULL, 0, NULL); break; case DUMP_METHOD_CCW_DIAG: - diag308(DIAG308_SET, dump_block_ccw); - diag308(DIAG308_DUMP, NULL); + diag308_dump(dump_block_ccw); break; case DUMP_METHOD_FCP_DIAG: - diag308(DIAG308_SET, dump_block_fcp); - diag308(DIAG308_DUMP, NULL); + diag308_dump(dump_block_fcp); break; - case DUMP_METHOD_NONE: default: - return; + break; } - printk(KERN_EMERG "Dump failed!\n"); +} + +static void dump_run(struct shutdown_trigger *trigger) +{ + if (dump_method == DUMP_METHOD_NONE) + return; + smp_send_stop(); + smp_call_ipl_cpu(__dump_run, NULL); } static int __init dump_ccw_init(void) @@ -1033,6 +1533,31 @@ static struct shutdown_action __refdata dump_action = { .init = dump_init, }; +static void dump_reipl_run(struct shutdown_trigger *trigger) +{ + unsigned long ipib = (unsigned long) reipl_block_actual; + unsigned int csum; + + csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); + mem_assign_absolute(S390_lowcore.ipib, ipib); + mem_assign_absolute(S390_lowcore.ipib_checksum, csum); + dump_run(trigger); +} + +static int __init dump_reipl_init(void) +{ + if (!diag308_set_works) + return -EOPNOTSUPP; + else + return 0; +} + +static struct shutdown_action __refdata dump_reipl_action = { + .name = SHUTDOWN_ACTION_DUMP_REIPL_STR, + .fn = dump_reipl_run, + .init = dump_reipl_init, +}; + /* * vmcmd shutdown action: Trigger vm command on shutdown. */ @@ -1041,17 +1566,20 @@ static char vmcmd_on_reboot[128]; static char vmcmd_on_panic[128]; static char vmcmd_on_halt[128]; static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); static struct attribute *vmcmd_attrs[] = { &sys_vmcmd_on_reboot_attr.attr, &sys_vmcmd_on_panic_attr.attr, &sys_vmcmd_on_halt_attr.attr, &sys_vmcmd_on_poff_attr.attr, + &sys_vmcmd_on_restart_attr.attr, NULL, }; @@ -1063,7 +1591,7 @@ static struct kset *vmcmd_kset; static void vmcmd_run(struct shutdown_trigger *trigger) { - char *cmd, *next_cmd; + char *cmd; if (strcmp(trigger->name, ON_REIPL_STR) == 0) cmd = vmcmd_on_reboot; @@ -1073,26 +1601,20 @@ static void vmcmd_run(struct shutdown_trigger *trigger) cmd = vmcmd_on_halt; else if (strcmp(trigger->name, ON_POFF_STR) == 0) cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; else return; if (strlen(cmd) == 0) return; - do { - next_cmd = strchr(cmd, '\n'); - if (next_cmd) { - next_cmd[0] = 0; - next_cmd += 1; - } - __cpcmd(cmd, NULL, 0, NULL); - cmd = next_cmd; - } while (cmd != NULL); + __cpcmd(cmd, NULL, 0, NULL); } static int vmcmd_init(void) { if (!MACHINE_IS_VM) - return -ENOTSUPP; + return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) return -ENOMEM; @@ -1108,12 +1630,10 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, static void stop_run(struct shutdown_trigger *trigger) { - if (strcmp(trigger->name, ON_PANIC_STR) == 0) + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) disabled_wait((unsigned long) __builtin_return_address(0)); - else { - signal_processor(smp_processor_id(), sigp_stop); - for (;;); - } + smp_stop_cpu(); } static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, @@ -1122,7 +1642,8 @@ static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, /* action list */ static struct shutdown_action *shutdown_actions_list[] = { - &ipl_action, &reipl_action, &dump_action, &vmcmd_action, &stop_action}; + &ipl_action, &reipl_action, &dump_reipl_action, &dump_action, + &vmcmd_action, &stop_action}; #define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *)) /* @@ -1135,13 +1656,15 @@ static int set_trigger(const char *buf, struct shutdown_trigger *trigger, size_t len) { int i; + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { - if (!shutdown_actions_list[i]) - continue; - if (strncmp(buf, shutdown_actions_list[i]->name, - strlen(shutdown_actions_list[i]->name)) == 0) { - trigger->action = shutdown_actions_list[i]; - return len; + if (sysfs_streq(buf, shutdown_actions_list[i]->name)) { + if (shutdown_actions_list[i]->init_rc) { + return shutdown_actions_list[i]->init_rc; + } else { + trigger->action = shutdown_actions_list[i]; + return len; + } } } return -EINVAL; @@ -1198,10 +1721,51 @@ static struct kobj_attribute on_panic_attr = static void do_panic(void) { + lgr_info_log(); on_panic_trigger.action->fn(&on_panic_trigger); stop_run(&on_panic_trigger); } +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &stop_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} + +static struct kobj_attribute on_restart_attr = + __ATTR(on_restart, 0644, on_restart_show, on_restart_store); + +static void __do_restart(void *ignore) +{ + __arch_local_irq_stosm(0x04); /* enable DAT */ + smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} + +void do_restart(void) +{ + tracing_off(); + debug_locks_off(); + lgr_info_log(); + smp_call_online_cpu(__do_restart, NULL); +} + /* on halt */ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; @@ -1278,7 +1842,9 @@ static void __init shutdown_triggers_init(void) if (sysfs_create_file(&shutdown_actions_kset->kobj, &on_poff_attr.attr)) goto fail; - + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_restart_attr.attr)) + goto fail; return; fail: panic("shutdown_triggers_init failed\n"); @@ -1291,14 +1857,13 @@ static void __init shutdown_actions_init(void) for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { if (!shutdown_actions_list[i]->init) continue; - if (shutdown_actions_list[i]->init()) - shutdown_actions_list[i] = NULL; + shutdown_actions_list[i]->init_rc = + shutdown_actions_list[i]->init(); } } static int __init s390_ipl_init(void) { - reipl_probe(); sclp_get_ipl_info(&sclp_ipl_info); shutdown_actions_init(); shutdown_triggers_init(); @@ -1374,7 +1939,7 @@ static int on_panic_notify(struct notifier_block *self, static struct notifier_block on_panic_nb = { .notifier_call = on_panic_notify, - .priority = 0, + .priority = INT_MIN, }; void __init setup_ipl(void) @@ -1398,17 +1963,24 @@ void __init setup_ipl(void) sizeof(ipl_info.data.nss.name)); break; case IPL_TYPE_UNKNOWN: - default: /* We have no info to copy */ break; } atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); } +void __init ipl_update_parameters(void) +{ + int rc; + + rc = diag308(DIAG308_STORE, &ipl_block); + if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG)) + diag308_set_works = 1; +} + void __init ipl_save_parameters(void) { struct cio_iplinfo iplinfo; - unsigned int *ipl_ptr; void *src, *dst; if (cio_get_iplinfo(&iplinfo)) @@ -1419,11 +1991,10 @@ void __init ipl_save_parameters(void) if (!iplinfo.is_qdio) return; ipl_flags |= IPL_PARMBLOCK_VALID; - ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR; - src = (void *)(unsigned long)*ipl_ptr; + src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr; dst = (void *)IPL_PARMBLOCK_ORIGIN; memmove(dst, src, PAGE_SIZE); - *ipl_ptr = IPL_PARMBLOCK_ORIGIN; + S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN; } static LIST_HEAD(rcall); @@ -1449,13 +2020,19 @@ static void do_reset_calls(void) { struct reset_call *reset; +#ifdef CONFIG_64BIT + if (diag308_set_works) { + diag308_reset(); + return; + } +#endif list_for_each_entry(reset, &rcall, list) reset->fn(); } u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -1474,15 +2051,19 @@ void s390_reset_system(void) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c36d8123ca1..99b0b09646c 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -1,105 +1,307 @@ /* - * arch/s390/kernel/irq.c - * - * Copyright IBM Corp. 2004,2007 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Thomas Spatzier (tspat@de.ibm.com) + * Copyright IBM Corp. 2004, 2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Thomas Spatzier <tspat@de.ibm.com>, * * This file contains interrupt related functions. */ -#include <linux/module.h> -#include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/seq_file.h> -#include <linux/cpu.h> #include <linux/proc_fs.h> #include <linux/profile.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> +#include <asm/irq.h> +#include <asm/hw_irq.h> +#include "entry.h" + +DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); +EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); + +struct irq_class { + char *name; + char *desc; +}; + +/* + * The list of "main" irq classes on s390. This is the list of interrupts + * that appear both in /proc/stat ("intr" line) and /proc/interrupts. + * Historically only external and I/O interrupts have been part of /proc/stat. + * We can't add the split external and I/O sub classes since the first field + * in the "intr" line in /proc/stat is supposed to be the sum of all other + * fields. + * Since the external and I/O interrupt fields are already sums we would end + * up with having a sum which accounts each interrupt twice. + */ +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, +}; + +/* + * The list of split external and I/O interrupts that appear only in + * /proc/interrupts. + * In addition this list contains non external / I/O events like NMIs. + */ +static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { + [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"}, + [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"}, + [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"}, + [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"}, + [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"}, + [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"}, + [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"}, + [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"}, + [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"}, + [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, + [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"}, + [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"}, + [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"}, + [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"}, + [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"}, + [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"}, + [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"}, + [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"}, + [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"}, + [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"}, + [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, + [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, + [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, + [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, +}; + +void __init init_IRQ(void) +{ + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} /* * show_interrupts is needed by /proc/interrupts. */ int show_interrupts(struct seq_file *p, void *v) { - static const char *intrclass_names[] = { "EXT", "I/O", }; - int i = *(loff_t *) v, j; + int irq = *(loff_t *) v; + int cpu; - if (i == 0) { + get_online_cpus(); + if (irq == 0) { seq_puts(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } + if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; + seq_printf(p, "%s: ", irqclass_main_desc[irq].name); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_putc(p, '\n'); + goto out; + } + for (irq = 0; irq < NR_ARCH_IRQS; irq++) { + seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); + if (irqclass_sub_desc[irq].desc) + seq_printf(p, " %s", irqclass_sub_desc[irq].desc); + seq_putc(p, '\n'); + } +out: + put_online_cpus(); + return 0; +} - if (i < NR_IRQS) { - seq_printf(p, "%s: ", intrclass_names[i]); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_putc(p, '\n'); - - } - - return 0; +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < THIN_INTERRUPT ? THIN_INTERRUPT : from; } /* - * For compatibilty only. S/390 specific setup of interrupts et al. is done - * much later in init_channel_subsystem(). + * Switch to the asynchronous interrupt stack for softirq execution. */ -void __init -init_IRQ(void) +void do_softirq_own_stack(void) { - /* nothing... */ + unsigned long old, new; + + /* Get current stack pointer. */ + asm volatile("la %0,0(15)" : "=a" (old)); + /* Check against async. stack address range. */ + new = S390_lowcore.async_stack; + if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { + /* Need to switch to the async. stack. */ + new -= STACK_FRAME_OVERHEAD; + ((struct stack_frame *) new)->back_chain = old; + asm volatile(" la 15,0(%0)\n" + " basr 14,%2\n" + " la 15,0(%1)\n" + : : "a" (new), "a" (old), + "a" (__do_softirq) + : "0", "1", "2", "3", "4", "5", "14", + "cc", "memory" ); + } else { + /* We are already on the async stack. */ + __do_softirq(); + } } /* - * Switch to the asynchronous interrupt stack for softirq execution. + * ext_int_hash[index] is the list head for all external interrupts that hash + * to this index. */ -extern void __do_softirq(void); +static struct hlist_head ext_int_hash[32] ____cacheline_aligned; + +struct ext_int_info { + ext_int_handler_t handler; + struct hlist_node entry; + struct rcu_head rcu; + u16 code; +}; + +/* ext_int_hash_lock protects the handler lists for external interrupts */ +static DEFINE_SPINLOCK(ext_int_hash_lock); + +static inline int ext_hash(u16 code) +{ + BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash))); + + return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1); +} + +int register_external_irq(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index; + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; + index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_add_head_rcu(&p->entry, &ext_int_hash[index]); + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(register_external_irq); -asmlinkage void do_softirq(void) +int unregister_external_irq(u16 code, ext_int_handler_t handler) { - unsigned long flags, old, new; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); - /* Check against async. stack address range. */ - new = S390_lowcore.async_stack; - if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { - /* Need to switch to the async. stack. */ - new -= STACK_FRAME_OVERHEAD; - ((struct stack_frame *) new)->back_chain = old; - - asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" - " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) - : "0", "1", "2", "3", "4", "5", "14", - "cc", "memory" ); - } else - /* We are already on the async stack. */ - __do_softirq(); + struct ext_int_info *p; + unsigned long flags; + int index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (p->code == code && p->handler == handler) { + hlist_del_rcu(&p->entry); + kfree_rcu(p, rcu); + } } + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(unregister_external_irq); + +static irqreturn_t do_ext_interrupt(int irq, void *dummy) +{ + struct pt_regs *regs = get_irq_regs(); + struct ext_code ext_code; + struct ext_int_info *p; + int index; - local_irq_restore(flags); + ext_code = *(struct ext_code *) ®s->int_code; + if (ext_code.code != EXT_IRQ_CLK_COMP) + __get_cpu_var(s390_idle).nohz_delay = 1; + + index = ext_hash(ext_code.code); + rcu_read_lock(); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (unlikely(p->code != ext_code.code)) + continue; + p->handler(ext_code, regs->int_parm, regs->int_parm_long); + } + rcu_read_unlock(); + return IRQ_HANDLED; } -void init_irq_proc(void) +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; + +void __init init_ext_interrupts(void) { - struct proc_dir_entry *root_irq_dir; + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_HLIST_HEAD(&ext_int_hash[idx]); - root_irq_dir = proc_mkdir("irq", NULL); - create_prof_cpu_mask(root_irq_dir); + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); +} + +static DEFINE_SPINLOCK(irq_subclass_lock); +static unsigned char irq_subclass_refcount[64]; + +void irq_subclass_register(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + if (!irq_subclass_refcount[subclass]) + ctl_set_bit(0, subclass); + irq_subclass_refcount[subclass]++; + spin_unlock(&irq_subclass_lock); +} +EXPORT_SYMBOL(irq_subclass_register); + +void irq_subclass_unregister(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + irq_subclass_refcount[subclass]--; + if (!irq_subclass_refcount[subclass]) + ctl_clear_bit(0, subclass); + spin_unlock(&irq_subclass_lock); } +EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 00000000000..b987ab2c154 --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,70 @@ +/* + * Jump label s390 support + * + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <linux/jump_label.h> +#include <asm/ipl.h> + +#ifdef HAVE_JUMP_LABEL + +struct insn { + u16 opcode; + s32 offset; +} __packed; + +struct insn_args { + struct jump_entry *entry; + enum jump_label_type type; +}; + +static void __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn insn; + int rc; + + if (type == JUMP_LABEL_ENABLE) { + /* brcl 15,offset */ + insn.opcode = 0xc0f4; + insn.offset = (entry->target - entry->code) >> 1; + } else { + /* brcl 0,0 */ + insn.opcode = 0xc004; + insn.offset = 0; + } + + rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); + WARN_ON_ONCE(rc < 0); +} + +static int __sm_arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + + __jump_label_transform(args->entry, args->type); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + + args.entry = entry; + args.type = type; + + stop_machine(__sm_arch_jump_label_transform, &args, NULL); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __jump_label_transform(entry, type); +} + +#endif diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index c5549a20628..bc71a7b95af 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation, 2002, 2006 + * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> */ @@ -25,333 +25,467 @@ #include <linux/preempt.h> #include <linux/stop_machine.h> #include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/hardirq.h> #include <asm/cacheflush.h> #include <asm/sections.h> -#include <asm/uaccess.h> -#include <linux/module.h> +#include <asm/dis.h> -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; - -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - /* Make sure the probe isn't going on a difficult instruction */ - if (is_prohibited_opcode((kprobe_opcode_t *) p->addr)) - return -EINVAL; - - if ((unsigned long)p->addr & 0x01) { - printk("Attempt to register kprobe at an unaligned address\n"); - return -EINVAL; - } +struct kretprobe_blackpoint kretprobe_blacklist[] = { }; - /* Use the get_insn_slot() facility for correctness */ - if (!(p->ainsn.insn = get_insn_slot())) - return -ENOMEM; +DEFINE_INSN_CACHE_OPS(dmainsn); - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); +static void *alloc_dmainsn_page(void) +{ + return (void *)__get_free_page(GFP_KERNEL | GFP_DMA); +} - get_instruction_type(&p->ainsn); - p->opcode = *p->addr; - return 0; +static void free_dmainsn_page(void *page) +{ + free_page((unsigned long)page); } -int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction) +struct kprobe_insn_cache kprobe_dmainsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex), + .alloc = alloc_dmainsn_page, + .free = free_dmainsn_page, + .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages), + .insn_size = MAX_INSN_SIZE, +}; + +static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) { - switch (*(__u8 *) instruction) { + if (!is_known_insn((unsigned char *)insn)) + return -EINVAL; + switch (insn[0] >> 8) { case 0x0c: /* bassm */ case 0x0b: /* bsm */ case 0x83: /* diag */ case 0x44: /* ex */ + case 0xac: /* stnsm */ + case 0xad: /* stosm */ return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } } - switch (*(__u16 *) instruction) { + switch (insn[0]) { case 0x0101: /* pr */ case 0xb25a: /* bsa */ case 0xb240: /* bakr */ case 0xb258: /* bsg */ case 0xb218: /* pc */ case 0xb228: /* pt */ + case 0xb98d: /* epsw */ return -EINVAL; } return 0; } -void __kprobes get_instruction_type(struct arch_specific_insn *ainsn) +static int __kprobes get_fixup_type(kprobe_opcode_t *insn) { /* default fixup method */ - ainsn->fixup = FIXUP_PSW_NORMAL; - - /* save r1 operand */ - ainsn->reg = (*ainsn->insn & 0xf0) >> 4; + int fixup = FIXUP_PSW_NORMAL; - /* save the instruction length (pop 5-5) in bytes */ - switch (*(__u8 *) (ainsn->insn) >> 6) { - case 0: - ainsn->ilen = 2; - break; - case 1: - case 2: - ainsn->ilen = 4; - break; - case 3: - ainsn->ilen = 6; - break; - } - - switch (*(__u8 *) ainsn->insn) { + switch (insn[0] >> 8) { case 0x05: /* balr */ case 0x0d: /* basr */ - ainsn->fixup = FIXUP_RETURN_REGISTER; + fixup = FIXUP_RETURN_REGISTER; /* if r2 = 0, no branch will be taken */ - if ((*ainsn->insn & 0x0f) == 0) - ainsn->fixup |= FIXUP_BRANCH_NOT_TAKEN; + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; break; case 0x06: /* bctr */ case 0x07: /* bcr */ - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + fixup = FIXUP_BRANCH_NOT_TAKEN; break; case 0x45: /* bal */ case 0x4d: /* bas */ - ainsn->fixup = FIXUP_RETURN_REGISTER; + fixup = FIXUP_RETURN_REGISTER; break; case 0x47: /* bc */ case 0x46: /* bct */ case 0x86: /* bxh */ case 0x87: /* bxle */ - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + fixup = FIXUP_BRANCH_NOT_TAKEN; break; case 0x82: /* lpsw */ - ainsn->fixup = FIXUP_NOT_REQUIRED; + fixup = FIXUP_NOT_REQUIRED; break; case 0xb2: /* lpswe */ - if (*(((__u8 *) ainsn->insn) + 1) == 0xb2) { - ainsn->fixup = FIXUP_NOT_REQUIRED; - } + if ((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; break; case 0xa7: /* bras */ - if ((*ainsn->insn & 0x0f) == 0x05) { - ainsn->fixup |= FIXUP_RETURN_REGISTER; - } + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; break; case 0xc0: - if ((*ainsn->insn & 0x0f) == 0x00 /* larl */ - || (*ainsn->insn & 0x0f) == 0x05) /* brasl */ - ainsn->fixup |= FIXUP_RETURN_REGISTER; + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if (*(((__u8 *) ainsn->insn) + 5 ) == 0x44 || /* bxhg */ - *(((__u8 *) ainsn->insn) + 5) == 0x45) {/* bxleg */ - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; } break; case 0xe3: /* bctg */ - if (*(((__u8 *) ainsn->insn) + 5) == 0x46) { - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; } break; } + return fixup; } -static int __kprobes swap_instruction(void *aref) +static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) { - struct ins_replace_args *args = aref; - u32 *addr; - u32 instr; - int err = -EFAULT; + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} +static void __kprobes copy_instruction(struct kprobe *p) +{ + s64 disp, new_disp; + u64 addr, new_addr; + + memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + if (!is_insn_relative_long(p->ainsn.insn)) + return; /* - * Text segment is read-only, hence we use stura to bypass dynamic - * address translation to exchange the instruction. Since stura - * always operates on four bytes, but we only want to exchange two - * bytes do some calculations to get things right. In addition we - * shall not cross any page boundaries (vmalloc area!) when writing - * the new instruction. + * For pc-relative instructions in RIL-b or RIL-c format patch the + * RI2 displacement field. We have already made sure that the insn + * slot for the patched instruction is within the same 2GB area + * as the original instruction (either kernel image or module area). + * Therefore the new displacement will always fit. */ - addr = (u32 *)((unsigned long)args->ptr & -4UL); - if ((unsigned long)args->ptr & 2) - instr = ((*addr) & 0xffff0000) | args->new; - else - instr = ((*addr) & 0x0000ffff) | args->new << 16; + disp = *(s32 *)&p->ainsn.insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&p->ainsn.insn[1] = new_disp; +} - asm volatile( - " lra %1,0(%1)\n" - "0: stura %2,%1\n" - "1: la %0,0\n" - "2:\n" - EX_TABLE(0b,2b) - : "+d" (err) - : "a" (addr), "d" (instr) - : "memory", "cc"); +static inline int is_kernel_addr(void *addr) +{ + return addr < (void *)_end; +} - return err; +static inline int is_module_addr(void *addr) +{ +#ifdef CONFIG_64BIT + BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); + if (addr < (void *)MODULES_VADDR) + return 0; + if (addr > (void *)MODULES_END) + return 0; +#endif + return 1; } -void __kprobes arch_arm_kprobe(struct kprobe *p) +static int __kprobes s390_get_insn_slot(struct kprobe *p) +{ + /* + * Get an insn slot that is within the same 2GB area like the original + * instruction. That way instructions with a 32bit signed displacement + * field can be patched and executed within the insn slot. + */ + p->ainsn.insn = NULL; + if (is_kernel_addr(p->addr)) + p->ainsn.insn = get_dmainsn_slot(); + else if (is_module_addr(p->addr)) + p->ainsn.insn = get_insn_slot(); + return p->ainsn.insn ? 0 : -ENOMEM; +} + +static void __kprobes s390_free_insn_slot(struct kprobe *p) +{ + if (!p->ainsn.insn) + return; + if (is_kernel_addr(p->addr)) + free_dmainsn_slot(p->ainsn.insn, 0); + else + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + if ((unsigned long) p->addr & 0x01) + return -EINVAL; + /* Make sure the probe isn't going on a difficult instruction */ + if (is_prohibited_opcode(p->addr)) + return -EINVAL; + if (s390_get_insn_slot(p)) + return -ENOMEM; + p->opcode = *p->addr; + copy_instruction(p); + return 0; +} + +struct ins_replace_args { + kprobe_opcode_t *ptr; + kprobe_opcode_t opcode; +}; + +static int __kprobes swap_instruction(void *aref) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long status = kcb->kprobe_status; - struct ins_replace_args args; - - args.ptr = p->addr; - args.old = p->opcode; - args.new = BREAKPOINT_INSTRUCTION; + struct ins_replace_args *args = aref; kcb->kprobe_status = KPROBE_SWAP_INST; - stop_machine_run(swap_instruction, &args, NR_CPUS); + probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); kcb->kprobe_status = status; + return 0; } -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long status = kcb->kprobe_status; struct ins_replace_args args; args.ptr = p->addr; - args.old = BREAKPOINT_INSTRUCTION; - args.new = p->opcode; + args.opcode = BREAKPOINT_INSTRUCTION; + stop_machine(swap_instruction, &args, NULL); +} - kcb->kprobe_status = KPROBE_SWAP_INST; - stop_machine_run(swap_instruction, &args, NR_CPUS); - kcb->kprobe_status = status; +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + struct ins_replace_args args; + + args.ptr = p->addr; + args.opcode = p->opcode; + stop_machine(swap_instruction, &args, NULL); } void __kprobes arch_remove_kprobe(struct kprobe *p) { - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, 0); - mutex_unlock(&kprobe_mutex); + s390_free_insn_slot(p); } -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) { - per_cr_bits kprobe_per_regs[1]; + struct per_regs per_kprobe; - memset(kprobe_per_regs, 0, sizeof(per_cr_bits)); - regs->psw.addr = (unsigned long)p->ainsn.insn | PSW_ADDR_AMODE; + /* Set up the PER control registers %cr9-%cr11 */ + per_kprobe.control = PER_EVENT_IFETCH; + per_kprobe.start = ip; + per_kprobe.end = ip; - /* Set up the per control reg info, will pass to lctl */ - kprobe_per_regs[0].em_instruction_fetch = 1; - kprobe_per_regs[0].starting_addr = (unsigned long)p->ainsn.insn; - kprobe_per_regs[0].ending_addr = (unsigned long)p->ainsn.insn + 1; + /* Save control regs and psw mask */ + __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + kcb->kprobe_saved_imask = regs->psw.mask & + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); - /* Set the PER control regs, turns on single step for this address */ - __ctl_load(kprobe_per_regs, 9, 11); + /* Set PER control regs, turns on single step for the given address */ + __ctl_load(per_kprobe, 9, 11); regs->psw.mask |= PSW_MASK_PER; - regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + regs->psw.addr = ip | PSW_ADDR_AMODE; } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) { - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; - kcb->prev_kprobe.kprobe_saved_imask = kcb->kprobe_saved_imask; - memcpy(kcb->prev_kprobe.kprobe_saved_ctl, kcb->kprobe_saved_ctl, - sizeof(kcb->kprobe_saved_ctl)); + /* Restore control regs and psw mask, set new psw address */ + __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + regs->psw.addr = ip | PSW_ADDR_AMODE; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +/* + * Activate a kprobe by storing its pointer to current_kprobe. The + * previous kprobe is stored in kcb->prev_kprobe. A stack of up to + * two kprobes can be active, see KPROBE_REENTER. + */ +static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; - kcb->kprobe_status = kcb->prev_kprobe.status; - kcb->kprobe_saved_imask = kcb->prev_kprobe.kprobe_saved_imask; - memcpy(kcb->kprobe_saved_ctl, kcb->prev_kprobe.kprobe_saved_ctl, - sizeof(kcb->kprobe_saved_ctl)); + kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.status = kcb->kprobe_status; + __get_cpu_var(current_kprobe) = p; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +/* + * Deactivate a kprobe by backing up to the previous state. If the + * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL, + * for any other state prev_kprobe.kp will be NULL. + */ +static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; - /* Save the interrupt and per flags */ - kcb->kprobe_saved_imask = regs->psw.mask & - (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); - /* Save the control regs that govern PER */ - __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; /* Replace the return addr with trampoline addr */ - regs->gprs[14] = (unsigned long)&kretprobe_trampoline; + regs->gprs[14] = (unsigned long) &kretprobe_trampoline; +} + +static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb, + struct kprobe *p) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + default: + /* + * A kprobe on the code path to single step an instruction + * is a BUG. The code path resides in the .kprobes.text + * section and is executed with interrupts disabled. + */ + printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + } } static int __kprobes kprobe_handler(struct pt_regs *regs) { - struct kprobe *p; - int ret = 0; - unsigned long *addr = (unsigned long *) - ((regs->psw.addr & PSW_ADDR_INSN) - 2); struct kprobe_ctlblk *kcb; + struct kprobe *p; /* - * We don't want to be preempted for the entire - * duration of kprobe processing + * We want to disable preemption for the entire duration of kprobe + * processing. That includes the calls to the pre/post handlers + * and single stepping the kprobe instruction. */ preempt_disable(); kcb = get_kprobe_ctlblk(); + p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2)); - /* Check we're not actually recursing */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - if (kcb->kprobe_status == KPROBE_HIT_SS && - *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->psw.mask &= ~PSW_MASK_PER; - regs->psw.mask |= kcb->kprobe_saved_imask; - goto no_kprobe; - } - /* We have reentered the kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. + if (p) { + if (kprobe_running()) { + /* + * We have hit a kprobe while another is still + * active. This can happen in the pre and post + * handler. Single step the instruction of the + * new probe but do not call any handler function + * of this secondary kprobe. + * push_kprobe and pop_kprobe saves and restores + * the currently active kprobe. */ - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kprobes_inc_nmissed_count(p); - prepare_singlestep(p, regs); + kprobe_reenter_check(kcb, p); + push_kprobe(kcb, p); kcb->kprobe_status = KPROBE_REENTER; - return 1; } else { - p = __get_cpu_var(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; - } + /* + * If we have no pre-handler or it returned 0, we + * continue with single stepping. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry + * for jprobe processing, so get out doing nothing + * more here. + */ + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) + return 1; + kcb->kprobe_status = KPROBE_HIT_SS; } - goto no_kprobe; - } - - p = get_kprobe(addr); - if (!p) - /* - * No kprobe at this address. The fault has not been - * caused by a kprobe breakpoint. The race of breakpoint - * vs. kprobe remove does not exist because on s390 we - * use stop_machine_run to arm/disarm the breakpoints. - */ - goto no_kprobe; - - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - set_current_kprobe(p, regs, kcb); - if (p->pre_handler && p->pre_handler(p, regs)) - /* handler has already set things up, so skip ss setup */ + enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); return 1; - -ss_probe: - prepare_singlestep(p, regs); - kcb->kprobe_status = KPROBE_HIT_SS; - return 1; - -no_kprobe: + } else if (kprobe_running()) { + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + /* + * Continuation after the jprobe completed and + * caused the jprobe_return trap. The jprobe + * break_handler "returns" to the original + * function that still has the kprobe breakpoint + * installed. We continue with single stepping. + */ + kcb->kprobe_status = KPROBE_HIT_SS; + enable_singlestep(kcb, regs, + (unsigned long) p->ainsn.insn); + return 1; + } /* else: + * No kprobe at this address and the current kprobe + * has no break handler (no jprobe!). The kernel just + * exploded, let the standard trap handler pick up the + * pieces. + */ + } /* else: + * No kprobe at this address and no active kprobe. The trap has + * not been caused by a kprobe breakpoint. The race of breakpoint + * vs. kprobe remove does not exist because on s390 as we use + * stop_machine to arm/disarm the breakpoints. + */ preempt_enable_no_resched(); - return ret; + return 0; } /* @@ -360,7 +494,7 @@ no_kprobe: * - When the probed function returns, this probe * causes the handlers to fire */ -void kretprobe_trampoline_holder(void) +static void __used kretprobe_trampoline_holder(void) { asm volatile(".global kretprobe_trampoline\n" "kretprobe_trampoline: bcr 0,0\n"); @@ -372,20 +506,20 @@ void kretprobe_trampoline_holder(void) static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct kretprobe_instance *ri = NULL; + struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address; + unsigned long trampoline_address; + kprobe_opcode_t *correct_ret_addr; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* * It is possible to have multiple instances associated with a given * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return + * have a return probe installed on them, and/or more than one return * return probe was registered for a target function. * * We can handle this because: @@ -395,34 +529,59 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + ri = NULL; + orig_ret_address = 0; + correct_ret_addr = NULL; + trampoline_address = (unsigned long) &kretprobe_trampoline; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; - if (ri->rp && ri->rp->handler) + orig_ret_address = (unsigned long) ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long) ri->ret_addr; + + if (ri->rp && ri->rp->handler) { + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); + } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); - if (orig_ret_address != trampoline_address) { + if (orig_ret_address != trampoline_address) /* * This is the real return address. Any other * instances associated with this task are for * other calls deeper on the call stack */ break; - } } - kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; - reset_current_kprobe(); - spin_unlock_irqrestore(&kretprobe_lock, flags); + pop_kprobe(get_kprobe_ctlblk()); + kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -445,55 +604,42 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; + int fixup = get_fixup_type(p->ainsn.insn); - regs->psw.addr &= PSW_ADDR_INSN; + if (fixup & FIXUP_PSW_NORMAL) + ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; - if (p->ainsn.fixup & FIXUP_PSW_NORMAL) - regs->psw.addr = (unsigned long)p->addr + - ((unsigned long)regs->psw.addr - - (unsigned long)p->ainsn.insn); - - if (p->ainsn.fixup & FIXUP_BRANCH_NOT_TAKEN) - if ((unsigned long)regs->psw.addr - - (unsigned long)p->ainsn.insn == p->ainsn.ilen) - regs->psw.addr = (unsigned long)p->addr + p->ainsn.ilen; + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(p->ainsn.insn[0] >> 8); + if (ip - (unsigned long) p->ainsn.insn == ilen) + ip = (unsigned long) p->addr + ilen; + } - if (p->ainsn.fixup & FIXUP_RETURN_REGISTER) - regs->gprs[p->ainsn.reg] = ((unsigned long)p->addr + - (regs->gprs[p->ainsn.reg] - - (unsigned long)p->ainsn.insn)) - | PSW_ADDR_AMODE; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (p->ainsn.insn[0] & 0xf0) >> 4; + regs->gprs[reg] += (unsigned long) p->addr - + (unsigned long) p->ainsn.insn; + } - regs->psw.addr |= PSW_ADDR_AMODE; - /* turn off PER mode */ - regs->psw.mask &= ~PSW_MASK_PER; - /* Restore the original per control regs */ - __ctl_load(kcb->kprobe_saved_ctl, 9, 11); - regs->psw.mask |= kcb->kprobe_saved_imask; + disable_singlestep(kcb, regs, ip); } static int __kprobes post_kprobe_handler(struct pt_regs *regs) { - struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); - if (!cur) + if (!p) return 0; - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); + p->post_handler(p, regs, 0); } - resume_execution(cur, regs); - - /*Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: + resume_execution(p, regs); + pop_kprobe(kcb); preempt_enable_no_resched(); /* @@ -501,17 +647,16 @@ out: * will have PER set, in which case, continue the remaining processing * of do_single_step, as if this is not a probe hit. */ - if (regs->psw.mask & PSW_MASK_PER) { + if (regs->psw.mask & PSW_MASK_PER) return 0; - } return 1; } -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) { - struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); const struct exception_table_entry *entry; switch(kcb->kprobe_status) { @@ -527,23 +672,18 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * and allow the page fault handler to continue as a * normal page fault. */ - regs->psw.addr = (unsigned long)cur->addr | PSW_ADDR_AMODE; - regs->psw.mask &= ~PSW_MASK_PER; - regs->psw.mask |= kcb->kprobe_saved_imask; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else - reset_current_kprobe(); + disable_singlestep(kcb, regs, (unsigned long) p->addr); + pop_kprobe(kcb); preempt_enable_no_resched(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting + * we can also use npre/npostfault count for accounting * these specific fault cases. */ - kprobes_inc_nmissed_count(cur); + kprobes_inc_nmissed_count(p); /* * We come here because instructions in the pre/post @@ -552,7 +692,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * copy_from_user(), get_user() etc. Let the * user-specified handler try to fix it first. */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + if (p->fault_handler && p->fault_handler(p, regs, trapnr)) return 1; /* @@ -561,7 +701,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) */ entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (entry) { - regs->psw.addr = entry->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; return 1; } @@ -576,57 +716,71 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + int ret; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + ret = kprobe_trap_handler(regs, trapnr); + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; +} + /* * Wrapper routine to for handling exceptions. */ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct die_args *args = (struct die_args *)data; + struct die_args *args = (struct die_args *) data; + struct pt_regs *regs = args->regs; int ret = NOTIFY_DONE; + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + switch (val) { case DIE_BPT: - if (kprobe_handler(args->regs)) + if (kprobe_handler(regs)) ret = NOTIFY_STOP; break; case DIE_SSTEP: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(regs)) ret = NOTIFY_STOP; break; case DIE_TRAP: - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) + if (!preemptible() && kprobe_running() && + kprobe_trap_handler(regs, args->trapnr)) ret = NOTIFY_STOP; - preempt_enable(); break; default: break; } + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack; memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); /* setup return addr to the jprobe handler routine */ - regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); - /* r14 is the function return address */ - kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; /* r15 is the stack pointer */ - kcb->jprobe_saved_r15 = (unsigned long)regs->gprs[15]; - addr = (unsigned long)kcb->jprobe_saved_r15; + stack = (unsigned long) regs->gprs[15]; - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr, - MIN_STACK_SIZE(addr)); + memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack)); return 1; } @@ -635,7 +789,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } @@ -643,30 +797,29 @@ void __kprobes jprobe_return_end(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_r15); + unsigned long stack; + + stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15]; /* Put the regs back */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); /* put the stack back */ - memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, - MIN_STACK_SIZE(stack_addr)); + memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack)); preempt_enable_no_resched(); return 1; } -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) & kretprobe_trampoline, +static struct kprobe trampoline = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline_p); + return register_kprobe(&trampoline); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *) & kretprobe_trampoline) - return 1; - return 0; + return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline; } diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 00000000000..6ea6d69339b --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,186 @@ +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/facility.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE); +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = (void *) lgr_page; + + if (stsi(si, 1, 1, 1)) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = (void *) lgr_page; + + if (stsi(si, 2, 2, 2)) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + memcpy(&lgr_info->lpar_number, &si->lpar_number, + sizeof(lgr_info->lpar_number)); +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = (void *) lgr_page; + int i; + + if (stsi(si, 3, 2, 2)) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + int level; + + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + level = stsi(NULL, 0, 0, 0); + lgr_info->level = level; + if (level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(unsigned long ignored) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer = + TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0); + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) + return -ENOMEM; + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + lgr_timer_set(); + return 0; +} +module_init(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3c77dd36994..719e27b2cf2 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,9 @@ /* - * arch/s390/kernel/machine_kexec.c - * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005, 2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ #include <linux/device.h> @@ -12,27 +11,187 @@ #include <linux/kexec.h> #include <linux/delay.h> #include <linux/reboot.h> +#include <linux/ftrace.h> +#include <linux/debug_locks.h> +#include <linux/suspend.h> #include <asm/cio.h> #include <asm/setup.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/smp.h> #include <asm/reset.h> #include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/elf.h> +#include <asm/asm-offsets.h> +#include <asm/os_info.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Initialize CPU ELF notes + */ +static void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu; + + this_cpu = smp_find_processor_id(stap()); + add_elf_notes(this_cpu); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + add_elf_notes(cpu); + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +/* + * PM notifier callback for kdump + */ +static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + if (crashk_res.start) + crash_map_reserved_pages(); + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + if (crashk_res.start) + crash_unmap_reserved_pages(); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init machine_kdump_pm_init(void) +{ + pm_notifier(machine_kdump_pm_cb, 0); + return 0; +} +arch_initcall(machine_kdump_pm_init); +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + setup_regs(); + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else { + vmem_remove_mapping(crashk_res.start, size); + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); + } +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; /* Can't replace kernel image since it is read-only. */ if (ipl_flags & IPL_NSS_VALID) - return -ENOSYS; + return -EOPNOTSUPP; + + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) @@ -50,22 +209,64 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { - printk(KERN_INFO "kexec: machine_shutdown called\n"); } -void machine_kexec(struct kimage *image) +void machine_crash_shutdown(struct pt_regs *regs) { - relocate_kernel_t data_mover; +} - smp_send_stop(); - pfault_fini(); - s390_reset_system(); +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) +{ + relocate_kernel_t data_mover; + struct kimage *image = data; data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); +} + +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + __arch_local_irq_stosm(0x04); /* enable DAT */ + pfault_fini(); + tracing_off(); + debug_locks_off(); + if (image->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); + s390_reset_system(__do_machine_kdump, data); + } else { + s390_reset_system(__do_machine_kexec, data); + } + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ +void machine_kexec(struct kimage *image) +{ + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; + tracer_disable(); + smp_send_stop(); + smp_call_ipl_cpu(__machine_kexec, image); } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S new file mode 100644 index 00000000000..08dcf21cb8d --- /dev/null +++ b/arch/s390/kernel/mcount.S @@ -0,0 +1,73 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + br %r14 + +ENTRY(_mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + +ENTRY(ftrace_caller) +#endif + stm %r2,%r5,16(%r15) + bras %r1,2f +0: .long ftrace_trace_function +1: .long function_trace_stop +2: l %r2,1b-0b(%r1) + icm %r2,0xf,0(%r2) + jnz 3f + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + l %r3,100(%r15) + la %r2,0(%r14) + st %r0,__SF_BACKCHAIN(%r15) + la %r3,0(%r3) + ahi %r2,-MCOUNT_INSN_SIZE + l %r14,0b-0b(%r1) + l %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + l %r2,100(%r15) + l %r3,152(%r15) +ENTRY(ftrace_graph_caller) +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: st %r2,100(%r15) +#endif + ahi %r15,96 + l %r14,56(%r15) +3: lm %r2,%r5,16(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +ENTRY(return_to_handler) + stm %r2,%r5,16(%r15) + st %r14,56(%r15) + lr %r0,%r15 + ahi %r15,-96 + st %r0,__SF_BACKCHAIN(%r15) + bras %r1,0f + .long ftrace_return_to_handler +0: l %r2,0b-0b(%r1) + basr %r14,%r2 + lr %r14,%r2 + ahi %r15,96 + lm %r2,%r5,16(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S new file mode 100644 index 00000000000..1c52eae3396 --- /dev/null +++ b/arch/s390/kernel/mcount64.S @@ -0,0 +1,65 @@ +/* + * Copyright IBM Corp. 2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + br %r14 + +ENTRY(_mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + +ENTRY(ftrace_caller) +#endif + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) + aghi %r2,-MCOUNT_INSN_SIZE + larl %r14,ftrace_trace_function + lg %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + lg %r2,168(%r15) + lg %r3,272(%r15) +ENTRY(ftrace_graph_caller) +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: stg %r2,168(%r15) +#endif + aghi %r15,160 + lmg %r2,%r5,32(%r15) + lg %r14,112(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +ENTRY(return_to_handler) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,160 + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 59b4e796680..b89b59158b9 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/module.c - Kernel module help for s390. + * Kernel module help for s390. * * S390 version - * Copyright (C) 2002, 2003 IBM Deutschland Entwicklung GmbH, - * IBM Corporation + * Copyright IBM Corp. 2002, 2003 * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -45,23 +44,28 @@ #define PLT_ENTRY_SIZE 20 #endif /* CONFIG_64BIT */ +#ifdef CONFIG_64BIT void *module_alloc(unsigned long size) { - if (size == 0) + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return vmalloc(size); + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); } +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) { + if (mod) { + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; + } vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } -static void -check_rela(Elf_Rela *rela, struct module *me) +static void check_rela(Elf_Rela *rela, struct module *me) { struct mod_arch_syminfo *info; @@ -110,9 +114,8 @@ check_rela(Elf_Rela *rela, struct module *me) * Account for GOT and PLT relocations. We can't add sections for * got and plt but we can increase the core module size. */ -int -module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, - char *secstrings, struct module *me) +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *me) { Elf_Shdr *symtab; Elf_Sym *symbols; @@ -174,22 +177,52 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, return 0; } -int -apply_relocate(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, - unsigned int relsec, struct module *me) +static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, + int sign, int bits, int shift) { - printk(KERN_ERR "module %s: RELOCATION unsupported\n", - me->name); - return -ENOEXEC; + unsigned long umax; + long min, max; + + if (val & ((1UL << shift) - 1)) + return -ENOEXEC; + if (sign) { + val = (Elf_Addr)(((long) val) >> shift); + min = -(1L << (bits - 1)); + max = (1L << (bits - 1)) - 1; + if ((long) val < min || (long) val > max) + return -ENOEXEC; + } else { + val >>= shift; + umax = ((1UL << (bits - 1)) << 1) - 1; + if ((unsigned long) val > umax) + return -ENOEXEC; + } + + if (bits == 8) + *(unsigned char *) loc = val; + else if (bits == 12) + *(unsigned short *) loc = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + else if (bits == 16) + *(unsigned short *) loc = val; + else if (bits == 20) + *(unsigned int *) loc = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | + (*(unsigned int *) loc & 0xf00000ff); + else if (bits == 32) + *(unsigned int *) loc = val; + else if (bits == 64) + *(unsigned long *) loc = val; + return 0; } -static int -apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, - struct module *me) +static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, + const char *strtab, struct module *me) { struct mod_arch_syminfo *info; Elf_Addr loc, val; int r_type, r_sym; + int rc = -ENOEXEC; /* This is where to make the change */ loc = base + rela->r_offset; @@ -201,6 +234,9 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = symtab[r_sym].st_value; switch (r_type) { + case R_390_NONE: /* No relocation. */ + rc = 0; + break; case R_390_8: /* Direct 8 bit. */ case R_390_12: /* Direct 12 bit. */ case R_390_16: /* Direct 16 bit. */ @@ -209,20 +245,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_64: /* Direct 64 bit. */ val += rela->r_addend; if (r_type == R_390_8) - *(unsigned char *) loc = val; + rc = apply_rela_bits(loc, val, 0, 8, 0); else if (r_type == R_390_12) - *(unsigned short *) loc = (val & 0xfff) | - (*(unsigned short *) loc & 0xf000); + rc = apply_rela_bits(loc, val, 0, 12, 0); else if (r_type == R_390_16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_20) - *(unsigned int *) loc = - (*(unsigned int *) loc & 0xf00000ff) | - (val & 0xfff) << 16 | (val & 0xff000) >> 4; + rc = apply_rela_bits(loc, val, 1, 20, 0); else if (r_type == R_390_32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_PC16: /* PC relative 16 bit. */ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ @@ -231,15 +264,15 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PC64: /* PC relative 64 bit. */ val += rela->r_addend - loc; if (r_type == R_390_PC16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 1, 16, 0); else if (r_type == R_390_PC16DBL) - *(unsigned short *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 16, 1); else if (r_type == R_390_PC32DBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); else if (r_type == R_390_PC32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 1, 32, 0); else if (r_type == R_390_PC64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 1, 64, 0); break; case R_390_GOT12: /* 12 bit GOT offset. */ case R_390_GOT16: /* 16 bit GOT offset. */ @@ -264,26 +297,24 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = info->got_offset + rela->r_addend; if (r_type == R_390_GOT12 || r_type == R_390_GOTPLT12) - *(unsigned short *) loc = (val & 0xfff) | - (*(unsigned short *) loc & 0xf000); + rc = apply_rela_bits(loc, val, 0, 12, 0); else if (r_type == R_390_GOT16 || r_type == R_390_GOTPLT16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOT20 || r_type == R_390_GOTPLT20) - *(unsigned int *) loc = - (*(unsigned int *) loc & 0xf00000ff) | - (val & 0xfff) << 16 | (val & 0xff000) >> 4; + rc = apply_rela_bits(loc, val, 1, 20, 0); else if (r_type == R_390_GOT32 || r_type == R_390_GOTPLT32) - *(unsigned int *) loc = val; - else if (r_type == R_390_GOTENT || - r_type == R_390_GOTPLTENT) - *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) { + val += (Elf_Addr) me->module_core - loc; + rc = apply_rela_bits(loc, val, 1, 32, 1); + } break; case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ @@ -310,27 +341,32 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || - r_type == R_390_PLTOFF32 - || r_type == R_390_PLTOFF64 - ) + r_type == R_390_PLTOFF32 || + r_type == R_390_PLTOFF64) val = me->arch.plt_offset - me->arch.got_offset + info->plt_offset + rela->r_addend; - else - val = (Elf_Addr) me->module_core + - me->arch.plt_offset + info->plt_offset + - rela->r_addend - loc; + else { + if (!((r_type == R_390_PLT16DBL && + val - loc + 0xffffUL < 0x1ffffeUL) || + (r_type == R_390_PLT32DBL && + val - loc + 0xffffffffULL < 0x1fffffffeULL))) + val = (Elf_Addr) me->module_core + + me->arch.plt_offset + + info->plt_offset; + val += rela->r_addend - loc; + } if (r_type == R_390_PLT16DBL) - *(unsigned short *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 16, 1); else if (r_type == R_390_PLTOFF16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_PLT32DBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); else if (r_type == R_390_PLT32 || r_type == R_390_PLTOFF32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_PLT64 || r_type == R_390_PLTOFF64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_GOTOFF16: /* 16 bit offset to GOT. */ case R_390_GOTOFF32: /* 32 bit offset to GOT. */ @@ -338,20 +374,20 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = val + rela->r_addend - ((Elf_Addr) me->module_core + me->arch.got_offset); if (r_type == R_390_GOTOFF16) - *(unsigned short *) loc = val; + rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 0, 32, 0); else if (r_type == R_390_GOTOFF64) - *(unsigned long *) loc = val; + rc = apply_rela_bits(loc, val, 0, 64, 0); break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ val = (Elf_Addr) me->module_core + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) - *(unsigned int *) loc = val; + rc = apply_rela_bits(loc, val, 1, 32, 0); else if (r_type == R_390_GOTPCDBL) - *(unsigned int *) loc = val >> 1; + rc = apply_rela_bits(loc, val, 1, 32, 1); break; case R_390_COPY: case R_390_GLOB_DAT: /* Create GOT entry. */ @@ -359,19 +395,25 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_RELATIVE: /* Adjust by program base. */ /* Only needed if we want to support loading of modules linked with -shared. */ - break; + return -ENOEXEC; default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", + printk(KERN_ERR "module %s: unknown relocation: %u\n", me->name, r_type); return -ENOEXEC; } + if (rc) { + printk(KERN_ERR "module %s: relocation error for symbol %s " + "(r_type %i, value 0x%lx)\n", + me->name, strtab + symtab[r_sym].st_name, + r_type, (unsigned long) val); + return rc; + } return 0; } -int -apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, - unsigned int symindex, unsigned int relsec, - struct module *me) +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) { Elf_Addr base; Elf_Sym *symtab; @@ -387,7 +429,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); for (i = 0; i < n; i++, rela++) { - rc = apply_rela(rela, base, symtab, me); + rc = apply_rela(rela, base, symtab, strtab, me); if (rc) return rc; } @@ -399,10 +441,6 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { vfree(me->arch.syminfo); - return module_bug_finalize(hdr, sechdrs, me); -} - -void module_arch_cleanup(struct module *mod) -{ - module_bug_cleanup(mod); + me->arch.syminfo = NULL; + return 0; } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c new file mode 100644 index 00000000000..210e1285f75 --- /dev/null +++ b/arch/s390/kernel/nmi.c @@ -0,0 +1,372 @@ +/* + * Machine check handler + * + * Copyright IBM Corp. 2000, 2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + */ + +#include <linux/kernel_stat.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/time.h> +#include <linux/module.h> +#include <asm/lowcore.h> +#include <asm/smp.h> +#include <asm/etr.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/crw.h> + +struct mcck_struct { + int kill_task; + int channel_report; + int warning; + unsigned long long mcck_code; +}; + +static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); + +static void s390_handle_damage(char *msg) +{ + smp_send_stop(); + disabled_wait((unsigned long) __builtin_return_address(0)); + while (1); +} + +/* + * Main machine check handler function. Will be called with interrupts enabled + * or disabled and machine checks enabled or disabled. + */ +void s390_handle_mcck(void) +{ + unsigned long flags; + struct mcck_struct mcck; + + /* + * Disable machine checks and get the current state of accumulated + * machine checks. Afterwards delete the old state and enable machine + * checks again. + */ + local_irq_save(flags); + local_mcck_disable(); + mcck = __get_cpu_var(cpu_mcck); + memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + clear_cpu_flag(CIF_MCCK_PENDING); + local_mcck_enable(); + local_irq_restore(flags); + + if (mcck.channel_report) + crw_handle_channel_report(); + /* + * A warning may remain for a prolonged period on the bare iron. + * (actually until the machine is powered off, or the problem is gone) + * So we just stop listening for the WARNING MCH and avoid continuously + * being interrupted. One caveat is however, that we must do this per + * processor and cannot use the smp version of ctl_clear_bit(). + * On VM we only get one interrupt per virtally presented machinecheck. + * Though one suffices, we may get one interrupt per (virtual) cpu. + */ + if (mcck.warning) { /* WARNING pending ? */ + static int mchchk_wng_posted = 0; + + /* Use single cpu clear, as we cannot handle smp here. */ + __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + if (xchg(&mchchk_wng_posted, 1) == 0) + kill_cad_pid(SIGPWR, 1); + } + if (mcck.kill_task) { + local_irq_enable(); + printk(KERN_EMERG "mcck: Terminating task because of machine " + "malfunction (code 0x%016llx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); + do_exit(SIGSEGV); + } +} +EXPORT_SYMBOL_GPL(s390_handle_mcck); + +/* + * returns 0 if all registers could be validated + * returns 1 otherwise + */ +static int notrace s390_revalidate_registers(struct mci *mci) +{ + int kill_task; + u64 zero; + void *fpt_save_area, *fpt_creg_save_area; + + kill_task = 0; + zero = 0; + + if (!mci->gr) { + /* + * General purpose registers couldn't be restored and have + * unknown contents. Process needs to be terminated. + */ + kill_task = 1; + } + if (!mci->fp) { + /* + * Floating point registers can't be restored and + * therefore the process needs to be terminated. + */ + kill_task = 1; + } +#ifndef CONFIG_64BIT + asm volatile( + " ld 0,0(%0)\n" + " ld 2,8(%0)\n" + " ld 4,16(%0)\n" + " ld 6,24(%0)" + : : "a" (&S390_lowcore.floating_pt_save_area)); +#endif + + if (MACHINE_HAS_IEEE) { +#ifdef CONFIG_64BIT + fpt_save_area = &S390_lowcore.floating_pt_save_area; + fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; +#else + fpt_save_area = (void *) S390_lowcore.extended_save_area_addr; + fpt_creg_save_area = fpt_save_area + 128; +#endif + if (!mci->fc) { + /* + * Floating point control register can't be restored. + * Task will be terminated. + */ + asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero)); + kill_task = 1; + + } else + asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); + + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } + /* Revalidate access registers */ + asm volatile( + " lam 0,15,0(%0)" + : : "a" (&S390_lowcore.access_regs_save_area)); + if (!mci->ar) { + /* + * Access registers have unknown contents. + * Terminating task. + */ + kill_task = 1; + } + /* Revalidate control registers */ + if (!mci->cr) { + /* + * Control registers have unknown contents. + * Can't recover and therefore stopping machine. + */ + s390_handle_damage("invalid control registers."); + } else { +#ifdef CONFIG_64BIT + asm volatile( + " lctlg 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#else + asm volatile( + " lctl 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#endif + } + /* + * We don't even try to revalidate the TOD register, since we simply + * can't write something sensible into that register. + */ +#ifdef CONFIG_64BIT + /* + * See if we can revalidate the TOD programmable register with its + * old contents (should be zero) otherwise set it to zero. + */ + if (!mci->pr) + asm volatile( + " sr 0,0\n" + " sckpf" + : : : "0", "cc"); + else + asm volatile( + " l 0,0(%0)\n" + " sckpf" + : : "a" (&S390_lowcore.tod_progreg_save_area) + : "0", "cc"); +#endif + /* Revalidate clock comparator register */ + set_clock_comparator(S390_lowcore.clock_comparator); + /* Check if old PSW is valid */ + if (!mci->wp) + /* + * Can't tell if we come from user or kernel mode + * -> stopping machine. + */ + s390_handle_damage("old psw invalid."); + + if (!mci->ms || !mci->pm || !mci->ia) + kill_task = 1; + + return kill_task; +} + +#define MAX_IPD_COUNT 29 +#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ + +#define ED_STP_ISLAND 6 /* External damage STP island check */ +#define ED_STP_SYNC 7 /* External damage STP sync check */ +#define ED_ETR_SYNC 12 /* External damage ETR sync check */ +#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ + +/* + * machine check handler. + */ +void notrace s390_do_machine_check(struct pt_regs *regs) +{ + static int ipd_count; + static DEFINE_SPINLOCK(ipd_lock); + static unsigned long long last_ipd; + struct mcck_struct *mcck; + unsigned long long tmp; + struct mci *mci; + int umode; + + nmi_enter(); + inc_irq_stat(NMI_NMI); + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + mcck = &__get_cpu_var(cpu_mcck); + umode = user_mode(regs); + + if (mci->sd) { + /* System damage -> stopping machine */ + s390_handle_damage("received system damage machine check."); + } + if (mci->pd) { + if (mci->b) { + /* Processing backup -> verify if we can survive this */ + u64 z_mcic, o_mcic, t_mcic; +#ifdef CONFIG_64BIT + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | + 1ULL<<16); +#else + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 | + 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); +#endif + t_mcic = *(u64 *)mci; + + if (((t_mcic & z_mcic) != 0) || + ((t_mcic & o_mcic) != o_mcic)) { + s390_handle_damage("processing backup machine " + "check with damage."); + } + + /* + * Nullifying exigent condition, therefore we might + * retry this instruction. + */ + spin_lock(&ipd_lock); + tmp = get_tod_clock(); + if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME) + ipd_count++; + else + ipd_count = 1; + last_ipd = tmp; + if (ipd_count == MAX_IPD_COUNT) + s390_handle_damage("too many ipd retries."); + spin_unlock(&ipd_lock); + } else { + /* Processing damage -> stopping machine */ + s390_handle_damage("received instruction processing " + "damage machine check."); + } + } + if (s390_revalidate_registers(mci)) { + if (umode) { + /* + * Couldn't restore all register contents while in + * user mode -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = *(unsigned long long *) mci; + set_cpu_flag(CIF_MCCK_PENDING); + } else { + /* + * Couldn't restore all register contents while in + * kernel mode -> stopping machine. + */ + s390_handle_damage("unable to revalidate registers."); + } + } + if (mci->cd) { + /* Timing facility damage */ + s390_handle_damage("TOD clock damaged"); + } + if (mci->ed && mci->ec) { + /* External damage */ + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) + etr_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) + etr_switch_to_local(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) + stp_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) + stp_island_check(); + } + if (mci->se) + /* Storage error uncorrected */ + s390_handle_damage("received storage error uncorrected " + "machine check."); + if (mci->ke) + /* Storage key-error uncorrected */ + s390_handle_damage("received storage key-error uncorrected " + "machine check."); + if (mci->ds && mci->fa) + /* Storage degradation */ + s390_handle_damage("received storage degradation machine " + "check."); + if (mci->cp) { + /* Channel report word pending */ + mcck->channel_report = 1; + set_cpu_flag(CIF_MCCK_PENDING); + } + if (mci->w) { + /* Warning pending */ + mcck->warning = 1; + set_cpu_flag(CIF_MCCK_PENDING); + } + nmi_exit(); +} + +static int __init machine_check_init(void) +{ + ctl_set_bit(14, 25); /* enable external damage MCH */ + ctl_set_bit(14, 27); /* enable system recovery MCH */ + ctl_set_bit(14, 24); /* enable warning MCH */ + return 0; +} +arch_initcall(machine_check_init); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 00000000000..d112fc66f99 --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,168 @@ +/* + * OS info memory interface + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <asm/checksum.h> +#include <asm/lowcore.h> +#include <asm/os_info.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].size = size; + os_info.entry[nr].csum = csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info struture and set lowcore pointer + */ +void __init os_info_init(void) +{ + void *ptr = &os_info; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = os_info_csum(&os_info); + mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = PTR_ALIGN(buf, align); + if (copy_from_oldmem(buf_align, (void *) addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!OLDMEM_BASE) + goto fail; + if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1); + pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 00000000000..ea75d011a6f --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,696 @@ +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <asm/ctl_reg.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> + +/* CPU-measurement counter facility supports these CPU counter sets: + * For CPU counter sets: + * Basic counter set: 0-31 + * Problem-state counter set: 32-63 + * Crypto-activity counter set: 64-127 + * Extented counter set: 128-159 + */ +enum cpumf_ctr_set { + /* CPU counter sets */ + CPUMF_CTR_SET_BASIC = 0, + CPUMF_CTR_SET_USER = 1, + CPUMF_CTR_SET_CRYPTO = 2, + CPUMF_CTR_SET_EXT = 3, + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, +}; + +static void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +/* Local CPUMF event structure */ +struct cpu_hw_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + }, + .state = 0, + .flags = 0, +}; + +static int get_counter_set(u64 event) +{ + int set = -1; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 256) + set = CPUMF_CTR_SET_EXT; + + return set; +} + +static int validate_event(const struct hw_perf_event *hwc) +{ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + /* check for reserved counters */ + if ((hwc->config >= 6 && hwc->config <= 31) || + (hwc->config >= 38 && hwc->config <= 63) || + (hwc->config >= 80 && hwc->config <= 127)) + return -EOPNOTSUPP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int validate_ctr_version(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check required version for counter sets */ + switch (hwc->config_base) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpuhw->info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + if (cpuhw->info.csvn < 1) + err = -EOPNOTSUPP; + if ((cpuhw->info.csvn == 1 && hwc->config > 159) || + (cpuhw->info.csvn == 2 && hwc->config > 175) || + (cpuhw->info.csvn > 2 && hwc->config > 255)) + err = -EOPNOTSUPP; + break; + } + + put_cpu_var(cpu_hw_events); + return err; +} + +static int validate_ctr_auth(const struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuhw; + u64 ctrs_state; + int err = 0; + + cpuhw = &get_cpu_var(cpu_hw_events); + + /* check authorization for cpu counter sets */ + ctrs_state = cpumf_state_ctl[hwc->config_base]; + if (!(ctrs_state & cpuhw->info.auth_ctl)) + err = -EPERM; + + put_cpu_var(cpu_hw_events); + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + int err; + u64 inactive; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + + cpuhw->flags &= ~PMU_F_ENABLED; +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events = ATOMIC_INIT(0); +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +/* Initialize the CPU-measurement facility */ +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +/* Release the CPU-measurement facility */ +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int err; + u64 ev; + + switch (attr->type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + ev = attr->config; + /* Count user space (problem-state) only */ + if (!attr->exclude_user && attr->exclude_kernel) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + + /* No support for kernel space counters only */ + } else if (!attr->exclude_kernel && attr->exclude_user) { + return -EOPNOTSUPP; + + /* Count user and kernel space */ + } else { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev >= PERF_CPUM_CF_MAX_CTR) + return -EINVAL; + + /* Use the hardware perf event structure to store the counter number + * in 'config' member and the counter set to which the counter belongs + * in the 'config_base'. The counter set (config_base) is then used + * to enable/disable the counters. + */ + hwc->config = ev; + hwc->config_base = get_counter_set(ev); + + /* Validate the counter that is assigned to this event. + * Because the counter facility can use numerous counters at the + * same time without constraints, it is not necessary to explicity + * validate event groups (event->group_leader != event). + */ + err = validate_event(hwc); + if (err) + return err; + + /* Initialize for using the CPU-measurement counter facility */ + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + /* Finally, validate version and authorization of the counter set */ + err = validate_ctr_auth(hwc); + if (!err) + err = validate_ctr_version(hwc); + + return err; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + int err; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_RAW: + /* The CPU measurement counter facility does not have overflow + * interrupts to do sampling. Sampling must be provided by + * external means, for example, by timers. + */ + if (is_sampling_event(event)) + return -ENOENT; + err = __hw_perf_event_init(event); + break; + default: + return -ENOENT; + } + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. + */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static int hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + goto out; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +out: + return err; +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->config == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + hw_perf_event_reset(event); + + /* increment refcount for this counter set */ + atomic_inc(&cpuhw->ctr_set[hwc->config_base]); +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. + */ + if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base])) + ctr_set_stop(&cpuhw->state, hwc->config_base); + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + /* Check authorization for the counter set to which this + * counter belongs. + * For group events transaction, the authorization check is + * done in cpumf_pmu_commit_txn(). + */ + if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (validate_ctr_auth(&event->hw)) + return -EPERM; + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + perf_event_update_userpage(event); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base])) + ctr_set_disable(&cpuhw->state, event->hw.config_base); + + perf_event_update_userpage(event); +} + +/* + * Start group events scheduling transaction. + * Set flags to perform a single test at commit time. + */ +static void cpumf_pmu_start_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + perf_pmu_disable(pmu); + cpuhw->flags |= PERF_EVENT_TXN; + cpuhw->tx_state = cpuhw->state; +} + +/* + * Stop and cancel a group events scheduling tranctions. + * Assumes cpumf_pmu_del() is called for each successful added + * cpumf_pmu_add() during the transaction. + */ +static void cpumf_pmu_cancel_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + WARN_ON(cpuhw->tx_state != cpuhw->state); + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); +} + +/* + * Commit the group events scheduling transaction. On success, the + * transaction is closed. On error, the transaction is kept open + * until cpumf_pmu_cancel_txn() is called. + */ +static int cpumf_pmu_commit_txn(struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + u64 state; + + /* check if the updated state can be scheduled */ + state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + state >>= CPUMF_LCCTL_ENABLE_SHIFT; + if ((state & cpuhw->info.auth_ctl) != state) + return -EPERM; + + cpuhw->flags &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); + return 0; +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, + .start_txn = cpumf_pmu_start_txn, + .commit_txn = cpumf_pmu_commit_txn, + .cancel_txn = cpumf_pmu_cancel_txn, +}; + +static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init cpumf_pmu_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + goto out; + } + + cpumf_pmu.attr_groups = cpumf_cf_event_group(); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return rc; +} +early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 00000000000..4554a4bae39 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,322 @@ +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include <linux/slab.h> +#include <linux/perf_event.h> + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); + +static struct attribute *cpumcf_pmu_event_attr[] = { + CPUMF_EVENT_PTR(cf, CPU_CYCLES), + CPUMF_EVENT_PTR(cf, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_CYCLES), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumcf_pmu_event_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model; + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + default: + model = NULL; + break; + }; + + if (!model) + goto out; + + combined = merge_attr(cpumcf_pmu_event_attr, model); + if (combined) + cpumsf_pmu_events_group.attrs = combined; +out: + return cpumsf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 00000000000..ea0c7b2ef03 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,1643 @@ +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_sf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> +#include <asm/debug.h> +#include <asm/timex.h> + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. + */ +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) + +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. + * + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs + */ +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; + +struct sf_buffer { + unsigned long *sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return !!cpuhw->sfb.sdbt; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long *sdbt, *curr; + + if (!sfb->sdbt) + return; + + sdbt = sfb->sdbt; + curr = sdbt; + + /* Free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* Process table-link entries */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page((unsigned long) sdbt); + + /* If the origin is reached, sampling buffer is freed */ + if (curr == sfb->sdbt) + break; + else + sdbt = curr; + } else { + /* Process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + } + + debug_sprintf_event(sfdbg, 5, + "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + memset(sfb, 0, sizeof(*sfb)); +} + +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + unsigned long sdb, *trailer; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + trailer = trailer_entry_ptr(sdb); + *trailer = SDB_TE_ALERT_REQ_MASK; + + /* Link SDB into the sample-data-block-table */ + *sdbt = sdb; + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " + "sampling buffer is not linked: origin=%p" + "tail=%p\n", + (void *) sfb->sdbt, (void *) tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. + */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) + break; + sfb->num_sdb++; + tail++; + } + + /* Link sampling buffer to its origin */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" + " settings: sdbt=%lu sdb=%lu\n", + sfb->num_sdbt, sfb->num_sdb); + return rc; +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int rc; + + if (sfb->sdbt) + return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; + sfb->num_sdb = 0; + sfb->num_sdbt = 1; + + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; + + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) { + free_sampling_buffer(sfb); + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " + "realloc_sampling_buffer failed with rc=%i\n", rc); + } else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%p dear=%p\n", + sfb->sdbt, (void *) *sfb->sdbt); + return rc; +} + +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + struct hws_qsi_info_block si; + + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; +} + +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} + +static int sfb_has_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + return sfb_pending_allocs(sfb, hwc) > 0; +} + +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; +} + +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static size_t event_sample_size(struct hw_perf_event *hwc) +{ + struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + size_t sample_size; + + /* The sample size depends on the sampling function: The basic-sampling + * function must be always enabled, diagnostic-sampling function is + * optional. + */ + sample_size = sfr->bsdes; + if (SAMPL_DIAG_MODE(hwc)) + sample_size += sfr->dsdes; + + return sample_size; +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (cpuhw->sfb.sdbt) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq, factor; + size_t sfr_size, sample_size; + struct sf_raw_sample *sfr; + + /* Allocate raw sample buffer + * + * The raw sample buffer is used to temporarily store sampling data + * entries for perf raw sample processing. The buffer size mainly + * depends on the size of diagnostic-sampling data entries which is + * machine-specific. The exact size calculation includes: + * 1. The first 4 bytes of diagnostic-sampling data entries are + * already reflected in the sf_raw_sample structure. Subtract + * these bytes. + * 2. The perf raw sample data must be 8-byte aligned (u64) and + * perf's internal data size must be considered too. So add + * an additional u32 for correct alignment and subtract before + * allocating the buffer. + * 3. Store the raw sample buffer pointer in the perf event + * hardware structure. + */ + sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + + sizeof(u32), sizeof(u64)); + sfr_size -= sizeof(u32); + sfr = kzalloc(sfr_size, GFP_KERNEL); + if (!sfr) + return -ENOMEM; + sfr->size = sfr_size; + sfr->bsdes = cpuhw->qsi.bsdes; + sfr->dsdes = cpuhw->qsi.dsdes; + RAWSAMPLE_REG(hwc) = (unsigned long) sfr; + + /* Calculate sampling buffers using 4K pages + * + * 1. Determine the sample data size which depends on the used + * sampling functions, for example, basic-sampling or + * basic-sampling with diagnostic-sampling. + * + * 2. Use the sampling frequency as input. The sampling buffer is + * designed for almost one second. This can be adjusted through + * the "factor" variable. + * In any case, alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger a measurement-alert to harvest + * sample-data-blocks (sdb). + * + * 3. Compute the number of sample-data-blocks and ensure a minimum + * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not + * exceed a "calculated" maximum. The symbolic maximum is + * designed for basic-sampling only and needs to be increased if + * diagnostic-sampling is active. + * See also the remarks for these symbolic constants. + * + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). + */ + sample_size = event_sample_size(hwc); + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + factor = 1; + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); + if (n_sdb < CPUM_SF_MIN_SDB) + n_sdb = CPUM_SF_MIN_SDB; + + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. + */ + sfb_init_allocs(n_sdb, hwc); + if (sf_buffer_available(cpuhw)) + return 0; + + debug_sprintf_event(sfdbg, 3, + "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" + " sample_size=%lu cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), + sample_size, cpuhw); + + return alloc_sampling_buffer(&cpuhw->sfb, + sfb_pending_allocs(&cpuhw->sfb, hwc)); +} + +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} + +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} + +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" + " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. + */ +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + unsigned long num, num_old; + int rc; + + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + num_old = sfb->num_sdb; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. + */ + rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); + if (rc) + debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " + "failed with rc=%i\n", rc); + + if (sfb_has_pending_allocs(sfb, hwc)) + debug_sprintf_event(sfdbg, 5, "sfb: extend: " + "req=%lu alloc=%lu remaining=%lu\n", + num, sfb->num_sdb - num_old, + sfb_pending_allocs(sfb, hwc)); +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +#define PMC_FAILURE 2 +static void setup_pmc_cpu(void *flags) +{ + int err; + struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + + err = 0; + switch (*((int *) flags)) { + case PMC_INIT: + memset(cpusf, 0, sizeof(*cpusf)); + err = qsi(&cpusf->qsi); + if (err) + break; + cpusf->flags |= PMU_F_RESERVED; + err = sf_disable(); + if (err) + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + break; + case PMC_RELEASE: + cpusf->flags &= ~PMU_F_RESERVED; + err = sf_disable(); + if (err) { + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + } else + deallocate_buffers(cpusf); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + break; + } + if (err) + *((int *) flags) |= PMC_FAILURE; +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); + perf_release_sampling(); +} + +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + int err; + + err = perf_reserve_sampling(); + if (err) + return err; + on_each_cpu(setup_pmc_cpu, &flags, 1); + if (flags & PMC_FAILURE) { + release_pmc_hardware(); + return -ENODEV; + } + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Free raw sample buffer */ + if (RAWSAMPLE_REG(&event->hw)) + kfree((void *) RAWSAMPLE_REG(&event->hw)); + + /* Release PMC if this is the last perf event */ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static void hw_reset_registers(struct hw_perf_event *hwc, + unsigned long *sdbt_origin) +{ + struct sf_raw_sample *sfr; + + /* (Re)set to first sample-data-block-table */ + TEAR_REG(hwc) = (unsigned long) sdbt_origin; + + /* (Re)set raw sampling buffer register */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + memset(&sfr->basic, 0, sizeof(sfr->basic)); + memset(&sfr->diag, 0, sfr->dsdes); +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + int cpu, err; + + /* Reserve CPU-measurement sampling facility */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + goto out; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) + qsi(&si); + else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + + /* Check and set other sampling flags */ + if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; + + /* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + */ + rate = 0; + if (attr->freq) { + rate = freq_to_sample_rate(&si, attr->sample_freq); + rate = hw_limit_rate(&si, rate); + attr->freq = 0; + attr->sample_period = rate; + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(&si, hwc->sample_period); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(&si, rate) > + sysctl_perf_event_sample_rate) { + err = -EINVAL; + debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); + goto out; + } + } + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + + /* Initialize sample data overflow accounting */ + hwc->extra_reg.reg = REG_OVERFLOW; + OVERFLOW_REG(hwc) = 0; + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. + */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_buffers(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_buffers(cpuhw, hwc); + if (err) + break; + } + } +out: + return err; +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + /* No support for taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: + return -ENOENT; + } + + /* Check online status of the CPU to which the event is pinned */ + if (event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + if (event->destroy) + event->destroy(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hw_perf_event *hwc; + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). + */ + if (cpuhw->event) { + hwc = &cpuhw->event->hw; + /* Account number of overflow-designated buffer extents */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + + /* (Re)enable the PMU and sampling facility */ + cpuhw->flags |= PMU_F_ENABLED; + barrier(); + + err = lsctl(&cpuhw->lsctl); + if (err) { + cpuhw->flags &= ~PMU_F_ENABLED; + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 1, err); + return; + } + + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " + "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, + cpuhw->lsctl.ed, cpuhw->lsctl.cd, + (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + inactive.cd = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 2, err); + return; + } + + /* Save state of TEAR and DEAR register contents */ + if (!qsi(&si)) { + /* TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + } else + debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " + "qsi() failed with err=%i\n", err); + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_exclude_event() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) +{ + int overflow; + struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; + struct perf_sample_data data; + struct perf_raw_record raw; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + raw.size = sfr->size; + raw.data = sfr; + data.raw = &raw; + + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ + memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + + regs.psw.addr = sfr->basic.ia; + if (sfr->basic.T) + regs.psw.mask |= PSW_MASK_DAT; + if (sfr->basic.W) + regs.psw.mask |= PSW_MASK_WAIT; + if (sfr->basic.P) + regs.psw.mask |= PSW_MASK_PSTATE; + switch (sfr->basic.AS) { + case 0x0: + regs.psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x1: + regs.psw.mask |= PSW_ASC_ACCREG; + break; + case 0x2: + regs.psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x3: + regs.psw.mask |= PSW_ASC_HOME; + break; + } + + /* The host-program-parameter (hpp) contains the sie control + * block that is set by sie64a() in entry64.S. Check if hpp + * refers to a valid control block and set sde_regs flags + * accordingly. This would allow to use hpp values for other + * purposes too. + * For now, simply use a non-zero value as guest indicator. + */ + if (sfr->basic.hpp) + sde_regs->in_guest = 1; + + overflow = 0; + if (perf_exclude_event(event, ®s, sde_regs)) + goto out; + if (perf_event_overflow(event, &data, ®s)) { + overflow = 1; + event->pmu->stop(event, 0); + } + perf_event_update_userpage(event); +out: + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +static int sample_format_is_valid(struct hws_combined_entry *sample, + unsigned int flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + /* Only basic-sampling data entries with data-entry-format + * version of 0x0001 can be processed. + */ + if (sample->basic.def != 0x0001) + return 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + /* The data-entry-format number of diagnostic-sampling data + * entries can vary. Because diagnostic data is just passed + * through, do only a sanity check on the DEF. + */ + if (sample->diag.def < 0x8001) + return 0; + return 1; +} + +static int sample_is_consistent(struct hws_combined_entry *sample, + unsigned long flags) +{ + /* This check applies only to basic-sampling data entries of potentially + * combined-sampling data entries. Invalid entries cannot be processed + * by the PMU and, thus, do not deliver an associated + * diagnostic-sampling data entry. + */ + if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) + return 0; + /* + * Samples are skipped, if they are invalid or for which the + * instruction address is not predictable, i.e., the wait-state bit is + * set. + */ + if (sample->basic.I || sample->basic.W) + return 0; + return 1; +} + +static void reset_sample_slot(struct hws_combined_entry *sample, + unsigned long flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + sample->basic.def = 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + sample->diag.def = 0; +} + +static void sfr_store_sample(struct sf_raw_sample *sfr, + struct hws_combined_entry *sample) +{ + if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) + sfr->basic = sample->basic; + if (sfr->format & PERF_CPUM_SF_DIAG_MODE) + memcpy(&sfr->diag, &sample->diag, sfr->dsdes); +} + +static void debug_sample_entry(struct hws_combined_entry *sample, + struct hws_trailer_entry *te, + unsigned long flags) +{ + debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " + "sampling data entry: te->f=%i basic.def=%04x (%p)" + " diag.def=%04x (%p)\n", te->f, + sample->basic.def, &sample->basic, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? sample->diag.def : 0xFFFF, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? &sample->diag : NULL); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + unsigned long flags = SAMPL_FLAGS(&event->hw); + struct hws_combined_entry *sample; + struct hws_trailer_entry *te; + struct sf_raw_sample *sfr; + size_t sample_size; + + /* Prepare and initialize raw sample data */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); + sfr->format = flags & PERF_CPUM_SF_MODE_MASK; + + sample_size = event_sample_size(&event->hw); + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + sample = (struct hws_combined_entry *) *sdbt; + while ((unsigned long *) sample < (unsigned long *) te) { + /* Check for an empty sample */ + if (!sample->basic.def) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check sampling data entry */ + if (sample_format_is_valid(sample, flags)) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. + */ + if (!*overflow) { + if (sample_is_consistent(sample, flags)) { + /* Deliver sample data to perf */ + sfr_store_sample(sfr, sample); + *overflow = perf_push_sample(event, sfr); + } + } else + /* Count discarded samples */ + *overflow += 1; + } else { + debug_sample_entry(sample, te, flags); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->f) + break; + } + + /* Reset sample slot and advance to next sample */ + reset_sample_slot(sample, flags); + sample += sample_size; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flash_all flag + * to also walk through partially filled sample-data-blocks. It is ignored + * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag + * enforces the processing of full sample-data-blocks only (trailer entries + * with the block-full-indicator bit set). + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt; + unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; + int done; + + if (flush_all && SDB_FULL_BLOCKS(hwc)) + flush_all = 0; + + sdbt = (unsigned long *) TEAR_REG(hwc); + done = event_overflow = sampl_overflow = num_sdb = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->f) { + done = 1; + if (!flush_all) + break; + } + + /* Check the sample overflow count */ + if (te->overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). + */ + sampl_overflow += te->overflow; + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " + "overflow=%llu timestamp=0x%llx\n", + sdbt, te->overflow, + (te->f) ? trailer_timestamp(te) : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. + */ + hw_collect_samples(event, sdbt, &event_overflow); + num_sdb++; + + /* Reset trailer (using compare-double-and-swap) */ + do { + te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te_flags |= SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + te->flags, te->overflow, + te_flags, 0ULL)); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long) sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + + /* If an event overflow happened, discard samples by + * processing any remaining sample-data-blocks. + */ + if (event_overflow) + flush_all = 1; + } + + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); + if (sampl_overflow || event_overflow) + debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " + "overflow stats: sample=%llu event=%llu\n", + sampl_overflow, event_overflow); +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!cpuhw->sfb.sdbt) + return -EINVAL; + + err = 0; + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. */ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { + err = -EAGAIN; + goto out; + } + cpuhw->lsctl.es = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.ed = 1; + + /* Set in_use flag and store event */ + event->hw.idx = 0; /* only one sampling event per CPU supported */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); + +static struct attribute *cpumsf_pmu_events_attr[] = { + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .event_idx = cpumsf_pmu_event_idx, + .attr_groups = cpumsf_pmu_attr_groups, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = &__get_cpu_var(cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) + hw_perf_event_update(cpuhw->event, 0); + else + WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!atomic_read(&num_events)) + return NOTIFY_OK; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("The sampling buffer limits have changed to: " + "min=%lu max=%lu (diag=x%lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + +#define RS_INIT_FAILURE_QSI 0x0001 +#define RS_INIT_FAILURE_BSDES 0x0002 +#define RS_INIT_FAILURE_ALRT 0x0003 +#define RS_INIT_FAILURE_PERF 0x0004 +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason=%04x\n", reason); +} + +static int __init init_cpum_sampling_pmu(void) +{ + struct hws_qsi_info_block si; + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_cpumsf_err(RS_INIT_FAILURE_QSI); + return -ENODEV; + } + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + if (!sfdbg) + pr_err("Registering for s390dbf failed\n"); + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_PERF); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return err; +} +arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 00000000000..61595c1f0a0 --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,324 @@ +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 2012, 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "perf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/kvm_host.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sysinfo.h> + +const char *perf_pmu_name(void) +{ + if (cpum_cf_avail() || cpum_sf_avail()) + return "CPU-Measurement Facilities (CPU-MF)"; + return "pmu"; +} +EXPORT_SYMBOL(perf_pmu_name); + +int perf_num_counters(void) +{ + int num = 0; + + if (cpum_cf_avail()) + num += PERF_CPUM_CF_MAX_CTR; + if (cpum_sf_avail()) + num += PERF_CPUM_SF_MAX_CTR; + + return num; +} +EXPORT_SYMBOL(perf_num_counters); + +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; +#if IS_ENABLED(CONFIG_KVM) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + +static void print_debug_cf(void) +{ + struct cpumf_ctr_info cf_info; + int cpu = smp_processor_id(); + + memset(&cf_info, 0, sizeof(cf_info)); + if (!qctri(&cf_info)) + pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", + cpu, cf_info.cfvn, cf_info.csvn, + cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); +} + +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); + local_irq_restore(flags); +} + +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) +{ + struct cpumf_ctr_info ci; + + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) + return; + + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} + +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) + return; + + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + +/* See also arch/s390/kernel/traps.c */ +static unsigned long __store_trace(struct perf_callchain_entry *entry, + unsigned long sp, + unsigned long low, unsigned long high) +{ + struct stack_frame *sf; + struct pt_regs *regs; + + while (1) { + sp = sp & PSW_ADDR_INSN; + if (sp < low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + /* Follow the backchain. */ + while (1) { + low = sp; + sp = sf->back_chain & PSW_ADDR_INSN; + if (!sp) + break; + if (sp <= low || sp > high - sizeof(*sf)) + return sp; + sf = (struct stack_frame *) sp; + perf_callchain_store(entry, + sf->gprs[8] & PSW_ADDR_INSN); + } + /* Zero backchain detected, check for interrupt frame. */ + sp = (unsigned long) (sf + 1); + if (sp <= low || sp > high - sizeof(*regs)) + return sp; + regs = (struct pt_regs *) sp; + perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + low = sp; + sp = regs->gprs[15]; + } +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + unsigned long head; + struct stack_frame *head_sf; + + if (user_mode(regs)) + return; + + head = regs->gprs[15]; + head_sf = (struct stack_frame *) head; + + if (!head_sf || !head_sf->back_chain) + return; + + head = head_sf->back_chain; + head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, + S390_lowcore.async_stack); + + __store_trace(entry, head, S390_lowcore.thread_info, + S390_lowcore.thread_info + THREAD_SIZE); +} + +/* Perf defintions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx,name=%s\n", + pmu_attr->id, attr->attr.name); +} + +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(perf_hw_owner_lock); +static void *perf_sampling_owner; + +int perf_reserve_sampling(void) +{ + int err; + + err = 0; + spin_lock(&perf_hw_owner_lock); + if (perf_sampling_owner) { + pr_warn("The sampling facility is already reserved by %p\n", + perf_sampling_owner); + err = -EBUSY; + } else + perf_sampling_owner = __builtin_return_address(0); + spin_unlock(&perf_hw_owner_lock); + return err; +} +EXPORT_SYMBOL(perf_reserve_sampling); + +void perf_release_sampling(void) +{ + spin_lock(&perf_hw_owner_lock); + WARN_ON(!perf_sampling_owner); + perf_sampling_owner = NULL; + spin_unlock(&perf_hw_owner_lock); +} +EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S new file mode 100644 index 00000000000..813ec726087 --- /dev/null +++ b/arch/s390/kernel/pgm_check.S @@ -0,0 +1,152 @@ +/* + * Program check table. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/linkage.h> + +#ifdef CONFIG_32BIT +#define PGM_CHECK_64BIT(handler) .long default_trap_handler +#else +#define PGM_CHECK_64BIT(handler) .long handler +#endif + +#define PGM_CHECK(handler) .long handler +#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler) + +/* + * The program check table contains exactly 128 (0x00-0x7f) entries. Each + * line defines the 31 and/or 64 bit function to be called corresponding + * to the program check interruption code. + */ +.section .rodata, "a" +ENTRY(pgm_check_table) +PGM_CHECK_DEFAULT /* 00 */ +PGM_CHECK(illegal_op) /* 01 */ +PGM_CHECK(privileged_op) /* 02 */ +PGM_CHECK(execute_exception) /* 03 */ +PGM_CHECK(do_protection_exception) /* 04 */ +PGM_CHECK(addressing_exception) /* 05 */ +PGM_CHECK(specification_exception) /* 06 */ +PGM_CHECK(data_exception) /* 07 */ +PGM_CHECK(overflow_exception) /* 08 */ +PGM_CHECK(divide_exception) /* 09 */ +PGM_CHECK(overflow_exception) /* 0a */ +PGM_CHECK(divide_exception) /* 0b */ +PGM_CHECK(hfp_overflow_exception) /* 0c */ +PGM_CHECK(hfp_underflow_exception) /* 0d */ +PGM_CHECK(hfp_significance_exception) /* 0e */ +PGM_CHECK(hfp_divide_exception) /* 0f */ +PGM_CHECK(do_dat_exception) /* 10 */ +PGM_CHECK(do_dat_exception) /* 11 */ +PGM_CHECK(translation_exception) /* 12 */ +PGM_CHECK(special_op_exception) /* 13 */ +PGM_CHECK_DEFAULT /* 14 */ +PGM_CHECK(operand_exception) /* 15 */ +PGM_CHECK_DEFAULT /* 16 */ +PGM_CHECK_DEFAULT /* 17 */ +PGM_CHECK_64BIT(transaction_exception) /* 18 */ +PGM_CHECK_DEFAULT /* 19 */ +PGM_CHECK_DEFAULT /* 1a */ +PGM_CHECK_DEFAULT /* 1b */ +PGM_CHECK(space_switch_exception) /* 1c */ +PGM_CHECK(hfp_sqrt_exception) /* 1d */ +PGM_CHECK_DEFAULT /* 1e */ +PGM_CHECK_DEFAULT /* 1f */ +PGM_CHECK_DEFAULT /* 20 */ +PGM_CHECK_DEFAULT /* 21 */ +PGM_CHECK_DEFAULT /* 22 */ +PGM_CHECK_DEFAULT /* 23 */ +PGM_CHECK_DEFAULT /* 24 */ +PGM_CHECK_DEFAULT /* 25 */ +PGM_CHECK_DEFAULT /* 26 */ +PGM_CHECK_DEFAULT /* 27 */ +PGM_CHECK_DEFAULT /* 28 */ +PGM_CHECK_DEFAULT /* 29 */ +PGM_CHECK_DEFAULT /* 2a */ +PGM_CHECK_DEFAULT /* 2b */ +PGM_CHECK_DEFAULT /* 2c */ +PGM_CHECK_DEFAULT /* 2d */ +PGM_CHECK_DEFAULT /* 2e */ +PGM_CHECK_DEFAULT /* 2f */ +PGM_CHECK_DEFAULT /* 30 */ +PGM_CHECK_DEFAULT /* 31 */ +PGM_CHECK_DEFAULT /* 32 */ +PGM_CHECK_DEFAULT /* 33 */ +PGM_CHECK_DEFAULT /* 34 */ +PGM_CHECK_DEFAULT /* 35 */ +PGM_CHECK_DEFAULT /* 36 */ +PGM_CHECK_DEFAULT /* 37 */ +PGM_CHECK_64BIT(do_dat_exception) /* 38 */ +PGM_CHECK_64BIT(do_dat_exception) /* 39 */ +PGM_CHECK_64BIT(do_dat_exception) /* 3a */ +PGM_CHECK_64BIT(do_dat_exception) /* 3b */ +PGM_CHECK_DEFAULT /* 3c */ +PGM_CHECK_DEFAULT /* 3d */ +PGM_CHECK_DEFAULT /* 3e */ +PGM_CHECK_DEFAULT /* 3f */ +PGM_CHECK_DEFAULT /* 40 */ +PGM_CHECK_DEFAULT /* 41 */ +PGM_CHECK_DEFAULT /* 42 */ +PGM_CHECK_DEFAULT /* 43 */ +PGM_CHECK_DEFAULT /* 44 */ +PGM_CHECK_DEFAULT /* 45 */ +PGM_CHECK_DEFAULT /* 46 */ +PGM_CHECK_DEFAULT /* 47 */ +PGM_CHECK_DEFAULT /* 48 */ +PGM_CHECK_DEFAULT /* 49 */ +PGM_CHECK_DEFAULT /* 4a */ +PGM_CHECK_DEFAULT /* 4b */ +PGM_CHECK_DEFAULT /* 4c */ +PGM_CHECK_DEFAULT /* 4d */ +PGM_CHECK_DEFAULT /* 4e */ +PGM_CHECK_DEFAULT /* 4f */ +PGM_CHECK_DEFAULT /* 50 */ +PGM_CHECK_DEFAULT /* 51 */ +PGM_CHECK_DEFAULT /* 52 */ +PGM_CHECK_DEFAULT /* 53 */ +PGM_CHECK_DEFAULT /* 54 */ +PGM_CHECK_DEFAULT /* 55 */ +PGM_CHECK_DEFAULT /* 56 */ +PGM_CHECK_DEFAULT /* 57 */ +PGM_CHECK_DEFAULT /* 58 */ +PGM_CHECK_DEFAULT /* 59 */ +PGM_CHECK_DEFAULT /* 5a */ +PGM_CHECK_DEFAULT /* 5b */ +PGM_CHECK_DEFAULT /* 5c */ +PGM_CHECK_DEFAULT /* 5d */ +PGM_CHECK_DEFAULT /* 5e */ +PGM_CHECK_DEFAULT /* 5f */ +PGM_CHECK_DEFAULT /* 60 */ +PGM_CHECK_DEFAULT /* 61 */ +PGM_CHECK_DEFAULT /* 62 */ +PGM_CHECK_DEFAULT /* 63 */ +PGM_CHECK_DEFAULT /* 64 */ +PGM_CHECK_DEFAULT /* 65 */ +PGM_CHECK_DEFAULT /* 66 */ +PGM_CHECK_DEFAULT /* 67 */ +PGM_CHECK_DEFAULT /* 68 */ +PGM_CHECK_DEFAULT /* 69 */ +PGM_CHECK_DEFAULT /* 6a */ +PGM_CHECK_DEFAULT /* 6b */ +PGM_CHECK_DEFAULT /* 6c */ +PGM_CHECK_DEFAULT /* 6d */ +PGM_CHECK_DEFAULT /* 6e */ +PGM_CHECK_DEFAULT /* 6f */ +PGM_CHECK_DEFAULT /* 70 */ +PGM_CHECK_DEFAULT /* 71 */ +PGM_CHECK_DEFAULT /* 72 */ +PGM_CHECK_DEFAULT /* 73 */ +PGM_CHECK_DEFAULT /* 74 */ +PGM_CHECK_DEFAULT /* 75 */ +PGM_CHECK_DEFAULT /* 76 */ +PGM_CHECK_DEFAULT /* 77 */ +PGM_CHECK_DEFAULT /* 78 */ +PGM_CHECK_DEFAULT /* 79 */ +PGM_CHECK_DEFAULT /* 7a */ +PGM_CHECK_DEFAULT /* 7b */ +PGM_CHECK_DEFAULT /* 7c */ +PGM_CHECK_DEFAULT /* 7d */ +PGM_CHECK_DEFAULT /* 7e */ +PGM_CHECK_DEFAULT /* 7f */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ce203154d8c..93b9ca42e5c 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -1,49 +1,38 @@ /* - * arch/s390/kernel/process.c + * This file handles the architecture dependent parts of process handling. * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Hartmut Penner (hp@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * - * Derived from "arch/i386/kernel/process.c" - * Copyright (C) 1995, Linus Torvalds - */ - -/* - * This file handles the architecture-dependent parts of process handling.. + * Copyright IBM Corp. 1999, 2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Hartmut Penner <hp@de.ibm.com>, + * Denis Joseph Barrow, */ #include <linux/compiler.h> #include <linux/cpu.h> -#include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/fs.h> +#include <linux/elfcore.h> #include <linux/smp.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> #include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/user.h> #include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/init.h> +#include <linux/tick.h> +#include <linux/personality.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <linux/kprobes.h> +#include <linux/random.h> #include <linux/module.h> -#include <linux/notifier.h> -#include <linux/utsname.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/system.h> #include <asm/io.h> #include <asm/processor.h> +#include <asm/vtimer.h> +#include <asm/exec.h> #include <asm/irq.h> -#include <asm/timer.h> -#include <asm/cpu.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include <asm/switch_to.h> +#include <asm/runtime_instr.h> +#include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -72,336 +61,163 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -/* - * Need to know about CPUs going idle? - */ -static ATOMIC_NOTIFIER_HEAD(idle_chain); - -int register_idle_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&idle_chain, nb); -} -EXPORT_SYMBOL(register_idle_notifier); - -int unregister_idle_notifier(struct notifier_block *nb) +void arch_cpu_idle(void) { - return atomic_notifier_chain_unregister(&idle_chain, nb); -} -EXPORT_SYMBOL(unregister_idle_notifier); - -void do_monitor_call(struct pt_regs *regs, long interruption_code) -{ -#ifdef CONFIG_SMP - struct s390_idle_data *idle; - - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_time += get_clock() - idle->idle_enter; - idle->in_idle = 0; - spin_unlock(&idle->lock); -#endif - /* disable monitor call class 0 */ - __ctl_clear_bit(8, 15); - - atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE, - (void *)(long) smp_processor_id()); -} - -extern void s390_handle_mcck(void); -/* - * The idle loop on a S390... - */ -static void default_idle(void) -{ - int cpu, rc; - int nr_calls = 0; - void *hcpu; -#ifdef CONFIG_SMP - struct s390_idle_data *idle; -#endif - - /* CPU is going idle. */ - cpu = smp_processor_id(); - hcpu = (void *)(long)cpu; - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } - - rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1, - &nr_calls); - if (rc == NOTIFY_BAD) { - nr_calls--; - __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE, - hcpu, nr_calls, NULL); - local_irq_enable(); - return; - } - - /* enable monitor call class 0 */ - __ctl_set_bit(8, 15); - -#ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(cpu)) { - preempt_enable_no_resched(); - cpu_die(); - } -#endif - local_mcck_disable(); - if (test_thread_flag(TIF_MCCK_PENDING)) { + if (test_cpu_flag(CIF_MCCK_PENDING)) { local_mcck_enable(); - /* disable monitor call class 0 */ - __ctl_clear_bit(8, 15); - atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE, - hcpu); local_irq_enable(); - s390_handle_mcck(); return; } -#ifdef CONFIG_SMP - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_count++; - idle->in_idle = 1; - idle->idle_enter = get_clock(); - spin_unlock(&idle->lock); -#endif - trace_hardirqs_on(); - /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT); + /* Halt the cpu and keep track of cpu time accounting. */ + vtime_stop_cpu(); + local_irq_enable(); } -void cpu_idle(void) +void arch_cpu_idle_exit(void) { - for (;;) { - while (!need_resched()) - default_idle(); - - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); } -void show_regs(struct pt_regs *regs) +void arch_cpu_idle_dead(void) { - print_modules(); - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_registers(regs); - /* Show stack backtrace if pt_regs is from kernel mode */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); + cpu_die(); } -extern void kernel_thread_starter(void); - -asm( - ".align 4\n" - "kernel_thread_starter:\n" - " la 2,0(10)\n" - " basr 14,9\n" - " la 2,0\n" - " br 11\n"); - -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; - regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; - regs.gprs[9] = (unsigned long) fn; - regs.gprs[10] = (unsigned long) arg; - regs.gprs[11] = (unsigned long) do_exit; - regs.orig_gpr2 = -1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, - 0, ®s, 0, NULL, NULL); -} +extern void __kprobes kernel_thread_starter(void); /* * Free current thread data structures etc.. */ void exit_thread(void) { + exit_thread_runtime_instr(); } void flush_thread(void) { - clear_used_math(); - clear_tsk_thread_flag(current, TIF_USEDFPU); } void release_thread(struct task_struct *dead_task) { } -int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) +int copy_thread(unsigned long clone_flags, unsigned long new_stackp, + unsigned long arg, struct task_struct *p) { - struct fake_frame - { - struct stack_frame sf; - struct pt_regs childregs; - } *frame; + struct thread_info *ti; + struct fake_frame + { + struct stack_frame sf; + struct pt_regs childregs; + } *frame; + + frame = container_of(task_pt_regs(p), struct fake_frame, childregs); + p->thread.ksp = (unsigned long) frame; + /* Save access registers to new thread structure. */ + save_access_regs(&p->thread.acrs[0]); + /* start new process with ar4 pointing to the correct address space */ + p->thread.mm_segment = get_fs(); + /* Don't copy debug registers */ + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); + clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + /* Initialize per thread user and system timer values */ + ti = task_thread_info(p); + ti->user_timer = 0; + ti->system_timer = 0; + + frame->sf.back_chain = 0; + /* new return point is ret_from_fork */ + frame->sf.gprs[8] = (unsigned long) ret_from_fork; + /* fake return stack for resume(), don't go back to schedule */ + frame->sf.gprs[9] = (unsigned long) frame; - frame = container_of(task_pt_regs(p), struct fake_frame, childregs); - p->thread.ksp = (unsigned long) frame; /* Store access registers to kernel stack of new process. */ - frame->childregs = *regs; - frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ - frame->childregs.gprs[15] = new_stackp; - frame->sf.back_chain = 0; - - /* new return point is ret_from_fork */ - frame->sf.gprs[8] = (unsigned long) ret_from_fork; + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(&frame->childregs, 0, sizeof(struct pt_regs)); + frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + frame->childregs.psw.addr = PSW_ADDR_AMODE | + (unsigned long) kernel_thread_starter; + frame->childregs.gprs[9] = new_stackp; /* function */ + frame->childregs.gprs[10] = arg; + frame->childregs.gprs[11] = (unsigned long) do_exit; + frame->childregs.orig_gpr2 = -1; - /* fake return stack for resume(), don't go back to schedule */ - frame->sf.gprs[9] = (unsigned long) frame; + return 0; + } + frame->childregs = *current_pt_regs(); + frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; - /* Save access registers to new thread structure. */ - save_access_regs(&p->thread.acrs[0]); + /* Don't copy runtime instrumentation info */ + p->thread.ri_cb = NULL; + p->thread.ri_signum = 0; + frame->childregs.psw.mask &= ~PSW_MASK_RI; #ifndef CONFIG_64BIT - /* + /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the child. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(&p->thread.fp_regs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) - p->thread.acrs[0] = regs->gprs[6]; + p->thread.acrs[0] = frame->childregs.gprs[6]; #else /* CONFIG_64BIT */ /* Save the fpu registers to new thread structure. */ - save_fp_regs(&p->thread.fp_regs); + save_fp_ctl(&p->thread.fp_regs.fpc); + save_fp_regs(p->thread.fp_regs.fprs); + p->thread.fp_regs.pad = 0; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { - if (test_thread_flag(TIF_31BIT)) { - p->thread.acrs[0] = (unsigned int) regs->gprs[6]; + unsigned long tls = frame->childregs.gprs[6]; + if (is_compat_task()) { + p->thread.acrs[0] = (unsigned int)tls; } else { - p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32); - p->thread.acrs[1] = (unsigned int) regs->gprs[6]; + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; } } #endif /* CONFIG_64BIT */ - /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); - /* Don't copy debug registers */ - memset(&p->thread.per_info,0,sizeof(p->thread.per_info)); - - return 0; -} - -asmlinkage long sys_fork(void) -{ - struct pt_regs *regs = task_pt_regs(current); - return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL); -} - -asmlinkage long sys_clone(void) -{ - struct pt_regs *regs = task_pt_regs(current); - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs->gprs[3]; - newsp = regs->orig_gpr2; - parent_tidptr = (int __user *) regs->gprs[4]; - child_tidptr = (int __user *) regs->gprs[5]; - if (!newsp) - newsp = regs->gprs[15]; - return do_fork(clone_flags, newsp, regs, 0, - parent_tidptr, child_tidptr); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage long sys_vfork(void) -{ - struct pt_regs *regs = task_pt_regs(current); - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, - regs->gprs[15], regs, 0, NULL, NULL); + return 0; } asmlinkage void execve_tail(void) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); current->thread.fp_regs.fpc = 0; if (MACHINE_HAS_IEEE) asm volatile("sfpc %0,%0" : : "d" (0)); } /* - * sys_execve() executes a new program. - */ -asmlinkage long sys_execve(void) -{ - struct pt_regs *regs = task_pt_regs(current); - char *filename; - unsigned long result; - int rc; - - filename = getname((char __user *) regs->orig_gpr2); - if (IS_ERR(filename)) { - result = PTR_ERR(filename); - goto out; - } - rc = do_execve(filename, (char __user * __user *) regs->gprs[3], - (char __user * __user *) regs->gprs[4], regs); - if (rc) { - result = rc; - goto out_putname; - } - execve_tail(); - result = regs->gprs[2]; -out_putname: - putname(filename); -out: - return result; -} - -/* * fill in the FPU structure for a core dump. */ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) { #ifndef CONFIG_64BIT - /* + /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the dump. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(fpregs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); #else /* CONFIG_64BIT */ - save_fp_regs(fpregs); + save_fp_ctl(&fpregs->fpc); + save_fp_regs(fpregs->fprs); #endif /* CONFIG_64BIT */ return 1; } +EXPORT_SYMBOL(dump_fpu); unsigned long get_wchan(struct task_struct *p) { @@ -427,3 +243,36 @@ unsigned long get_wchan(struct task_struct *p) return 0; } +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() & ~PAGE_MASK; + return sp & ~0xf; +} + +static inline unsigned long brk_rnd(void) +{ + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; + else + return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long ret; + + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; +} + +unsigned long randomize_et_dyn(unsigned long base) +{ + unsigned long ret; + + if (!(current->flags & PF_RANDOMIZE)) + return base; + ret = PAGE_ALIGN(base + brk_rnd()); + return (ret > base) ? ret : base; +} diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c new file mode 100644 index 00000000000..24612029f45 --- /dev/null +++ b/arch/s390/kernel/processor.c @@ -0,0 +1,97 @@ +/* + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <asm/elf.h> +#include <asm/lowcore.h> +#include <asm/param.h> + +static DEFINE_PER_CPU(struct cpuid, cpu_id); + +/* + * cpu_init - initializes state that is per-CPU. + */ +void cpu_init(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct cpuid *id = &__get_cpu_var(cpu_id); + + get_cpu_id(id); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); + memset(idle, 0, sizeof(*idle)); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + static const char *hwcap_str[] = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", + "edat", "etf3eh", "highgprs", "te" + }; + unsigned long n = (unsigned long) v - 1; + int i; + + if (!n) { + s390_adjust_jiffies(); + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + show_cacheinfo(m); + } + get_online_cpus(); + if (cpu_online(n)) { + struct cpuid *id = &per_cpu(cpu_id, n); + seq_printf(m, "processor %li: " + "version = %02X, " + "identification = %06X, " + "machine = %04X\n", + n, id->version, id->ident, id->machine); + } + put_online_cpus(); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 6e036bae987..5dc7ad9e2fb 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1,113 +1,143 @@ /* - * arch/s390/kernel/ptrace.c + * Ptrace user space interface. * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Copyright IBM Corp. 1999, 2010 + * Author(s): Denis Joseph Barrow * Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * Based on PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/m68k/kernel/ptrace.c" - * Copyright (C) 1994 by Hamish Macdonald - * Taken from linux/kernel/ptrace.c and modified for M680x0. - * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds - * - * Modified by Cort Dougan (cort@cs.nmt.edu) - * - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/signal.h> - +#include <linux/elf.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/seccomp.h> +#include <linux/compat.h> +#include <trace/syscall.h> #include <asm/segment.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/unistd.h> +#include <asm/switch_to.h> +#include "entry.h" #ifdef CONFIG_COMPAT #include "compat_ptrace.h" #endif -static void -FixPerRegisters(struct task_struct *task) +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +enum s390_regset { + REGSET_GENERAL, + REGSET_FP, + REGSET_LAST_BREAK, + REGSET_TDB, + REGSET_SYSTEM_CALL, + REGSET_GENERAL_EXTENDED, +}; + +void update_cr_regs(struct task_struct *task) { - struct pt_regs *regs; - per_struct *per_info; + struct pt_regs *regs = task_pt_regs(task); + struct thread_struct *thread = &task->thread; + struct per_regs old, new; - regs = task_pt_regs(task); - per_info = (per_struct *) &task->thread.per_info; - per_info->control_regs.bits.em_instruction_fetch = - per_info->single_step | per_info->instruction_fetch; - - if (per_info->single_step) { - per_info->control_regs.bits.starting_addr = 0; -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - per_info->control_regs.bits.ending_addr = 0x7fffffffUL; +#ifdef CONFIG_64BIT + /* Take care of the enable/disable of transactional execution. */ + if (MACHINE_HAS_TE) { + unsigned long cr, cr_new; + + __ctl_store(cr, 0, 0); + /* Set or clear transaction execution TXC bit 8. */ + cr_new = cr | (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); + if (cr_new != cr) + __ctl_load(cr_new, 0, 0); + /* Set or clear transaction execution TDC bits 62 and 63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; + } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); + } +#endif + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) + new.control |= PER_EVENT_BRANCH; else + new.control |= PER_EVENT_IFETCH; +#ifdef CONFIG_64BIT + new.control |= PER_CONTROL_SUSPENSION; + new.control |= PER_EVENT_TRANSACTION_END; #endif - per_info->control_regs.bits.ending_addr = PSW_ADDR_INSN; - } else { - per_info->control_regs.bits.starting_addr = - per_info->starting_addr; - per_info->control_regs.bits.ending_addr = - per_info->ending_addr; + new.start = 0; + new.end = PSW_ADDR_INSN; } - /* - * if any of the control reg tracing bits are on - * we switch on per in the psw - */ - if (per_info->control_regs.words.cr[0] & PER_EM_MASK) - regs->psw.mask |= PSW_MASK_PER; - else - regs->psw.mask &= ~PSW_MASK_PER; - if (per_info->control_regs.bits.em_storage_alteration) - per_info->control_regs.bits.storage_alt_space_ctl = 1; - else - per_info->control_regs.bits.storage_alt_space_ctl = 0; + /* Take care of the PER enablement bit in the PSW. */ + if (!(new.control & PER_EVENT_MASK)) { + regs->psw.mask &= ~PSW_MASK_PER; + return; + } + regs->psw.mask |= PSW_MASK_PER; + __ctl_store(old, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) { - task->thread.per_info.single_step = 1; - FixPerRegisters(task); + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); + set_tsk_thread_flag(task, TIF_SINGLE_STEP); } void user_disable_single_step(struct task_struct *task) { - task->thread.per_info.single_step = 0; - FixPerRegisters(task); + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + set_tsk_thread_flag(task, TIF_BLOCK_STEP); } /* * Called by kernel/ptrace.c when detaching.. * - * Make sure single step bits etc are not set. + * Clear all debugging related fields. */ -void -ptrace_disable(struct task_struct *child) +void ptrace_disable(struct task_struct *task) { - /* make sure the single step bit is not set. */ - user_disable_single_step(child); + memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); + memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); + task->thread.per_flags = 0; } #ifndef CONFIG_64BIT @@ -116,6 +146,47 @@ ptrace_disable(struct task_struct *child) # define __ADDR_MASK 7 #endif +static inline unsigned long __peek_user_per(struct task_struct *child, + addr_t addr) +{ + struct per_struct_kernel *dummy = NULL; + + if (addr == (addr_t) &dummy->cr9) + /* Control bits of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy->cr10) + /* Start address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy->bits) + /* Single-step bit. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + (1UL << (BITS_PER_LONG - 1)) : 0; + else if (addr == (addr_t) &dummy->starting_addr) + /* Start address of the user specified per set. */ + return child->thread.per_user.start; + else if (addr == (addr_t) &dummy->ending_addr) + /* End address of the user specified per set. */ + return child->thread.per_user.end; + else if (addr == (addr_t) &dummy->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (unsigned long) + child->thread.per_event.cause << (BITS_PER_LONG - 16); + else if (addr == (addr_t) &dummy->address) + /* Address of the last PER trap */ + return child->thread.per_event.address; + else if (addr == (addr_t) &dummy->access_id) + /* Access id of the last PER trap */ + return (unsigned long) + child->thread.per_event.paid << (BITS_PER_LONG - 8); + return 0; +} + /* * Read the word at offset addr from the user area of a process. The * trouble here is that the information is littered over different @@ -125,33 +196,21 @@ ptrace_disable(struct task_struct *child) * struct user contain pad bytes that should be read as zeroes. * Lovely... */ -static int -peek_user(struct task_struct *child, addr_t addr, addr_t data) +static unsigned long __peek_user(struct task_struct *child, addr_t addr) { struct user *dummy = NULL; - addr_t offset, tmp, mask; - - /* - * Stupid gdb peeks/pokes the access registers in 64 bit with - * an alignment of 4. Programmers from hell... - */ - mask = __ADDR_MASK; -#ifdef CONFIG_64BIT - if (addr >= (addr_t) &dummy->regs.acrs && - addr < (addr_t) &dummy->regs.orig_gpr2) - mask = 3; -#endif - if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) - return -EIO; + addr_t offset, tmp; if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + if (addr == (addr_t) &dummy->regs.psw.mask) { + /* Return a clean psw mask. */ + tmp &= PSW_MASK_USER | PSW_MASK_RI; + tmp |= PSW_USER_BITS; + } } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -176,6 +235,13 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure @@ -183,64 +249,101 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) offset = addr - (addr_t) &dummy->regs.fp_regs; tmp = *(addr_t *)((addr_t) &child->thread.fp_regs + offset); if (addr == (addr_t) &dummy->regs.fp_regs.fpc) - tmp &= (unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32); + tmp <<= BITS_PER_LONG - 32; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy->regs.per_info; - tmp = *(addr_t *)((addr_t) &child->thread.per_info + offset); + addr -= (addr_t) &dummy->regs.per_info; + tmp = __peek_user_per(child, addr); } else tmp = 0; - return put_user(tmp, (addr_t __user *) data); + return tmp; } -/* - * Write a word to the user area of a process at location addr. This - * operation does have an additional problem compared to peek_user. - * Stores to the program status word and on the floating point - * control register needs to get checked for validity. - */ static int -poke_user(struct task_struct *child, addr_t addr, addr_t data) +peek_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; - addr_t offset, mask; + addr_t tmp, mask; /* * Stupid gdb peeks/pokes the access registers in 64 bit with - * an alignment of 4. Programmers from hell indeed... + * an alignment of 4. Programmers from hell... */ mask = __ADDR_MASK; #ifdef CONFIG_64BIT - if (addr >= (addr_t) &dummy->regs.acrs && - addr < (addr_t) &dummy->regs.orig_gpr2) + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) mask = 3; #endif if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; + tmp = __peek_user(child, addr); + return put_user(tmp, (addr_t __user *) data); +} + +static inline void __poke_user_per(struct task_struct *child, + addr_t addr, addr_t data) +{ + struct per_struct_kernel *dummy = NULL; + + /* + * There are only three fields in the per_info struct that the + * debugger user can write to. + * 1) cr9: the debugger wants to set a new PER event mask + * 2) starting_addr: the debugger wants to set a new starting + * address to use with the PER event mask. + * 3) ending_addr: the debugger wants to set a new ending + * address to use with the PER event mask. + * The user specified PER event mask and the start and end + * addresses are used only if single stepping is not in effect. + * Writes to any other field in per_info are ignored. + */ + if (addr == (addr_t) &dummy->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* + * Write a word to the user area of a process at location addr. This + * operation does have an additional problem compared to peek_user. + * Stores to the program status word and on the floating point + * control register needs to get checked for validity. + */ +static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + struct user *dummy = NULL; + addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && -#endif - data != PSW_MASK_MERGE(psw_user_bits, data)) - /* Invalid psw mask. */ - return -EINVAL; -#ifndef CONFIG_64BIT - if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif + if (addr == (addr_t) &dummy->regs.psw.mask) { + unsigned long mask = PSW_MASK_USER; + + mask |= is_ri_task(child) ? PSW_MASK_RI : 0; + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ + return -EINVAL; + } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -267,55 +370,67 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) */ task_pt_regs(child)->orig_gpr2 = data; + } else if (addr < (addr_t) &dummy->regs.fp_regs) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure */ - if (addr == (addr_t) &dummy->regs.fp_regs.fpc && - (data & ~((unsigned long) FPC_VALID_MASK - << (BITS_PER_LONG - 32))) != 0) - return -EINVAL; + if (addr == (addr_t) &dummy->regs.fp_regs.fpc) + if ((unsigned int) data != 0 || + test_fp_ctl(data >> (BITS_PER_LONG - 32))) + return -EINVAL; offset = addr - (addr_t) &dummy->regs.fp_regs; *(addr_t *)((addr_t) &child->thread.fp_regs + offset) = data; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy->regs.per_info; - *(addr_t *)((addr_t) &child->thread.per_info + offset) = data; + addr -= (addr_t) &dummy->regs.per_info; + __poke_user_per(child, addr, data); } - FixPerRegisters(child); return 0; } -static int -do_ptrace_normal(struct task_struct *child, long request, long addr, long data) +static int poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t mask; + + /* + * Stupid gdb peeks/pokes the access registers in 64 bit with + * an alignment of 4. Programmers from hell indeed... + */ + mask = __ADDR_MASK; +#ifdef CONFIG_64BIT + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + mask = 3; +#endif + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) + return -EIO; + + return __poke_user(child, addr, data); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { ptrace_area parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* Remove high order bit from address (only for 31 bit). */ - addr &= PSW_ADDR_INSN; - /* read word at location addr. */ - return generic_ptrace_peekdata(child, addr, data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ return peek_user(child, addr, data); - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* Remove high order bit from address (only for 31 bit). */ - addr &= PSW_ADDR_INSN; - /* write the word at location addr. */ - return generic_ptrace_pokedata(child, addr, data); - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ return poke_user(child, addr, data); @@ -345,8 +460,45 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned long); } return 0; + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned long __user *) data); + return 0; + case PTRACE_ENABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags &= ~PER_FLAG_NO_TE; + return 0; + case PTRACE_DISABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + return 0; + case PTRACE_TE_ABORT_RAND: + if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: + return -EINVAL; + } + return 0; + default: + /* Removing high order bit from addr (only for 31 bit). */ + addr &= PSW_ADDR_INSN; + return ptrace_request(child, request, addr, data); } - return ptrace_request(child, request, addr, data); } #ifdef CONFIG_COMPAT @@ -365,36 +517,73 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data) */ /* + * Same as peek_user_per but for a 31 bit program. + */ +static inline __u32 __peek_user_per_compat(struct task_struct *child, + addr_t addr) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* Control bits of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy32->cr10) + /* Start address of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW32_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->bits) + /* Single-step bit. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0x80000000 : 0; + else if (addr == (addr_t) &dummy32->starting_addr) + /* Start address of the user specified per set. */ + return (__u32) child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->ending_addr) + /* End address of the user specified per set. */ + return (__u32) child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (__u32) child->thread.per_event.cause << 16; + else if (addr == (addr_t) &dummy32->address) + /* Address of the last PER trap */ + return (__u32) child->thread.per_event.address; + else if (addr == (addr_t) &dummy32->access_id) + /* Access id of the last PER trap */ + return (__u32) child->thread.per_event.paid << 24; + return 0; +} + +/* * Same as peek_user but for a 31 bit program. */ -static int -peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) +static u32 __peek_user_compat(struct task_struct *child, addr_t addr) { - struct user32 *dummy32 = NULL; - per_struct32 *dummy_per32 = NULL; + struct compat_user *dummy32 = NULL; addr_t offset; __u32 tmp; - if (!test_thread_flag(TIF_31BIT) || - (addr & 3) || addr > sizeof(struct user) - 3) - return -EIO; - if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp &= PSW32_MASK_USER | PSW32_MASK_RI; + tmp |= PSW32_USER_BITS; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -409,6 +598,13 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) */ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure @@ -418,62 +614,87 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy32->regs.per_info; - /* This is magic. See per_struct and per_struct32. */ - if ((offset >= (addr_t) &dummy_per32->control_regs && - offset < (addr_t) (&dummy_per32->control_regs + 1)) || - (offset >= (addr_t) &dummy_per32->starting_addr && - offset <= (addr_t) &dummy_per32->ending_addr) || - offset == (addr_t) &dummy_per32->lowcore.words.address) - offset = offset*2 + 4; - else - offset = offset*2; - tmp = *(__u32 *)((addr_t) &child->thread.per_info + offset); + addr -= (addr_t) &dummy32->regs.per_info; + tmp = __peek_user_per_compat(child, addr); } else tmp = 0; + return tmp; +} + +static int peek_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + __u32 tmp; + + if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) + return -EIO; + + tmp = __peek_user_compat(child, addr); return put_user(tmp, (__u32 __user *) data); } /* + * Same as poke_user_per but for a 31 bit program. + */ +static inline void __poke_user_per_compat(struct task_struct *child, + addr_t addr, __u32 data) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy32->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy32->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* * Same as poke_user but for a 31 bit program. */ -static int -poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) +static int __poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) { - struct user32 *dummy32 = NULL; - per_struct32 *dummy_per32 = NULL; + struct compat_user *dummy32 = NULL; + __u32 tmp = (__u32) data; addr_t offset; - __u32 tmp; - - if (!test_thread_flag(TIF_31BIT) || - (addr & 3) || addr > sizeof(struct user32) - 3) - return -EIO; - - tmp = (__u32) data; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { + __u32 mask = PSW32_MASK_USER; + + mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & mask) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -488,78 +709,60 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) */ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; + } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + /* + * prevent writess of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* * floating point regs. are stored in the thread structure */ if (addr == (addr_t) &dummy32->regs.fp_regs.fpc && - (tmp & ~FPC_VALID_MASK) != 0) - /* Invalid floating point control. */ + test_fp_ctl(tmp)) return -EINVAL; offset = addr - (addr_t) &dummy32->regs.fp_regs; *(__u32 *)((addr_t) &child->thread.fp_regs + offset) = tmp; } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* - * per_info is found in the thread structure. + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy32->regs.per_info; - /* - * This is magic. See per_struct and per_struct32. - * By incident the offsets in per_struct are exactly - * twice the offsets in per_struct32 for all fields. - * The 8 byte fields need special handling though, - * because the second half (bytes 4-7) is needed and - * not the first half. - */ - if ((offset >= (addr_t) &dummy_per32->control_regs && - offset < (addr_t) (&dummy_per32->control_regs + 1)) || - (offset >= (addr_t) &dummy_per32->starting_addr && - offset <= (addr_t) &dummy_per32->ending_addr) || - offset == (addr_t) &dummy_per32->lowcore.words.address) - offset = offset*2 + 4; - else - offset = offset*2; - *(__u32 *)((addr_t) &child->thread.per_info + offset) = tmp; - + addr -= (addr_t) &dummy32->regs.per_info; + __poke_user_per_compat(child, addr, data); } - FixPerRegisters(child); return 0; } -static int -do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) +static int poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + if (!is_compat_task() || (addr & 3) || + addr > sizeof(struct compat_user) - 3) + return -EIO; + + return __poke_user_compat(child, addr, data); +} + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - unsigned int tmp; /* 4 bytes !! */ - ptrace_area_emu31 parea; + unsigned long addr = caddr; + unsigned long data = cdata; + compat_ptrace_area parea; int copied, ret; switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* read word at location addr. */ - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - if (copied != sizeof(tmp)) - return -EIO; - return put_user(tmp, (unsigned int __force __user *) data); - case PTRACE_PEEKUSR: /* read the word at location addr in the USER area. */ - return peek_user_emu31(child, addr, data); - - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - /* write the word at location addr. */ - tmp = data; - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1); - if (copied != sizeof(tmp)) - return -EIO; - return 0; + return peek_user_compat(child, addr, data); case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - return poke_user_emu31(child, addr, data); + return poke_user_compat(child, addr, data); case PTRACE_PEEKUSR_AREA: case PTRACE_POKEUSR_AREA: @@ -571,13 +774,13 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied = 0; while (copied < parea.len) { if (request == PTRACE_PEEKUSR_AREA) - ret = peek_user_emu31(child, addr, data); + ret = peek_user_compat(child, addr, data); else { __u32 utmp; if (get_user(utmp, (__u32 __force __user *) data)) return -EFAULT; - ret = poke_user_emu31(child, addr, utmp); + ret = poke_user_compat(child, addr, utmp); } if (ret) return ret; @@ -586,169 +789,582 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned int); } return 0; - case PTRACE_GETEVENTMSG: - return put_user((__u32) child->ptrace_message, - (unsigned int __force __user *) data); - case PTRACE_GETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_to_user32((compat_siginfo_t - __force __user *) data, - child->last_siginfo); - case PTRACE_SETSIGINFO: - if (child->last_siginfo == NULL) - return -EINVAL; - return copy_siginfo_from_user32(child->last_siginfo, - (compat_siginfo_t - __force __user *) data); + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned int __user *) data); + return 0; } - return ptrace_request(child, request, addr, data); + return compat_ptrace_request(child, request, addr, data); } #endif -#define PT32_IEEE_IP 0x13c - -static int -do_ptrace(struct task_struct *child, long request, long addr, long data) +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { - int ret; + long ret = 0; - if (request == PTRACE_ATTACH) - return ptrace_attach(child); + /* Do the secure computing check first. */ + if (secure_computing(regs->gprs[2])) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1; + goto out; + } /* - * Special cases to get/store the ieee instructions pointer. + * The sysc_tracesys code in entry.S stored the system + * call number to gprs[2]. */ - if (child == current) { - if (request == PTRACE_PEEKUSR && addr == PT_IEEE_IP) - return peek_user(child, addr, data); - if (request == PTRACE_POKEUSR && addr == PT_IEEE_IP) - return poke_user(child, addr, data); -#ifdef CONFIG_COMPAT - if (request == PTRACE_PEEKUSR && - addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) - return peek_user_emu31(child, addr, data); - if (request == PTRACE_POKEUSR && - addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) - return poke_user_emu31(child, addr, data); -#endif + if (test_thread_flag(TIF_SYSCALL_TRACE) && + (tracehook_report_syscall_entry(regs) || + regs->gprs[2] >= NR_syscalls)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + ret = -1; } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - return ret; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gprs[2]); - switch (request) { - case PTRACE_SYSCALL: - /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - /* restart after signal. */ - if (!valid_signal(data)) - return -EIO; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; + audit_syscall_entry(is_compat_task() ? + AUDIT_ARCH_S390 : AUDIT_ARCH_S390X, + regs->gprs[2], regs->orig_gpr2, + regs->gprs[3], regs->gprs[4], + regs->gprs[5]); +out: + return ret ?: regs->gprs[2]; +} - case PTRACE_KILL: - /* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - return 0; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - user_disable_single_step(child); - wake_up_process(child); - return 0; +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) +{ + audit_syscall_exit(regs); - case PTRACE_SINGLESTEP: - /* set the trap flag. */ - if (!valid_signal(data)) - return -EIO; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - user_enable_single_step(child); - /* give it a chance to run. */ - wake_up_process(child); - return 0; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->gprs[2]); - /* Do requests that differ for 31/64 bit */ - default: -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) - return do_ptrace_emu31(child, request, addr, data); -#endif - return do_ptrace_normal(child, request, addr, data); + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} + +/* + * user_regset definitions. + */ + +static int s390_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + unsigned long *k = kbuf; + while (count > 0) { + *k++ = __peek_user(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + unsigned long __user *u = ubuf; + while (count > 0) { + if (__put_user(__peek_user(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } } - /* Not reached. */ - return -EIO; + return 0; } -asmlinkage long -sys_ptrace(long request, long pid, long addr, long data) +static int s390_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - struct task_struct *child; - int ret; + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); - lock_kernel(); - if (request == PTRACE_TRACEME) { - ret = ptrace_traceme(); - goto out; + if (kbuf) { + const unsigned long *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } } - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) { - ret = PTR_ERR(child); - goto out; + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_fpregs_get(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, void *kbuf, void __user *ubuf) +{ + if (target == current) { + save_fp_ctl(&target->thread.fp_regs.fpc); + save_fp_regs(target->thread.fp_regs.fprs); } - ret = do_ptrace(child, request, addr, data); - put_task_struct(child); -out: - unlock_kernel(); - return ret; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fp_regs, 0, -1); } -asmlinkage void -syscall_trace(struct pt_regs *regs, int entryexit) +static int s390_fpregs_set(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, + const void __user *ubuf) { - if (unlikely(current->audit_context) && entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]); + int rc = 0; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - goto out; - if (!(current->ptrace & PT_PTRACED)) - goto out; - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); + if (target == current) { + save_fp_ctl(&target->thread.fp_regs.fpc); + save_fp_regs(target->thread.fp_regs.fprs); + } - /* - * If the debuffer has set an invalid system call number, - * we prepare to skip the system call restart handling. - */ - if (!entryexit && regs->gprs[2] >= NR_syscalls) - regs->trap = -1; + /* If setting FPC, must validate it first. */ + if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { + u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 }; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, + 0, offsetof(s390_fp_regs, fprs)); + if (rc) + return rc; + if (ufpc[1] != 0 || test_fp_ctl(ufpc[0])) + return -EINVAL; + target->thread.fp_regs.fpc = ufpc[0]; + } - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; + if (rc == 0 && count > 0) + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.fp_regs.fprs, + offsetof(s390_fp_regs, fprs), -1); + + if (rc == 0 && target == current) { + restore_fp_ctl(&target->thread.fp_regs.fpc); + restore_fp_regs(target->thread.fp_regs.fprs); + } + + return rc; +} + +#ifdef CONFIG_64BIT + +static int s390_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (count > 0) { + if (kbuf) { + unsigned long *k = kbuf; + *k = task_thread_info(target)->last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(task_thread_info(target)->last_break, u)) + return -EFAULT; + } + } + return 0; +} + +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static int s390_tdb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + unsigned char *data; + + if (!(regs->int_code & 0x200)) + return -ENODATA; + data = target->thread.trap_tdb; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); +} + +static int s390_tdb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +#endif + +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static const struct user_regset s390_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = s390_regs_get, + .set = s390_regs_set, + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = s390_fpregs_get, + .set = s390_fpregs_set, + }, +#ifdef CONFIG_64BIT + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_last_break_get, + .set = s390_last_break_set, + }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, +#endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, +}; + +static const struct user_regset_view user_s390_view = { + .name = UTS_MACHINE, + .e_machine = EM_S390, + .regsets = s390_regsets, + .n = ARRAY_SIZE(s390_regsets) +}; + +#ifdef CONFIG_COMPAT +static int s390_compat_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count > 0) { + *k++ = __peek_user_compat(target, pos); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count > 0) { + if (__put_user(__peek_user_compat(target, pos), u++)) + return -EFAULT; + count -= sizeof(*u); + pos += sizeof(*u); + } + } + return 0; +} + +static int s390_compat_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user_compat(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + compat_ulong_t word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user_compat(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_compat_regs_high_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t *gprs_high; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + compat_ulong_t *k = kbuf; + while (count > 0) { + *k++ = *gprs_high; + gprs_high += 2; + count -= sizeof(*k); + } + } else { + compat_ulong_t __user *u = ubuf; + while (count > 0) { + if (__put_user(*gprs_high, u++)) + return -EFAULT; + gprs_high += 2; + count -= sizeof(*u); + } + } + return 0; +} + +static int s390_compat_regs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + compat_ulong_t *gprs_high; + int rc = 0; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0) { + *gprs_high = *k++; + *gprs_high += 2; + count -= sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + *gprs_high = word; + *gprs_high += 2; + count -= sizeof(*u); + } } - out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X, - regs->gprs[2], regs->orig_gpr2, regs->gprs[3], - regs->gprs[4], regs->gprs[5]); + + return rc; +} + +static int s390_compat_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t last_break; + + if (count > 0) { + last_break = task_thread_info(target)->last_break; + if (kbuf) { + unsigned long *k = kbuf; + *k = last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(last_break, u)) + return -EFAULT; + } + } + return 0; +} + +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static const struct user_regset s390_compat_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_compat_regs_get, + .set = s390_compat_regs_set, + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_fpregs_get, + .set = s390_fpregs_set, + }, + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, + }, + [REGSET_TDB] = { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .get = s390_tdb_get, + .set = s390_tdb_set, + }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, + [REGSET_GENERAL_EXTENDED] = { + .core_note_type = NT_S390_HIGH_GPRS, + .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .get = s390_compat_regs_high_get, + .set = s390_compat_regs_high_set, + }, +}; + +static const struct user_regset_view user_s390_compat_view = { + .name = "s390", + .e_machine = EM_S390, + .regsets = s390_compat_regsets, + .n = ARRAY_SIZE(s390_compat_regsets) +}; +#endif + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + return &user_s390_compat_view; +#endif + return &user_s390_view; +} + +static const char *gpr_names[NUM_GPRS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", +}; + +unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +int regs_query_register_offset(const char *name) +{ + unsigned long offset; + + if (!name || *name != 'r') + return -EINVAL; + if (kstrtoul(name + 1, 10, &offset)) + return -EINVAL; + if (offset >= NUM_GPRS) + return -EINVAL; + return offset; +} + +const char *regs_query_register_name(unsigned int offset) +{ + if (offset >= NUM_GPRS) + return NULL; + return gpr_names[offset]; +} + +static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return *(unsigned long *)addr; } diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 2f481cc3d1c..dd8016b0477 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -1,19 +1,25 @@ /* - * arch/s390/kernel/reipl.S - * * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2000 * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) */ -#include <asm/lowcore.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/sigp.h> + +# +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 # # do_reipl_asm # Parameter: r2 = schid of reipl device # - .globl do_reipl_asm -do_reipl_asm: basr %r13,0 +ENTRY(do_reipl_asm) + basr %r13,0 .Lpg0: lpsw .Lnewpsw-.Lpg0(%r13) .Lpg1: # do store status of all registers @@ -53,7 +59,7 @@ do_reipl_asm: basr %r13,0 bas %r14,.Ldisab-.Lpg0(%r13) .L003: st %r1,__LC_SUBCHANNEL_ID lpsw 0 - sigp 0,0,0(6) + sigp 0,0,SIGP_RESTART .Ldisab: st %r14,.Ldispsw+4-.Lpg0(%r13) lpsw .Ldispsw-.Lpg0(%r13) .align 8 diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index c41930499a5..dc3b1273c4d 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -1,38 +1,83 @@ /* - * arch/s390/kernel/reipl.S - * - * S390 version - * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com) - Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * Copyright IBM Corp 2000, 2011 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Denis Joseph Barrow, */ -#include <asm/lowcore.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/sigp.h> + +# +# store_status +# +# Prerequisites to run this function: +# - Prefix register is set to zero +# - Original prefix register is stored in "dump_prefix_page" +# - Lowcore protection is off +# +ENTRY(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_RESTART + lghi %r1,SAVE_AREA_BASE + /* General purpose registers */ + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lg %r2,__LC_SAVE_AREA_RESTART + stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Access registers */ + stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point registers */ + std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point control register */ + stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* CPU timer */ + stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Saved prefix register */ + larl %r2,dump_prefix_page + mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + /* Clock comparator - seven bytes */ + larl %r2,.Lclkcmp + stckc 0(%r2) + mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + /* Program status word */ + epsw %r2,%r3 + st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) + st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) + larl %r2,store_status + stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) + br %r14 + + .section .bss + .align 8 +.Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm # Parameter: r2 = schid of reipl device # - .globl do_reipl_asm -do_reipl_asm: basr %r13,0 +ENTRY(do_reipl_asm) + basr %r13,0 .Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: # do store status of all registers - - stg %r1,.Lregsave-.Lpg0(%r13) - lghi %r1,0x1000 - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) - lg %r0,.Lregsave-.Lpg0(%r13) - stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1) - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1) - stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1) - lg %r10,.Ldump_pfx-.Lpg0(%r13) - mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10) - stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) - stckc .Lclkcmp-.Lpg0(%r13) - mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13) - stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) - stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) +.Lpg1: brasl %r14,store_status lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 @@ -62,17 +107,14 @@ do_reipl_asm: basr %r13,0 .L003: st %r1,__LC_SUBCHANNEL_ID lhi %r1,0 # mode 0 = esa slr %r0,%r0 # set cpuid to zero - sigp %r1,%r0,0x12 # switch to esa mode + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode lpsw 0 .Ldisab: sll %r14,1 srl %r14,1 # need to kill hi bit to avoid specification exceptions. st %r14,.Ldispsw+12-.Lpg0(%r13) lpswe .Ldispsw-.Lpg0(%r13) .align 8 -.Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 -.Ldump_pfx: .quad dump_prefix_page -.Lregsave: .quad 0x0000000000000000 .align 16 /* * These addresses have to be 31 bit otherwise @@ -81,7 +123,7 @@ do_reipl_asm: basr %r13,0 * in the ESA psw. * Bit 31 of the addresses has to be 0 for the * 31bit lpswe instruction a fact they appear to have - * ommited from the pop. + * omitted from the pop. */ .Lnewpsw: .quad 0x0000000080000000 .quad .Lpg1 diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index 3b456b80bce..f4e6f20e117 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -1,13 +1,14 @@ /* - * arch/s390/kernel/relocate_kernel.S - * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> * */ +#include <linux/linkage.h> +#include <asm/sigp.h> + /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t @@ -22,8 +23,7 @@ */ .text - .globl relocate_kernel - relocate_kernel: +ENTRY(relocate_kernel) basr %r13,0 # base address .base: stnsm sys_msk-.base(%r13),0xfb # disable DAT @@ -92,7 +92,7 @@ .no_diag308: sr %r1,%r1 # clear %r1 sr %r2,%r2 # clear %r2 - sigp %r1,%r2,0x12 # set cpuid to zero + sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero lpsw 0 # hopefully start new kernel... .align 8 @@ -112,6 +112,7 @@ .byte 0 .align 8 relocate_kernel_end: + .align 8 .globl relocate_kernel_len relocate_kernel_len: .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S index 1f9ea2067b5..cfac28330b0 100644 --- a/arch/s390/kernel/relocate_kernel64.S +++ b/arch/s390/kernel/relocate_kernel64.S @@ -1,13 +1,14 @@ /* - * arch/s390/kernel/relocate_kernel64.S - * - * (C) Copyright IBM Corp. 2005 + * Copyright IBM Corp. 2005 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> * */ +#include <linux/linkage.h> +#include <asm/sigp.h> + /* * moves the new kernel to its destination... * %r2 = pointer to first kimage_entry_t @@ -23,8 +24,7 @@ */ .text - .globl relocate_kernel - relocate_kernel: +ENTRY(relocate_kernel) basr %r13,0 # base address .base: stnsm sys_msk-.base(%r13),0xfb # disable DAT @@ -44,7 +44,7 @@ diag %r0,%r0,0x308 .back: lhi %r1,1 # mode 1 = esame - sigp %r1,%r0,0x12 # switch to esame mode + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode sam64 # switch to 64 bit addressing mode basr %r13,0 .back_base: @@ -95,7 +95,7 @@ sam31 # 31 bit mode sr %r1,%r1 # erase register r1 sr %r2,%r2 # erase register r2 - sigp %r1,%r2,0x12 # set cpuid to zero + sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero lpsw 0 # hopefully start new kernel... .align 8 @@ -115,6 +115,7 @@ .byte 0 .align 8 relocate_kernel_end: + .align 8 .globl relocate_kernel_len relocate_kernel_len: .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c new file mode 100644 index 00000000000..26b4ae96fdd --- /dev/null +++ b/arch/s390/kernel/runtime_instr.c @@ -0,0 +1,149 @@ +/* + * Copyright IBM Corp. 2012 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <asm/runtime_instr.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +/* empty control block to disable RI by loading it */ +struct runtime_instr_cb runtime_instr_empty_cb; + +static int runtime_instr_avail(void) +{ + return test_facility(64); +} + +static void disable_runtime_instr(void) +{ + struct pt_regs *regs = task_pt_regs(current); + + load_runtime_instr_cb(&runtime_instr_empty_cb); + + /* + * Make sure the RI bit is deleted from the PSW. If the user did not + * switch off RI before the system call the process will get a + * specification exception otherwise. + */ + regs->psw.mask &= ~PSW_MASK_RI; +} + +static void init_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + cb->buf_limit = 0xfff; + cb->int_requested = 1; + cb->pstate = 1; + cb->pstate_set_buf = 1; + cb->pstate_sample = 1; + cb->pstate_collect = 1; + cb->key = PAGE_DEFAULT_KEY; + cb->valid = 1; +} + +void exit_thread_runtime_instr(void) +{ + struct task_struct *task = current; + + if (!task->thread.ri_cb) + return; + disable_runtime_instr(); + kfree(task->thread.ri_cb); + task->thread.ri_signum = 0; + task->thread.ri_cb = NULL; +} + +static void runtime_instr_int_handler(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct siginfo info; + + if (!(param32 & CPU_MF_INT_RI_MASK)) + return; + + inc_irq_stat(IRQEXT_CMR); + + if (!current->thread.ri_cb) + return; + if (current->thread.ri_signum < SIGRTMIN || + current->thread.ri_signum > SIGRTMAX) { + WARN_ON_ONCE(1); + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = current->thread.ri_signum; + info.si_code = SI_QUEUE; + if (param32 & CPU_MF_INT_RI_BUF_FULL) + info.si_int = ENOBUFS; + else if (param32 & CPU_MF_INT_RI_HALTED) + info.si_int = ECANCELED; + else + return; /* unknown reason */ + + send_sig_info(current->thread.ri_signum, &info, current); +} + +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +{ + struct runtime_instr_cb *cb; + + if (!runtime_instr_avail()) + return -EOPNOTSUPP; + + if (command == S390_RUNTIME_INSTR_STOP) { + preempt_disable(); + exit_thread_runtime_instr(); + preempt_enable(); + return 0; + } + + if (command != S390_RUNTIME_INSTR_START || + (signum < SIGRTMIN || signum > SIGRTMAX)) + return -EINVAL; + + if (!current->thread.ri_cb) { + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + } else { + cb = current->thread.ri_cb; + memset(cb, 0, sizeof(*cb)); + } + + init_runtime_instr_cb(cb); + current->thread.ri_signum = signum; + + /* now load the control block to make it available */ + preempt_disable(); + current->thread.ri_cb = cb; + load_runtime_instr_cb(cb); + preempt_enable(); + return 0; +} + +static int __init runtime_instr_init(void) +{ + int rc; + + if (!runtime_instr_avail()) + return 0; + + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + runtime_instr_int_handler); + if (rc) + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + else + pr_info("Runtime instrumentation facility initialized\n"); + return rc; +} +device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c deleted file mode 100644 index acf93dba772..00000000000 --- a/arch/s390/kernel/s390_ext.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * arch/s390/kernel/s390_ext.c - * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/errno.h> -#include <linux/kernel_stat.h> -#include <linux/interrupt.h> - -#include <asm/lowcore.h> -#include <asm/s390_ext.h> -#include <asm/irq_regs.h> -#include <asm/irq.h> - -/* - * ext_int_hash[index] is the start of the list for all external interrupts - * that hash to this index. With the current set of external interrupts - * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000 - * iucv and 0x2603 pfault) this is always the first element. - */ -ext_int_info_t *ext_int_hash[256] = { NULL, }; - -static inline int ext_hash(__u16 code) -{ - return (code + (code >> 9)) & 0xff; -} - -int register_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - ext_int_info_t *p; - int index; - - p = kmalloc(sizeof(ext_int_info_t), GFP_ATOMIC); - if (p == NULL) - return -ENOMEM; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} - -int register_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *p) -{ - int index; - - if (p == NULL) - return -EINVAL; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} - -int unregister_external_interrupt(__u16 code, ext_int_handler_t handler) -{ - ext_int_info_t *p, *q; - int index; - - index = ext_hash(code); - q = NULL; - p = ext_int_hash[index]; - while (p != NULL) { - if (p->code == code && p->handler == handler) - break; - q = p; - p = p->next; - } - if (p == NULL) - return -ENOENT; - if (q != NULL) - q->next = p->next; - else - ext_int_hash[index] = p->next; - kfree(p); - return 0; -} - -int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *p) -{ - ext_int_info_t *q; - int index; - - if (p == NULL || p->code != code || p->handler != handler) - return -EINVAL; - index = ext_hash(code); - q = ext_int_hash[index]; - if (p != q) { - while (q != NULL) { - if (q->next == p) - break; - q = q->next; - } - if (q == NULL) - return -ENOENT; - q->next = p->next; - } else - ext_int_hash[index] = p->next; - return 0; -} - -void do_extint(struct pt_regs *regs, unsigned short code) -{ - ext_int_info_t *p; - int index; - struct pt_regs *old_regs; - - old_regs = set_irq_regs(regs); - irq_enter(); - asm volatile ("mc 0,0"); - if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) - /** - * Make sure that the i/o interrupt did not "overtake" - * the last HZ timer interrupt. - */ - account_ticks(S390_lowcore.int_clock); - kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; - index = ext_hash(code); - for (p = ext_int_hash[index]; p; p = p->next) { - if (likely(p->code == code)) - p->handler(code); - } - irq_exit(); - set_irq_regs(old_regs); -} - -EXPORT_SYMBOL(register_external_interrupt); -EXPORT_SYMBOL(unregister_external_interrupt); diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 7234c737f82..9f60467938d 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -1,52 +1,13 @@ -/* - * arch/s390/kernel/s390_ksyms.c - * - * S390 version - */ -#include <linux/highuid.h> #include <linux/module.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/syscalls.h> -#include <linux/interrupt.h> -#include <asm/checksum.h> -#include <asm/cpcmd.h> -#include <asm/delay.h> -#include <asm/pgalloc.h> -#include <asm/setup.h> -#ifdef CONFIG_IP_MULTICAST -#include <net/arp.h> -#endif - -/* - * memory management - */ -EXPORT_SYMBOL(_oi_bitmap); -EXPORT_SYMBOL(_ni_bitmap); -EXPORT_SYMBOL(_zb_findmap); -EXPORT_SYMBOL(_sb_findmap); - -/* - * semaphore ops - */ -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); +#include <linux/kvm_host.h> +#include <asm/ftrace.h> -/* - * binfmt_elf loader - */ -extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(empty_zero_page); - -/* - * misc. - */ -EXPORT_SYMBOL(machine_flags); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(csum_fold); -EXPORT_SYMBOL(console_mode); -EXPORT_SYMBOL(console_devno); -EXPORT_SYMBOL(console_irq); +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif +#if IS_ENABLED(CONFIG_KVM) +EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); +#endif +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S new file mode 100644 index 00000000000..a41f2c99dcc --- /dev/null +++ b/arch/s390/kernel/sclp.S @@ -0,0 +1,360 @@ +/* + * Mini SCLP driver. + * + * Copyright IBM Corp. 2004, 2009 + * + * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <linux/linkage.h> +#include <asm/irq.h> + +LC_EXT_NEW_PSW = 0x58 # addr of ext int handler +LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit +LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter +LC_EXT_INT_CODE = 0x86 # addr of ext int code +LC_AR_MODE_ID = 0xa3 + +# +# Subroutine which waits synchronously until either an external interruption +# or a timeout occurs. +# +# Parameters: +# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds +# +# Returns: +# R2 = 0 on interrupt, 2 on timeout +# R3 = external interruption parameter if R2=0 +# + +_sclp_wait_int: + stm %r6,%r15,24(%r15) # save registers + basr %r13,0 # get base register +.LbaseS1: + ahi %r15,-96 # create stack frame + la %r8,LC_EXT_NEW_PSW # register int handler + la %r9,.LextpswS1-.LbaseS1(%r13) +#ifdef CONFIG_64BIT + tm LC_AR_MODE_ID,1 + jno .Lesa1 + la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit + la %r9,.LextpswS1_64-.LbaseS1(%r13) +.Lesa1: +#endif + mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8) + mvc 0(16,%r8),0(%r9) +#ifdef CONFIG_64BIT + epsw %r6,%r7 # set current addressing mode + nill %r6,0x1 # in new psw (31 or 64 bit mode) + nilh %r7,0x8000 + stm %r6,%r7,0(%r8) +#endif + lhi %r6,0x0200 # cr mask for ext int (cr0.54) + ltr %r2,%r2 + jz .LsetctS1 + ahi %r6,0x0800 # cr mask for clock int (cr0.52) + stck .LtimeS1-.LbaseS1(%r13) # initiate timeout + al %r2,.LtimeS1-.LbaseS1(%r13) + st %r2,.LtimeS1-.LbaseS1(%r13) + sckc .LtimeS1-.LbaseS1(%r13) + +.LsetctS1: + stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts + l %r0,.LctlS1-.LbaseS1(%r13) + lhi %r1,~(0x200 | 0x800) # clear old values + nr %r1,%r0 + or %r1,%r6 # set new value + st %r1,.LctlS1-.LbaseS1(%r13) + lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) + st %r0,.LctlS1-.LbaseS1(%r13) + lhi %r2,2 # return code for timeout +.LloopS1: + lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt +.LwaitS1: + lh %r7,LC_EXT_INT_CODE + chi %r7,EXT_IRQ_CLK_COMP # timeout? + je .LtimeoutS1 + chi %r7,EXT_IRQ_SERVICE_SIG # service int? + jne .LloopS1 + sr %r2,%r2 + l %r3,LC_EXT_INT_PARAM +.LtimeoutS1: + lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting + # restore old handler + mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13) + lm %r6,%r15,120(%r15) # restore registers + br %r14 # return to caller + + .align 8 +.LoldpswS1: + .long 0, 0, 0, 0 # old ext int PSW +.LextpswS1: + .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int +#ifdef CONFIG_64BIT +.LextpswS1_64: + .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit +#endif +.LwaitpswS1: + .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int +.LtimeS1: + .quad 0 # current time +.LctlS1: + .long 0 # CT0 contents + +# +# Subroutine to synchronously issue a service call. +# +# Parameters: +# R2 = command word +# R3 = sccb address +# +# Returns: +# R2 = 0 on success, 1 on failure +# R3 = sccb response code if R2 = 0 +# + +_sclp_servc: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + lr %r6,%r2 # save command word + lr %r7,%r3 # save sccb address +.LretryS2: + lhi %r2,1 # error return code + .insn rre,0xb2200000,%r6,%r7 # servc + brc 1,.LendS2 # exit if not operational + brc 8,.LnotbusyS2 # go on if not busy + sr %r2,%r2 # wait until no longer busy + bras %r14,_sclp_wait_int + j .LretryS2 # retry +.LnotbusyS2: + sr %r2,%r2 # wait until result + bras %r14,_sclp_wait_int + sr %r2,%r2 + lh %r3,6(%r7) +.LendS2: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +# +# Subroutine to set up the SCLP interface. +# +# Parameters: +# R2 = 0 to activate, non-zero to deactivate +# +# Returns: +# R2 = 0 on success, non-zero on failure +# + +_sclp_setup: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + basr %r13,0 # get base register +.LbaseS3: + l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb + mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13) + ltr %r2,%r2 # initialization? + jz .LdoinitS3 # go ahead + # clear masks + xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6) +.LdoinitS3: + l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word + lr %r3,%r6 # get sccb address + bras %r14,_sclp_servc # issue service call + ltr %r2,%r2 # servc successful? + jnz .LerrorS3 + chi %r3,0x20 # write mask successful? + jne .LerrorS3 + # check masks + la %r2,.LinitmaskS3-.LinitsccbS3(%r6) + l %r1,0(%r2) # receive mask ok? + n %r1,12(%r2) + cl %r1,0(%r2) + jne .LerrorS3 + l %r1,4(%r2) # send mask ok? + n %r1,8(%r2) + cl %r1,4(%r2) + sr %r2,%r2 + je .LendS3 +.LerrorS3: + lhi %r2,1 # error return code +.LendS3: + lm %r6,%r15,120(%r15) # restore registers + br %r14 +.LwritemaskS3: + .long 0x00780005 # SCLP command for write mask +.LinitsccbS3: + .word .LinitendS3-.LinitsccbS3 + .byte 0,0,0,0 + .word 0 + .word 0 + .word 4 +.LinitmaskS3: + .long 0x80000000 + .long 0x40000000 + .long 0 + .long 0 +.LinitendS3: + +# +# Subroutine which prints a given text to the SCLP console. +# +# Parameters: +# R2 = address of nil-terminated ASCII text +# +# Returns: +# R2 = 0 on success, 1 on failure +# + +_sclp_print: + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame + basr %r13,0 # get base register +.LbaseS4: + l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb + mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13) + la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr + sr %r0,%r0 + l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table +.LinitmtoS4: + # initialize mto + mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13) + lhi %r6,.LmtoendS4-.LmtoS4 # current mto length +.LloopS4: + ic %r0,0(%r2) # get character + ahi %r2,1 + ltr %r0,%r0 # end of string? + jz .LfinalizemtoS4 + chi %r0,0x0a # end of line (NL)? + jz .LfinalizemtoS4 + stc %r0,0(%r6,%r7) # copy to mto + la %r11,0(%r6,%r7) + tr 0(1,%r11),0(%r10) # translate to EBCDIC + ahi %r6,1 + j .LloopS4 +.LfinalizemtoS4: + sth %r6,0(%r7) # update mto length + lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length + ar %r9,%r6 + sth %r9,.LmdbS4-.LwritesccbS4(%r8) + lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length + ar %r9,%r6 + sth %r9,.LevbufS4-.LwritesccbS4(%r8) + lh %r9,0(%r8) # update sccb length + ar %r9,%r6 + sth %r9,0(%r8) + ar %r7,%r6 # update current mto address + ltr %r0,%r0 # more characters? + jnz .LinitmtoS4 + l %r2,.LwritedataS4-.LbaseS4(%r13)# write data + lr %r3,%r8 + bras %r14,_sclp_servc + ltr %r2,%r2 # servc successful? + jnz .LendS4 + chi %r3,0x20 # write data successful? + je .LendS4 + lhi %r2,1 # error return code +.LendS4: + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +# +# Function which prints a given text to the SCLP console. +# +# Parameters: +# R2 = address of nil-terminated ASCII text +# +# Returns: +# R2 = 0 on success, 1 on failure +# + +ENTRY(_sclp_print_early) + stm %r6,%r15,24(%r15) # save registers + ahi %r15,-96 # create stack frame +#ifdef CONFIG_64BIT + tm LC_AR_MODE_ID,1 + jno .Lesa2 + ahi %r15,-80 + stmh %r6,%r15,96(%r15) # store upper register halves +.Lesa2: +#endif + lr %r10,%r2 # save string pointer + lhi %r2,0 + bras %r14,_sclp_setup # enable console + ltr %r2,%r2 + jnz .LendS5 + lr %r2,%r10 + bras %r14,_sclp_print # print string + ltr %r2,%r2 + jnz .LendS5 + lhi %r2,1 + bras %r14,_sclp_setup # disable console +.LendS5: +#ifdef CONFIG_64BIT + tm LC_AR_MODE_ID,1 + jno .Lesa3 + lmh %r6,%r15,96(%r15) # store upper register halves + ahi %r15,80 +.Lesa3: +#endif + lm %r6,%r15,120(%r15) # restore registers + br %r14 + +.LwritedataS4: + .long 0x00760005 # SCLP command for write data +.LwritesccbS4: + # sccb + .word .LmtoS4-.LwritesccbS4 + .byte 0 + .byte 0,0,0 + .word 0 + + # evbuf +.LevbufS4: + .word .LmtoS4-.LevbufS4 + .byte 0x02 + .byte 0 + .word 0 + +.LmdbS4: + # mdb + .word .LmtoS4-.LmdbS4 + .word 1 + .long 0xd4c4c240 + .long 1 + + # go +.LgoS4: + .word .LmtoS4-.LgoS4 + .word 1 + .long 0 + .byte 0,0,0,0,0,0,0,0 + .byte 0,0,0 + .byte 0 + .byte 0,0,0,0,0,0,0 + .byte 0 + .word 0 + .byte 0,0,0,0,0,0,0,0,0,0 + .byte 0,0,0,0,0,0,0,0 + .byte 0,0,0,0,0,0,0,0 + +.LmtoS4: + .word .LmtoendS4-.LmtoS4 + .word 4 + .word 0x1000 + .byte 0 + .byte 0,0,0 +.LmtoendS4: + + # Global constants +.LsccbS0: + .long _sclp_work_area +.Lascebc: + .long _ascebc + +.section .data,"aw",@progbits + .balign 4096 +_sclp_work_area: + .fill 4096 +.previous diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c deleted file mode 100644 index 191303f6c1d..00000000000 --- a/arch/s390/kernel/semaphore.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * linux/arch/s390/kernel/semaphore.c - * - * S390 version - * Copyright (C) 1998-2000 IBM Corporation - * Author(s): Martin Schwidefsky - * - * Derived from "linux/arch/i386/kernel/semaphore.c - * Copyright (C) 1999, Linus Torvalds - * - */ -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/init.h> - -#include <asm/semaphore.h> - -/* - * Atomically update sem->count. Equivalent to: - * old_val = sem->count.counter; - * new_val = ((old_val >= 0) ? old_val : 0) + incr; - * sem->count.counter = new_val; - * return old_val; - */ -static inline int __sem_update_count(struct semaphore *sem, int incr) -{ - int old_val, new_val; - - asm volatile( - " l %0,0(%3)\n" - "0: ltr %1,%0\n" - " jhe 1f\n" - " lhi %1,0\n" - "1: ar %1,%4\n" - " cs %0,%1,0(%3)\n" - " jl 0b\n" - : "=&d" (old_val), "=&d" (new_val), "=m" (sem->count) - : "a" (&sem->count), "d" (incr), "m" (sem->count) - : "cc"); - return old_val; -} - -/* - * The inline function up() incremented count but the result - * was <= 0. This indicates that some process is waiting on - * the semaphore. The semaphore is free and we'll wake the - * first sleeping process, so we set count to 1 unless some - * other cpu has called up in the meantime in which case - * we just increment count by 1. - */ -void __up(struct semaphore *sem) -{ - __sem_update_count(sem, 1); - wake_up(&sem->wait); -} - -/* - * The inline function down() decremented count and the result - * was < 0. The wait loop will atomically test and update the - * semaphore counter following the rules: - * count > 0: decrement count, wake up queue and exit. - * count <= 0: set count to -1, go to sleep. - */ -void __sched __down(struct semaphore * sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); - add_wait_queue_exclusive(&sem->wait, &wait); - while (__sem_update_count(sem, -1) <= 0) { - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - remove_wait_queue(&sem->wait, &wait); - __set_task_state(tsk, TASK_RUNNING); - wake_up(&sem->wait); -} - -/* - * Same as __down() with an additional test for signals. - * If a signal is pending the count is updated as follows: - * count > 0: wake up queue and exit. - * count <= 0: set count to 0, wake up queue and exit. - */ -int __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - __set_task_state(tsk, TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&sem->wait, &wait); - while (__sem_update_count(sem, -1) <= 0) { - if (signal_pending(current)) { - __sem_update_count(sem, 0); - retval = -EINTR; - break; - } - schedule(); - set_task_state(tsk, TASK_INTERRUPTIBLE); - } - remove_wait_queue(&sem->wait, &wait); - __set_task_state(tsk, TASK_RUNNING); - wake_up(&sem->wait); - return retval; -} - diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 290e504061a..1e2264b46e4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/setup.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2012 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -14,15 +12,18 @@ * This file handles the architecture-dependent parts of initialization */ +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/errno.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/tty.h> #include <linux/ioport.h> @@ -32,17 +33,21 @@ #include <linux/bootmem.h> #include <linux/root_dev.h> #include <linux/console.h> -#include <linux/seq_file.h> #include <linux/kernel_stat.h> #include <linux/device.h> #include <linux/notifier.h> #include <linux/pfn.h> #include <linux/ctype.h> #include <linux/reboot.h> +#include <linux/topology.h> +#include <linux/ftrace.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> +#include <linux/compat.h> #include <asm/ipl.h> -#include <asm/uaccess.h> -#include <asm/system.h> +#include <asm/facility.h> #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> @@ -52,77 +57,56 @@ #include <asm/ptrace.h> #include <asm/sections.h> #include <asm/ebcdic.h> -#include <asm/compat.h> - -long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); - -/* - * User copy operations. - */ -struct uaccess_ops uaccess; -EXPORT_SYMBOL(uaccess); +#include <asm/kvm_virtio.h> +#include <asm/diag.h> +#include <asm/os_info.h> +#include <asm/sclp.h> +#include "entry.h" /* * Machine setup.. */ unsigned int console_mode = 0; +EXPORT_SYMBOL(console_mode); + unsigned int console_devno = -1; +EXPORT_SYMBOL(console_devno); + unsigned int console_irq = -1; -unsigned long machine_flags = 0; +EXPORT_SYMBOL(console_irq); + unsigned long elf_hwcap = 0; char elf_platform[ELF_PLATFORM_SIZE]; -struct mem_chunk __meminitdata memory_chunk[MEMORY_CHUNKS]; -volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ -static unsigned long __initdata memory_end; +int __initdata memory_end_set; +unsigned long __initdata memory_end; +unsigned long __initdata max_physmem_end; -/* - * This is set up by the setup-routine at boot-time - * for S390 need to find out, what we have to setup - * using address 0x10400 ... - */ +unsigned long VMALLOC_START; +EXPORT_SYMBOL(VMALLOC_START); -#include <asm/setup.h> +unsigned long VMALLOC_END; +EXPORT_SYMBOL(VMALLOC_END); -static struct resource code_resource = { - .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); -static struct resource data_resource = { - .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, -}; +#ifdef CONFIG_64BIT +unsigned long MODULES_VADDR; +unsigned long MODULES_END; +#endif + +/* An array with a pointer to the lowcore of every CPU. */ +struct _lowcore *lowcore_ptr[NR_CPUS]; +EXPORT_SYMBOL(lowcore_ptr); /* - * cpu_init() initializes state that is per-CPU. + * This is set up by the setup-routine at boot-time + * for S390 need to find out, what we have to setup + * using address 0x10400 ... */ -void __cpuinit cpu_init(void) -{ - int addr = hard_smp_processor_id(); - - /* - * Store processor id in lowcore (used e.g. in timer_interrupt) - */ - get_cpu_id(&S390_lowcore.cpu_data.cpu_id); - S390_lowcore.cpu_data.cpu_addr = addr; - /* - * Force FPU initialization: - */ - clear_thread_flag(TIF_USEDFPU); - clear_used_math(); - - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if (current->mm) - BUG(); - enter_lazy_tlb(&init_mm, current); -} +#include <asm/setup.h> /* * condev= and conmode= setup parameter. @@ -142,6 +126,21 @@ static int __init condev_setup(char *str) __setup("condev=", condev_setup); +static void __init set_preferred_console(void) +{ + if (MACHINE_IS_KVM) { + if (sclp_has_vt220()) + add_preferred_console("ttyS", 1, NULL); + else if (sclp_has_linemode()) + add_preferred_console("ttyS", 0, NULL); + else + add_preferred_console("hvc", 0, NULL); + } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + add_preferred_console("ttyS", 0, NULL); + else if (CONSOLE_IS_3270) + add_preferred_console("tty3270", 0, NULL); +} + static int __init conmode_setup(char *str) { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) @@ -156,6 +155,7 @@ static int __init conmode_setup(char *str) if (strncmp(str, "3270", 5) == 0) SET_CONSOLE_3270; #endif + set_preferred_console(); return 1; } @@ -204,12 +204,6 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_P390) { -#if defined(CONFIG_TN3215_CONSOLE) - SET_CONSOLE_3215; -#elif defined(CONFIG_TN3270_CONSOLE) - SET_CONSOLE_3270; -#endif } else { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; @@ -217,26 +211,19 @@ static void __init conmode_default(void) } } -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) -static void __init setup_zfcpdump(unsigned int console_devno) +#ifdef CONFIG_CRASH_DUMP +static void __init setup_zfcpdump(void) { - static char str[64]; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; - if (console_devno != -1) - sprintf(str, "cio_ignore=all,!0.0.%04x,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno, console_devno); - else - sprintf(str, "cio_ignore=all,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno); - strcat(COMMAND_LINE, " "); - strcat(COMMAND_LINE, str); + if (OLDMEM_BASE) + return; + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; } #else -static inline void setup_zfcpdump(unsigned int console_devno) {} -#endif /* CONFIG_ZFCPDUMP */ +static inline void setup_zfcpdump(void) {} +#endif /* CONFIG_CRASH_DUMP */ /* * Reboot, halt and power_off stubs. They just call _machine_restart, @@ -280,421 +267,430 @@ void machine_power_off(void) * Dummy power off function. */ void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL_GPL(pm_power_off); static int __init early_parse_mem(char *p) { memory_end = memparse(p, &p); + memory_end &= PAGE_MASK; + memory_end_set = 1; return 0; } early_param("mem", early_parse_mem); -/* - * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes - */ -static int __init early_parse_ipldelay(char *p) +static int __init parse_vmalloc(char *arg) { - unsigned long delay = 0; - - delay = simple_strtoul(p, &p, 0); - - switch (*p) { - case 's': - case 'S': - delay *= 1000000; - break; - case 'm': - case 'M': - delay *= 60 * 1000000; - } - - /* now wait for the requested amount of time */ - udelay(delay); - + if (!arg) + return -EINVAL; + VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; return 0; } -early_param("ipldelay", early_parse_ipldelay); - -#ifdef CONFIG_S390_SWITCH_AMODE -unsigned int switch_amode = 0; -EXPORT_SYMBOL_GPL(switch_amode); +early_param("vmalloc", parse_vmalloc); -static void set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) -{ - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; -#ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; -#endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; - - if (MACHINE_HAS_MVCOS) { - printk("mvcos available.\n"); - memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); - } else { - printk("mvcos not available.\n"); - memcpy(&uaccess, &uaccess_pt, sizeof(uaccess)); - } -} - -/* - * Switch kernel/user addressing modes? - */ -static int __init early_parse_switch_amode(char *p) -{ - switch_amode = 1; - return 0; -} -early_param("switch_amode", early_parse_switch_amode); +void *restart_stack __attribute__((__section__(".data"))); -#else /* CONFIG_S390_SWITCH_AMODE */ -static inline void set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) -{ -} -#endif /* CONFIG_S390_SWITCH_AMODE */ - -#ifdef CONFIG_S390_EXEC_PROTECT -unsigned int s390_noexec = 0; -EXPORT_SYMBOL_GPL(s390_noexec); - -/* - * Enable execute protection? - */ -static int __init early_parse_noexec(char *p) -{ - if (!strncmp(p, "off", 3)) - return 0; - switch_amode = 1; - s390_noexec = 1; - return 0; -} -early_param("noexec", early_parse_noexec); -#endif /* CONFIG_S390_EXEC_PROTECT */ - -static void setup_addressing_mode(void) -{ - if (s390_noexec) { - printk("S390 execute protection active, "); - set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY); - } else if (switch_amode) { - printk("S390 address spaces switched, "); - set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY); - } -#ifdef CONFIG_TRACE_IRQFLAGS - sysc_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; - io_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; -#endif -} - -static void __init -setup_lowcore(void) +static void __init setup_lowcore(void) { struct _lowcore *lc; - int lc_pages; /* * Setup lowcore for boot cpu */ - lc_pages = sizeof(void *) == 8 ? 2 : 1; - lc = (struct _lowcore *) - __alloc_bootmem(lc_pages * PAGE_SIZE, lc_pages * PAGE_SIZE, 0); - memset(lc, 0, lc_pages * PAGE_SIZE); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); + lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); + lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - if (switch_amode) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = PSW_KERNEL_BITS | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; - lc->ipl_device = S390_lowcore.ipl_device; - lc->jiffy_timer = -1LL; - lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; + lc->clock_comparator = -1ULL; + lc->kernel_stack = ((unsigned long) &init_thread_union) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->async_stack = (unsigned long) - __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; + __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->panic_stack = (unsigned long) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->machine_flags = S390_lowcore.machine_flags; + lc->stfl_fac_list = S390_lowcore.stfl_fac_list; + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = (__u32) - __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); + __alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0); /* enable extended save area */ __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif + lc->sync_enter_timer = S390_lowcore.sync_enter_timer; + lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->exit_timer = S390_lowcore.exit_timer; + lc->user_timer = S390_lowcore.user_timer; + lc->system_timer = S390_lowcore.system_timer; + lc->steal_timer = S390_lowcore.steal_timer; + lc->last_update_timer = S390_lowcore.last_update_timer; + lc->last_update_clock = S390_lowcore.last_update_clock; + lc->ftrace_func = S390_lowcore.ftrace_func; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necessary if + * PSW restart is done on an offline CPU that has lowcore zero. + */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1UL; + + /* Setup absolute zero lowcore */ + mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); + mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); + mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data); + mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); + mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); + +#ifdef CONFIG_SMP + lc->spinlock_lockval = arch_spin_lockval(0); +#endif + set_prefix((u32)(unsigned long) lc); + lowcore_ptr[0] = lc; } -static void __init -setup_resources(void) +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM, +}; + +static struct resource __initdata *standard_resources[] = { + &code_resource, + &data_resource, + &bss_resource, +}; + +static void __init setup_resources(void) { - struct resource *res, *sub_res; - int i; + struct resource *res, *std_res, *sub_res; + struct memblock_region *reg; + int j; code_resource.start = (unsigned long) &_text; code_resource.end = (unsigned long) &_etext - 1; data_resource.start = (unsigned long) &_etext; data_resource.end = (unsigned long) &_edata - 1; + bss_resource.start = (unsigned long) &__bss_start; + bss_resource.end = (unsigned long) &__bss_stop - 1; - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; - res = alloc_bootmem_low(sizeof(struct resource)); + for_each_memblock(memory, reg) { + res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; - switch (memory_chunk[i].type) { - case CHUNK_READ_WRITE: - res->name = "System RAM"; - break; - case CHUNK_READ_ONLY: - res->name = "System ROM"; - res->flags |= IORESOURCE_READONLY; - break; - default: - res->name = "reserved"; - } - res->start = memory_chunk[i].addr; - res->end = memory_chunk[i].addr + memory_chunk[i].size - 1; - request_resource(&iomem_resource, res); - if (code_resource.start >= res->start && - code_resource.start <= res->end && - code_resource.end > res->end) { - sub_res = alloc_bootmem_low(sizeof(struct resource)); - memcpy(sub_res, &code_resource, - sizeof(struct resource)); - sub_res->end = res->end; - code_resource.start = res->end + 1; - request_resource(res, sub_res); - } + res->name = "System RAM"; + res->start = reg->base; + res->end = reg->base + reg->size - 1; + request_resource(&iomem_resource, res); - if (code_resource.start >= res->start && - code_resource.start <= res->end && - code_resource.end <= res->end) - request_resource(res, &code_resource); - - if (data_resource.start >= res->start && - data_resource.start <= res->end && - data_resource.end > res->end) { - sub_res = alloc_bootmem_low(sizeof(struct resource)); - memcpy(sub_res, &data_resource, - sizeof(struct resource)); - sub_res->end = res->end; - data_resource.start = res->end + 1; - request_resource(res, sub_res); + for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { + std_res = standard_resources[j]; + if (std_res->start < res->start || + std_res->start > res->end) + continue; + if (std_res->end > res->end) { + sub_res = alloc_bootmem_low(sizeof(*sub_res)); + *sub_res = *std_res; + sub_res->end = res->end; + std_res->start = res->end + 1; + request_resource(res, sub_res); + } else { + request_resource(res, std_res); + } } - - if (data_resource.start >= res->start && - data_resource.start <= res->end && - data_resource.end <= res->end) - request_resource(res, &data_resource); } } -unsigned long real_memory_size; -EXPORT_SYMBOL_GPL(real_memory_size); - static void __init setup_memory_end(void) { - unsigned long memory_size; - unsigned long max_mem; - int i; - -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) - if (ipl_info.type == IPL_TYPE_FCP_DUMP) - memory_end = ZFCPDUMP_HSA_SIZE; + unsigned long vmax, vmalloc_size, tmp; + + /* Choose kernel address space layout: 2, 3, or 4 levels. */ +#ifdef CONFIG_64BIT + vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; + if (tmp <= (1UL << 42)) + vmax = 1UL << 42; /* 3-level kernel page table */ + else + vmax = 1UL << 53; /* 4-level kernel page table */ + /* module area is at the end of the kernel address space. */ + MODULES_END = vmax; + MODULES_VADDR = MODULES_END - MODULES_LEN; + VMALLOC_END = MODULES_VADDR; +#else + vmalloc_size = VMALLOC_END ?: 96UL << 20; + vmax = 1UL << 31; /* 2-level kernel page table */ + /* vmalloc area is at the end of the kernel address space. */ + VMALLOC_END = vmax; #endif - memory_size = 0; - memory_end &= PAGE_MASK; + VMALLOC_START = vmax - vmalloc_size; + + /* Split remaining virtual space between 1:1 mapping & vmemmap array */ + tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + tmp = SECTION_ALIGN_UP(tmp); + tmp = VMALLOC_START - tmp * sizeof(struct page); + tmp &= ~((vmax >> 11) - 1); /* align to page table level */ + tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); + vmemmap = (struct page *) tmp; + + /* Take care that memory_end is set and <= vmemmap */ + memory_end = min(memory_end ?: max_physmem_end, tmp); + max_pfn = max_low_pfn = PFN_DOWN(memory_end); + memblock_remove(memory_end, ULONG_MAX); + + pr_notice("Max memory size: %luMB\n", memory_end >> 20); +} - max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS; - memory_end = min(max_mem, memory_end); +static void __init setup_vmcoreinfo(void) +{ + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); +} - /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. - */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - unsigned long start, end; - struct mem_chunk *chunk; - unsigned long align; - - chunk = &memory_chunk[i]; - align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); - start = (chunk->addr + align - 1) & ~(align - 1); - end = (chunk->addr + chunk->size) & ~(align - 1); - if (start >= end) - memset(chunk, 0, sizeof(*chunk)); - else { - chunk->addr = start; - chunk->size = end - start; - } - } +#ifdef CONFIG_CRASH_DUMP + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &memory_chunk[i]; +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; - real_memory_size = max(real_memory_size, - chunk->addr + chunk->size); - if (chunk->addr >= max_mem) { - memset(chunk, 0, sizeof(*chunk)); - continue; - } - if (chunk->addr + chunk->size > max_mem) - chunk->size = max_mem - chunk->addr; - memory_size = max(memory_size, chunk->addr + chunk->size); +#endif + +/* + * Make sure that the area behind memory_end is protected + */ +static void reserve_memory_end(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (ipl_info.type == IPL_TYPE_FCP_DUMP && + !OLDMEM_BASE && sclp_get_hsa_size()) { + memory_end = sclp_get_hsa_size(); + memory_end &= PAGE_MASK; + memory_end_set = 1; } - if (!memory_end) - memory_end = memory_size; +#endif + if (!memory_end_set) + return; + memblock_reserve(memory_end, ULONG_MAX); } -static void __init -setup_memory(void) +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) { - unsigned long bootmap_size; - unsigned long start_pfn, end_pfn; - int i; - - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ - start_pfn = PFN_UP(__pa(&_end)); - end_pfn = max_pfn = PFN_DOWN(memory_end); +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} -#ifdef CONFIG_BLK_DEV_INITRD - /* - * Move the initrd in case the bitmap of the bootmem allocater - * would overwrite it. - */ +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void remove_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} - if (INITRD_START && INITRD_SIZE) { - unsigned long bmap_size; - unsigned long start; +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + phys_addr_t low, high; + int rc; - bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1); - bmap_size = PFN_PHYS(bmap_size); + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); - if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { - start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (rc || crash_size == 0) + return; - if (start + INITRD_SIZE > memory_end) { - printk("initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\n" - "disabling initrd\n", - start + INITRD_SIZE, memory_end); - INITRD_START = INITRD_SIZE = 0; - } else { - printk("Moving initrd (0x%08lx -> 0x%08lx, " - "size: %ld)\n", - INITRD_START, start, INITRD_SIZE); - memmove((void *) start, (void *) INITRD_START, - INITRD_SIZE); - INITRD_START = start; - } - } + if (memblock.memory.regions[0].size < crash_size) { + pr_info("crashkernel reservation failed: %s\n", + "first memory chunk must be at least crashkernel size"); + return; } -#endif - /* - * Initialize the boot-time allocator - */ - bootmap_size = init_bootmem(start_pfn, end_pfn); + low = crash_base ?: OLDMEM_BASE; + high = low + crash_size; + if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + /* The crashkernel fits into OLDMEM, reuse OLDMEM */ + crash_base = low; + } else { + /* Find suitable area in free memory */ + low = max_t(unsigned long, crash_size, sclp_get_hsa_size()); + high = crash_base ? crash_base + crash_size : ULONG_MAX; - /* - * Register RAM areas with the bootmem allocator. - */ + if (crash_base && crash_base < low) { + pr_info("crashkernel reservation failed: %s\n", + "crash_base too low"); + return; + } + low = crash_base ?: low; + crash_base = memblock_find_in_range(low, high, crash_size, + KEXEC_CRASH_MEM_ALIGN); + } - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - unsigned long start_chunk, end_chunk, pfn; - - if (memory_chunk[i].type != CHUNK_READ_WRITE) - continue; - start_chunk = PFN_DOWN(memory_chunk[i].addr); - end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1; - end_chunk = min(end_chunk, end_pfn); - if (start_chunk >= end_chunk) - continue; - add_active_range(0, start_chunk, end_chunk); - pfn = max(start_chunk, start_pfn); - for (; pfn <= end_chunk; pfn++) - page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", + "no suitable area found"); + return; } - psw_set_key(PAGE_DEFAULT_KEY); + if (register_memory_notifier(&kdump_mem_nb)) + return; - free_bootmem_with_active_regions(0, max_pfn); + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + memblock_remove(crash_base, crash_size); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, + (unsigned long)memblock.memory.total_size >> 20); + os_info_crashkernel_add(crash_base, crash_size); +#endif +} - /* - * Reserve memory used for lowcore/command line/kernel image. - */ - reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT); - reserve_bootmem((unsigned long)_stext, - PFN_PHYS(start_pfn) - (unsigned long)_stext, - BOOTMEM_DEFAULT); - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, - BOOTMEM_DEFAULT); +/* + * Reserve the initrd from being used by memblock + */ +static void __init reserve_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + initrd_end = initrd_start + INITRD_SIZE; + memblock_reserve(INITRD_START, INITRD_SIZE); +#endif +} +/* + * Check for initrd being in usable memory + */ +static void __init check_initrd(void) +{ #ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE) { - if (INITRD_START + INITRD_SIZE <= memory_end) { - reserve_bootmem(INITRD_START, INITRD_SIZE, - BOOTMEM_DEFAULT); - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - } else { - printk("initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - initrd_start + INITRD_SIZE, memory_end); - initrd_start = initrd_end = 0; - } + if (INITRD_START && INITRD_SIZE && + !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + pr_err("initrd does not fit memory.\n"); + memblock_free(INITRD_START, INITRD_SIZE); + initrd_start = initrd_end = 0; } #endif } -static __init unsigned int stfl(void) +/* + * Reserve all kernel text + */ +static void __init reserve_kernel(void) +{ + unsigned long start_pfn; + start_pfn = PFN_UP(__pa(&_end)); + + /* + * Reserve memory used for lowcore/command line/kernel image. + */ + memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) + - (unsigned long)_stext); +} + +static void __init reserve_elfcorehdr(void) { - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b)); - return S390_lowcore.stfl_fac_list; +#ifdef CONFIG_CRASH_DUMP + if (is_kdump_kernel()) + memblock_reserve(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size)); +#endif } -static __init int stfle(unsigned long long *list, int doublewords) +static void __init setup_memory(void) { - typedef struct { unsigned long long _[doublewords]; } addrtype; - register unsigned long __nr asm("0") = doublewords - 1; + struct memblock_region *reg; - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ - : "=m" (*(addrtype *) list), "+d" (__nr) : : "cc"); - return __nr + 1; + /* + * Init storage key for present memory + */ + for_each_memblock(memory, reg) { + storage_key_init_range(reg->base, reg->base + reg->size); + } + psw_set_key(PAGE_DEFAULT_KEY); + + /* Only cosmetics */ + memblock_enforce_memory_limit(memblock_end_of_DRAM()); } /* @@ -703,12 +699,9 @@ static __init int stfle(unsigned long long *list, int doublewords) static void __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - struct cpuinfo_S390 *cpuinfo = &S390_lowcore.cpu_data; - unsigned long long facility_list_extended; - unsigned int facility_list; + struct cpuid cpu_id; int i; - facility_list = stfl(); /* * The store facility list bits numbers as found in the principles * of operation are numbered with bit 1UL<<31 as number 0 to @@ -719,34 +712,59 @@ static void __init setup_hwcaps(void) * Bit 17: the message-security assist is installed * Bit 19: the long-displacement facility is installed * Bit 21: the extended-immediate facility is installed + * Bit 22: extended-translation facility 3 is installed + * Bit 30: extended-translation facility 3 enhancement facility * These get translated to: * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, - * HWCAP_S390_LDISP bit 4, and HWCAP_S390_EIMM bit 5. + * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and + * HWCAP_S390_ETF3EH bit 8 (22 && 30). */ for (i = 0; i < 6; i++) - if (facility_list & (1UL << (31 - stfl_bits[i]))) + if (test_facility(stfl_bits[i])) elf_hwcap |= 1UL << i; + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_S390_ETF3EH; + /* * Check for additional facilities with store-facility-list-extended. * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information * as stored by stfl, bits 32-xxx contain additional facilities. * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilites + * doublewords passed to the instruction. The additional facilities * are: - * Bit 43: decimal floating point facility is installed + * Bit 42: decimal floating point facility is installed + * Bit 44: perform floating point operation facility is installed * translated to: - * HWCAP_S390_DFP bit 6. + * HWCAP_S390_DFP bit 6 (42 && 44). */ - if ((elf_hwcap & (1UL << 2)) && - stfle(&facility_list_extended, 1) > 0) { - if (facility_list_extended & (1ULL << (64 - 43))) - elf_hwcap |= 1UL << 6; - } + if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_S390_DFP; - switch (cpuinfo->cpu_id.machine) { + /* + * Huge page support HWCAP_S390_HPAGE is bit 7. + */ + if (MACHINE_HAS_HPAGE) + elf_hwcap |= HWCAP_S390_HPAGE; + +#if defined(CONFIG_64BIT) + /* + * 64-bit register support for 31-bit processes + * HWCAP_S390_HIGH_GPRS is bit 9. + */ + elf_hwcap |= HWCAP_S390_HIGH_GPRS; + + /* + * Transactional execution support HWCAP_S390_TE is bit 10. + */ + if (test_facility(50) && test_facility(73)) + elf_hwcap |= HWCAP_S390_TE; +#endif + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { case 0x9672: #if !defined(CONFIG_64BIT) default: /* Use "g5" as default for 31 bit kernels. */ @@ -765,8 +783,21 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z990"); break; case 0x2094: + case 0x2096: strcpy(elf_platform, "z9-109"); break; + case 0x2097: + case 0x2098: + strcpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strcpy(elf_platform, "z196"); + break; + case 0x2827: + case 0x2828: + strcpy(elf_platform, "zEC12"); + break; } } @@ -775,54 +806,82 @@ static void __init setup_hwcaps(void) * was printed. */ -void __init -setup_arch(char **cmdline_p) +void __init setup_arch(char **cmdline_p) { /* * print what head.S has found out about the machine */ #ifndef CONFIG_64BIT - printk((MACHINE_IS_VM) ? - "We are running under VM (31 bit mode)\n" : - "We are running native (31 bit mode)\n"); - printk((MACHINE_HAS_IEEE) ? - "This machine has an IEEE fpu\n" : - "This machine has no IEEE fpu\n"); + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 31-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 31-bit mode\n"); + if (MACHINE_HAS_IEEE) + pr_info("The hardware system has IEEE compatible " + "floating point units\n"); + else + pr_info("The hardware system has no IEEE compatible " + "floating point units\n"); #else /* CONFIG_64BIT */ - printk((MACHINE_IS_VM) ? - "We are running under VM (64 bit mode)\n" : - "We are running native (64 bit mode)\n"); + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 64-bit mode\n"); + else if (MACHINE_IS_KVM) + pr_info("Linux is running under KVM in 64-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 64-bit mode\n"); #endif /* CONFIG_64BIT */ - /* Save unparsed command line copy for /proc/cmdline */ - strlcpy(boot_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); - - *cmdline_p = COMMAND_LINE; - *(*cmdline_p + COMMAND_LINE_SIZE - 1) = '\0'; + /* Have one command line that is parsed and saved in /proc/cmdline */ + /* boot_command_line has been already set up in early.c */ + *cmdline_p = boot_command_line; ROOT_DEV = Root_RAM0; + /* Is init_mm really needed? */ init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) &_etext; init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - if (MACHINE_HAS_MVCOS) - memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess)); - else - memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); - parse_early_param(); - + os_info_init(); setup_ipl(); + + /* Do some memory reservations *before* memory is added to memblock */ + reserve_memory_end(); + reserve_oldmem(); + reserve_kernel(); + reserve_initrd(); + reserve_elfcorehdr(); + memblock_allow_resize(); + + /* Get information about *all* installed memory */ + detect_memory_memblock(); + + remove_oldmem(); + + /* + * Make sure all chunks are MAX_ORDER aligned so we don't need the + * extra checks that HOLES_IN_ZONE would require. + * + * Is this still required? + */ + memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); + setup_memory_end(); - setup_addressing_mode(); setup_memory(); + + check_initrd(); + reserve_crashkernel(); + setup_resources(); + setup_vmcoreinfo(); setup_lowcore(); - + smp_fill_possible_mask(); cpu_init(); - __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; + s390_init_cpu_topology(); /* * Setup capabilities (ELF_HWCAP & ELF_PLATFORM). @@ -836,93 +895,40 @@ setup_arch(char **cmdline_p) /* Setup default console */ conmode_default(); + set_preferred_console(); /* Setup zfcpdump support */ - setup_zfcpdump(console_devno); + setup_zfcpdump(); } -void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) -{ - printk(KERN_INFO "cpu %d " -#ifdef CONFIG_SMP - "phys_idx=%d " -#endif - "vers=%02X ident=%06X machine=%04X unused=%04X\n", - cpuinfo->cpu_nr, -#ifdef CONFIG_SMP - cpuinfo->cpu_addr, -#endif - cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine, - cpuinfo->cpu_id.unused); -} - -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ +#ifdef CONFIG_32BIT +static int no_removal_warning __initdata; -static int show_cpuinfo(struct seq_file *m, void *v) +static int __init parse_no_removal_warning(char *str) { - static const char *hwcap_str[7] = { - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp" - }; - struct cpuinfo_S390 *cpuinfo; - unsigned long n = (unsigned long) v - 1; - int i; - - s390_adjust_jiffies(); - preempt_disable(); - if (!n) { - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < 7; i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - seq_puts(m, "\n"); - } - - if (cpu_online(n)) { -#ifdef CONFIG_SMP - if (smp_processor_id() == n) - cpuinfo = &S390_lowcore.cpu_data; - else - cpuinfo = &lowcore_ptr[n]->cpu_data; -#else - cpuinfo = &S390_lowcore.cpu_data; -#endif - seq_printf(m, "processor %li: " - "version = %02X, " - "identification = %06X, " - "machine = %04X\n", - n, cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine); - } - preempt_enable(); - return 0; + no_removal_warning = 1; + return 0; } +__setup("no_removal_warning", parse_no_removal_warning); -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) +static int __init removal_warning(void) { + if (no_removal_warning) + return 0; + printk(KERN_ALERT "\n\n"); + printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n"); + printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n"); + printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n"); + printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n"); + printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n"); + printk(KERN_CONT "please let us know. Please write to:\n"); + printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n"); + printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n"); + printk(KERN_CONT "Thank you!\n\n"); + printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n"); + printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n"); + schedule_timeout_uninterruptible(300 * HZ); + return 0; } -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - +early_initcall(removal_warning); +#endif diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 4449bf32cbf..42b49f9e19b 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/signal.c - * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999, 2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) * * Based on Intel version @@ -24,12 +22,14 @@ #include <linux/tty.h> #include <linux/personality.h> #include <linux/binfmts.h> +#include <linux/tracehook.h> +#include <linux/syscalls.h> +#include <linux/compat.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> - -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - +#include <asm/switch_to.h> +#include "entry.h" typedef struct { @@ -48,67 +48,6 @@ typedef struct struct ucontext uc; } rt_sigframe; -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - - return -ERESTARTNOHAND; -} - -asmlinkage long -sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} - -asmlinkage long -sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) -{ - struct pt_regs *regs = task_pt_regs(current); - return do_sigaltstack(uss, uoss, regs->gprs[15]); -} - - - /* Returns non-zero on fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { @@ -118,102 +57,97 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = PSW_USER_BITS | + (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, - sizeof(sregs->regs.acrs)); + sizeof(user_sregs.regs.acrs)); /* * We have to store the fp registers to current->thread.fp_regs * to merge them with the emulated registers. */ - save_fp_regs(¤t->thread.fp_regs); + save_fp_ctl(¤t->thread.fp_regs.fpc); + save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, - sizeof(s390_fp_regs)); - return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs)); + sizeof(user_sregs.fpregs)); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) + return -EFAULT; + return 0; } -/* Returns positive number on error */ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { - int err; _sigregs user_sregs; /* Alwys make any pending restarted system call return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); - if (err) - return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI)) + return -EINVAL; + + /* Loading the floating-point-control word can fail. Do that first. */ + if (restore_fp_ctl(&user_sregs.fpregs.fpc)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, - sizeof(sregs->regs.acrs)); + sizeof(current->thread.acrs)); restore_access_regs(current->thread.acrs); memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, - sizeof(s390_fp_regs)); - current->thread.fp_regs.fpc &= FPC_VALID_MASK; + sizeof(current->thread.fp_regs)); - restore_fp_regs(¤t->thread.fp_regs); - regs->trap = -1; /* disable syscall checks */ + restore_fp_regs(current->thread.fp_regs.fprs); + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } -asmlinkage long sys_sigreturn(void) +SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe __user *frame = (sigframe __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; } -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; sigset_t set; - if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) - goto badframe; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - - if (do_sigaltstack(&frame->uc.uc_stack, NULL, - regs->gprs[15]) == -EFAULT) + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -235,19 +169,16 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) /* Default to using normal stack */ sp = regs->gprs[15]; + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (! sas_ss_flags(sp)) sp = current->sas_ss_sp + current->sas_ss_size; } - /* This is the legacy signal stack switching. */ - else if (!user_mode(regs) && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - sp = (unsigned long) ka->sa.sa_restorer; - } - return (void __user *)((sp - frame_size) & -8ul); } @@ -267,7 +198,8 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigframe __user *frame; frame = get_sigframe(ka, regs, sizeof(sigframe)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) @@ -297,6 +229,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -304,8 +240,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) @@ -324,7 +265,8 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, rt_sigframe __user *frame; frame = get_sigframe(ka, regs, sizeof(rt_sigframe)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe))) + + if (frame == (void __user *) -1UL) goto give_sigsegv; if (copy_siginfo_to_user(&frame->info, info)) @@ -333,10 +275,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); - err |= __put_user((void __user *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->gprs[15]), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); err |= save_sigregs(regs, &frame->uc.uc_mcontext); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) @@ -361,11 +300,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (unsigned long) &frame->info; regs->gprs[4] = (unsigned long) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; return 0; give_sigsegv: @@ -373,13 +317,9 @@ give_sigsegv: return -EFAULT; } -/* - * OK, we're invoking a handler - */ - -static int -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +static void handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) { int ret; @@ -388,17 +328,10 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); else ret = setup_frame(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return ret; + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLE_STEP)); } /* @@ -412,111 +345,83 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; - sigset_t *oldset; + sigset_t *oldset = sigmask_to_save(); /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. */ - if (!user_mode(regs)) - return; + current_thread_info()->system_call = + test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; + signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = ¤t->saved_sigmask; - else - oldset = ¤t->blocked; + if (signr > 0) { + /* Whee! Actually deliver the signal. */ + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->int_code >> 16); + break; + } + } + /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL); - /* Are we from a system call? */ - if (regs->trap == __LC_SVC_OLD_PSW) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; + if (is_compat_task()) + handle_signal32(signr, &ka, &info, oldset, regs); + else + handle_signal(signr, &ka, &info, oldset, regs); + return; + } - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->int_code = __NR_restart_syscall; + /* fallthrough */ case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; + set_pt_regs_flag(regs, PIF_SYSCALL); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_pt_regs_flag(regs, PIF_PER_TRAP); break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->trap = -1; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } - - if (signr > 0) { - /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_31BIT)) { - extern int handle_signal32(unsigned long sig, - struct k_sigaction *ka, - siginfo_t *info, - sigset_t *oldset, - struct pt_regs *regs); - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag. - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - - /* - * If we would have taken a single-step trap - * for a normal instruction, act like we took - * one for the handler setup. - */ - if (current->thread.per_info.single_step) - set_thread_flag(TIF_SINGLE_STEP); } - return; } /* * If there's no signal to deliver, we just put the saved sigmask back. */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } + restore_saved_sigmask(); +} - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } +void do_notify_resume(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8f894d380a6..243c7e51260 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,25 +1,24 @@ /* - * arch/s390/kernel/smp.c + * SMP related functions * - * Copyright IBM Corp. 1999,2007 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - * Heiko Carstens (heiko.carstens@de.ibm.com) + * Copyright IBM Corp. 1999, 2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> * (c) 1998 Ingo Molnar * - * We work with logical cpu numbering everywhere we can. The only - * functions using the real cpu address (got from STAP) are the sigp - * functions. For all other functions we use the identity mapping. - * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is - * used e.g. to find the idle task belonging to a logical cpu. Every array - * in the kernel is sorted by the logical cpu number and not by the physical - * one which is causing all the confusion with __cpu_logical_map and - * cpu_number_map in other architectures. + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. */ +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> #include <linux/module.h> #include <linux/init.h> #include <linux/mm.h> @@ -27,234 +26,380 @@ #include <linux/spinlock.h> #include <linux/kernel_stat.h> #include <linux/delay.h> -#include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/irqflags.h> #include <linux/cpu.h> -#include <linux/timex.h> -#include <linux/bootmem.h> +#include <linux/slab.h> +#include <linux/crash_dump.h> +#include <asm/asm-offsets.h> +#include <asm/switch_to.h> +#include <asm/facility.h> #include <asm/ipl.h> #include <asm/setup.h> -#include <asm/sigp.h> -#include <asm/pgalloc.h> #include <asm/irq.h> -#include <asm/s390_ext.h> -#include <asm/cpcmd.h> #include <asm/tlbflush.h> -#include <asm/timer.h> +#include <asm/vtimer.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include <asm/cpu.h> - -/* - * An array with a pointer the lowcore of every CPU. - */ -struct _lowcore *lowcore_ptr[NR_CPUS]; -EXPORT_SYMBOL(lowcore_ptr); - -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - -cpumask_t cpu_possible_map = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); - -static struct task_struct *current_set[NR_CPUS]; +#include <asm/vdso.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include <asm/sigp.h> +#include "entry.h" -static u8 smp_cpu_type; -static int smp_use_sigp_detection; +enum { + ec_schedule = 0, + ec_call_function_single, + ec_stop_cpu, +}; -enum s390_cpu_state { +enum { CPU_STATE_STANDBY, CPU_STATE_CONFIGURED, }; -#ifdef CONFIG_HOTPLUG_CPU -static DEFINE_MUTEX(smp_cpu_state_mutex); -#endif -static int smp_cpu_state[NR_CPUS]; - -static DEFINE_PER_CPU(struct cpu, cpu_devices); -DEFINE_PER_CPU(struct s390_idle_data, s390_idle); +struct pcpu { + struct cpu *cpu; + struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + unsigned long async_stack; /* async stack for the cpu */ + unsigned long panic_stack; /* panic stack for the cpu */ + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + int state; /* physical cpu state */ + int polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; -static void smp_ext_bitcall(int, ec_bit_sig); +static u8 boot_cpu_type; +static u16 boot_cpu_address; +static struct pcpu pcpu_devices[NR_CPUS]; /* - * Structure and data for __smp_call_function_map(). This is designed to - * minimise static memory requirements. It also looks cleaner. + * The smp_cpu_state_mutex must be held when changing the state or polarization + * member of a pcpu data structure within the pcpu_devices arreay. */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - cpumask_t started; - cpumask_t finished; - int wait; -}; - -static struct call_data_struct *call_data; +DEFINE_MUTEX(smp_cpu_state_mutex); /* - * 'Call function' interrupt callback + * Signal processor helper functions. */ -static void do_call_function(void) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) { - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - cpu_set(smp_processor_id(), call_data->started); - (*func)(info); - if (wait) - cpu_set(smp_processor_id(), call_data->finished);; + int cc; + + while (1) { + cc = __pcpu_sigp(addr, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + return cc; + cpu_relax(); + } } -static void __smp_call_function_map(void (*func) (void *info), void *info, - int nonatomic, int wait, cpumask_t map) +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) { - struct call_data_struct data; - int cpu, local = 0; - - /* - * Can deadlock when interrupts are disabled or if in wrong context. - */ - WARN_ON(irqs_disabled() || in_irq()); - - /* - * Check for local function call. We have to have the same call order - * as in on_each_cpu() because of machine_restart_smp(). - */ - if (cpu_isset(smp_processor_id(), map)) { - local = 1; - cpu_clear(smp_processor_id(), map); + int cc, retry; + + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + break; + if (retry >= 3) + udelay(10); } + return cc; +} - cpus_and(map, map, cpu_online_map); - if (cpus_empty(map)) - goto out; +static inline int pcpu_stopped(struct pcpu *pcpu) +{ + u32 uninitialized_var(status); - data.func = func; - data.info = info; - data.started = CPU_MASK_NONE; - data.wait = wait; - if (wait) - data.finished = CPU_MASK_NONE; + if (__pcpu_sigp(pcpu->address, SIGP_SENSE, + 0, &status) != SIGP_CC_STATUS_STORED) + return 0; + return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); +} - spin_lock(&call_lock); - call_data = &data; +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING, + 0, NULL) != SIGP_CC_STATUS_STORED) + return 1; + /* Status stored condition code is equivalent to cpu not running. */ + return 0; +} - for_each_cpu_mask(cpu, map) - smp_ext_bitcall(cpu, ec_call_function); +/* + * Find struct pcpu by cpu address. + */ +static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) +{ + int cpu; - /* Wait for response */ - while (!cpus_equal(map, data.started)) - cpu_relax(); - if (wait) - while (!cpus_equal(map, data.finished)) - cpu_relax(); - spin_unlock(&call_lock); + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; +} + +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ + int order; + + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + pcpu_sigp_retry(pcpu, order, 0); +} + +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc; + + if (pcpu != &pcpu_devices[0]) { + pcpu->lowcore = (struct _lowcore *) + __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + pcpu->panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + goto out; + } + lc = pcpu->lowcore; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = pcpu->async_stack + ASYNC_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->panic_stack = pcpu->panic_stack + PAGE_SIZE + - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); + if (!lc->extended_save_area_addr) + goto out; + } +#else + if (vdso_alloc_per_cpu(lc)) + goto out; +#endif + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); + return 0; out: - if (local) { - local_irq_disable(); - func(info); - local_irq_enable(); + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); + } + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void pcpu_free_lowcore(struct pcpu *pcpu) +{ + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); + lowcore_ptr[pcpu - pcpu_devices] = NULL; +#ifndef CONFIG_64BIT + if (MACHINE_HAS_IEEE) { + struct _lowcore *lc = pcpu->lowcore; + + free_page((unsigned long) lc->extended_save_area_addr); + lc->extended_save_area_addr = 0; + } +#else + vdso_free_per_cpu(pcpu->lowcore); +#endif + if (pcpu != &pcpu_devices[0]) { + free_page(pcpu->panic_stack); + free_pages(pcpu->async_stack, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } } +#endif /* CONFIG_HOTPLUG_CPU */ + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct _lowcore *lc = pcpu->lowcore; + + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + atomic_inc(&init_mm.context.attach_count); + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->ftrace_func = S390_lowcore.ftrace_func; + lc->user_timer = lc->system_timer = lc->steal_timer = 0; + __ctl_store(lc->cregs_save_area, 0, 15); + save_access_regs((unsigned int *) lc->access_regs_save_area); + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ + struct _lowcore *lc = pcpu->lowcore; + struct thread_info *ti = task_thread_info(tsk); + + lc->kernel_stack = (unsigned long) task_stack_page(tsk) + + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->thread_info = (unsigned long) task_thread_info(tsk); + lc->current_task = (unsigned long) tsk; + lc->user_timer = ti->user_timer; + lc->system_timer = ti->system_timer; + lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ + struct _lowcore *lc = pcpu->lowcore; + + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1UL; + pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); +} + /* - * smp_call_function: - * @func: the function to run; this must be fast and non-blocking - * @info: an arbitrary pointer to pass to the function - * @nonatomic: unused - * @wait: if true, wait (atomically) until function has completed on other CPUs - * - * Run a function on all other CPUs. - * - * You must not call this function with disabled interrupts, from a - * hardware interrupt handler or from a bottom half. + * Call function via PSW restart on pcpu and stop the current cpu. */ -int smp_call_function(void (*func) (void *info), void *info, int nonatomic, - int wait) +static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), + void *data, unsigned long stack) { - cpumask_t map; + struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; + unsigned long source_cpu = stap(); + + __load_psw_mask(PSW_KERNEL_BITS); + if (pcpu->address == source_cpu) + func(data); /* should not return */ + /* Stop target cpu (if func returns this stops the current cpu). */ + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + /* Restart func on the target cpu and stop the current cpu. */ + mem_assign_absolute(lc->restart_stack, stack); + mem_assign_absolute(lc->restart_fn, (unsigned long) func); + mem_assign_absolute(lc->restart_data, (unsigned long) data); + mem_assign_absolute(lc->restart_source, source_cpu); + asm volatile( + "0: sigp 0,%0,%2 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,%3 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (source_cpu), + "K" (SIGP_RESTART), "K" (SIGP_STOP) + : "0", "1", "cc"); + for (;;) ; +} - preempt_disable(); - map = cpu_online_map; - cpu_clear(smp_processor_id(), map); - __smp_call_function_map(func, info, nonatomic, wait, map); - preempt_enable(); - return 0; +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); } -EXPORT_SYMBOL(smp_call_function); /* - * smp_call_function_single: - * @cpu: the CPU where func should run - * @func: the function to run; this must be fast and non-blocking - * @info: an arbitrary pointer to pass to the function - * @nonatomic: unused - * @wait: if true, wait (atomically) until function has completed on other CPUs - * - * Run a function on one processor. - * - * You must not call this function with disabled interrupts, from a - * hardware interrupt handler or from a bottom half. + * Call function on the ipl CPU. */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) +void smp_call_ipl_cpu(void (*func)(void *), void *data) { - preempt_disable(); - __smp_call_function_map(func, info, nonatomic, wait, - cpumask_of_cpu(cpu)); - preempt_enable(); - return 0; + pcpu_delegate(&pcpu_devices[0], func, data, + pcpu_devices->panic_stack + PAGE_SIZE); } -EXPORT_SYMBOL(smp_call_function_single); - -/** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. Must not include the current cpu. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. + +int smp_find_processor_id(u16 address) +{ + int cpu; + + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; +} + +int smp_vcpu_scheduled(int cpu) +{ + return pcpu_running(pcpu_devices + cpu); +} + +void smp_yield(void) +{ + if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +void smp_yield_cpu(int cpu) +{ + if (MACHINE_HAS_DIAG9C) + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); + else if (MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); +} + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. */ -int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, - int wait) +static void smp_emergency_stop(cpumask_t *cpumask) { - preempt_disable(); - cpu_clear(smp_processor_id(), mask); - __smp_call_function_map(func, info, 0, wait, mask); - preempt_enable(); - return 0; + u64 end; + int cpu; + + end = get_tod_clock() + (1000000UL << 12); + for_each_cpu(cpu, cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, + 0, NULL) == SIGP_CC_BUSY && + get_tod_clock() < end) + cpu_relax(); + } + while (get_tod_clock() < end) { + for_each_cpu(cpu, cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, cpumask); + if (cpumask_empty(cpumask)) + break; + cpu_relax(); + } } -EXPORT_SYMBOL(smp_call_function_mask); +/* + * Stop all cpus but the current one. + */ void smp_send_stop(void) { - int cpu, rc; + cpumask_t cpumask; + int cpu; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); + trace_hardirqs_off(); - /* write magic number to zero page (absolute 0) */ - lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; + debug_set_critical(); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); - /* stop all processors */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = signal_processor(cpu, sigp_stop); - } while (rc == sigp_busy); + if (oops_in_progress) + smp_emergency_stop(&cpumask); - while (!smp_cpu_not_running(cpu)) + /* stop all processors */ + for_each_cpu(cpu, &cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } @@ -263,49 +408,52 @@ void smp_send_stop(void) * This is the main routine where commands issued by other * cpus are handled. */ - -static void do_ext_call_interrupt(__u16 code) +static void smp_handle_ext_call(void) { unsigned long bits; - /* - * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. - */ - bits = xchg(&S390_lowcore.ext_call_fast, 0); + /* handle bit signal external calls */ + bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + if (test_bit(ec_stop_cpu, &bits)) + smp_stop_cpu(); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); + if (test_bit(ec_call_function_single, &bits)) + generic_smp_call_function_single_interrupt(); +} - if (test_bit(ec_call_function, &bits)) - do_call_function(); +static void do_ext_call_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS); + smp_handle_ext_call(); } -/* - * Send an external call sigp to another cpu and return without waiting - * for its completion. - */ -static void smp_ext_bitcall(int cpu, ec_bit_sig sig) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (signal_processor(cpu, sigp_emergency_signal) == sigp_busy) - udelay(10); + int cpu; + + for_each_cpu(cpu, mask) + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } #ifndef CONFIG_64BIT /* * this function sends a 'purge tlb' signal to another CPU. */ -void smp_ptlb_callback(void *info) +static void smp_ptlb_callback(void *info) { __tlb_flush_local(); } void smp_ptlb_all(void) { - on_each_cpu(smp_ptlb_callback, NULL, 0, 1); + on_each_cpu(smp_ptlb_callback, NULL, 1); } EXPORT_SYMBOL(smp_ptlb_all); #endif /* ! CONFIG_64BIT */ @@ -317,15 +465,16 @@ EXPORT_SYMBOL(smp_ptlb_all); */ void smp_send_reschedule(int cpu) { - smp_ext_bitcall(cpu, ec_schedule); + pcpu_ec_call(pcpu_devices + cpu, ec_schedule); } /* * parameter area for the set/clear control bit callbacks */ struct ec_creg_mask_parms { - unsigned long orvals[16]; - unsigned long andvals[16]; + unsigned long orval; + unsigned long andval; + int cr; }; /* @@ -335,11 +484,9 @@ static void smp_ctl_bit_callback(void *info) { struct ec_creg_mask_parms *pp = info; unsigned long cregs[16]; - int i; __ctl_store(cregs, 0, 15); - for (i = 0; i <= 15; i++) - cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i]; + cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; __ctl_load(cregs, 0, 15); } @@ -348,12 +495,9 @@ static void smp_ctl_bit_callback(void *info) */ void smp_ctl_set_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.orvals[cr] = 1 << bit; - on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_set_bit); @@ -362,497 +506,303 @@ EXPORT_SYMBOL(smp_ctl_set_bit); */ void smp_ctl_clear_bit(int cr, int bit) { - struct ec_creg_mask_parms parms; + struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; - memset(&parms.orvals, 0, sizeof(parms.orvals)); - memset(&parms.andvals, 0xff, sizeof(parms.andvals)); - parms.andvals[cr] = ~(1L << bit); - on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); } EXPORT_SYMBOL(smp_ctl_clear_bit); -/* - * In early ipl state a temp. logically cpu number is needed, so the sigp - * functions can be used to sense other cpus. Since NR_CPUS is >= 2 on - * CONFIG_SMP and the ipl cpu is logical cpu 0, it must be 1. - */ -#define CPU_INIT_NO 1 +#ifdef CONFIG_CRASH_DUMP -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE) - -/* - * zfcpdump_prefix_array holds prefix registers for the following scenario: - * 64 bit zfcpdump kernel and 31 bit kernel which is to be dumped. We have to - * save its prefix registers, since they get lost, when switching from 31 bit - * to 64 bit. - */ -unsigned int zfcpdump_prefix_array[NR_CPUS + 1] \ - __attribute__((__section__(".data"))); - -static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) +static void __init smp_get_save_area(int cpu, u16 address) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + void *lc = pcpu_devices[0].lowcore; + struct save_area *save_area; + + if (is_kdump_kernel()) return; - if (cpu >= NR_CPUS) { - printk(KERN_WARNING "Registers for cpu %i not saved since dump " - "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS); + if (!OLDMEM_BASE && (address == boot_cpu_address || + ipl_info.type != IPL_TYPE_FCP_DUMP)) + return; + save_area = dump_save_area_create(cpu); + if (!save_area) + panic("could not allocate memory for save area\n"); + if (address == boot_cpu_address) { + /* Copy the registers of the boot cpu. */ + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + SAVE_AREA_BASE - PAGE_SIZE, 0); return; } - zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL); - __cpu_logical_map[CPU_INIT_NO] = (__u16) phy_cpu; - while (signal_processor(CPU_INIT_NO, sigp_stop_and_store_status) == - sigp_busy) - cpu_relax(); - memcpy(zfcpdump_save_areas[cpu], - (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE, - SAVE_AREA_SIZE); -#ifdef CONFIG_64BIT - /* copy original prefix register */ - zfcpdump_save_areas[cpu]->s390x.pref_reg = zfcpdump_prefix_array[cpu]; -#endif + /* Get the registers of a non-boot cpu. */ + __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); + memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); } -union save_area *zfcpdump_save_areas[NR_CPUS + 1]; -EXPORT_SYMBOL_GPL(zfcpdump_save_areas); +int smp_store_status(int cpu) +{ + struct pcpu *pcpu; -#else + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, + 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + return 0; +} -static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } +#else /* CONFIG_CRASH_DUMP */ -#endif /* CONFIG_ZFCPDUMP || CONFIG_ZFCPDUMP_MODULE */ +static inline void smp_get_save_area(int cpu, u16 address) { } -static int cpu_stopped(int cpu) -{ - __u32 status; +#endif /* CONFIG_CRASH_DUMP */ - /* Check for stopped state */ - if (signal_processor_ps(&status, 0, cpu, sigp_sense) == - sigp_status_stored) { - if (status & 0x40) - return 1; - } - return 0; +void smp_cpu_set_polarization(int cpu, int val) +{ + pcpu_devices[cpu].polarization = val; } -static int cpu_known(int cpu_id) +int smp_cpu_get_polarization(int cpu) { - int cpu; - - for_each_present_cpu(cpu) { - if (__cpu_logical_map[cpu] == cpu_id) - return 1; - } - return 0; + return pcpu_devices[cpu].polarization; } -static int smp_rescan_cpus_sigp(cpumask_t avail) +static struct sclp_cpu_info *smp_get_cpu_info(void) { - int cpu_id, logical_cpu; - - logical_cpu = first_cpu(avail); - if (logical_cpu == NR_CPUS) - return 0; - for (cpu_id = 0; cpu_id <= 65535; cpu_id++) { - if (cpu_known(cpu_id)) - continue; - __cpu_logical_map[logical_cpu] = cpu_id; - if (!cpu_stopped(logical_cpu)) - continue; - cpu_set(logical_cpu, cpu_present_map); - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = next_cpu(logical_cpu, avail); - if (logical_cpu == NR_CPUS) - break; + static int use_sigp_detection; + struct sclp_cpu_info *info; + int address; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + use_sigp_detection = 1; + for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) + continue; + info->cpu[info->configured].address = address; + info->configured++; + } + info->combined = info->configured; } - return 0; + return info; } -static int smp_rescan_cpus_sclp(cpumask_t avail) +static int smp_add_present_cpu(int cpu); + +static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { - struct sclp_cpu_info *info; - int cpu_id, logical_cpu, cpu; - int rc; + struct pcpu *pcpu; + cpumask_t avail; + int cpu, nr, i; - logical_cpu = first_cpu(avail); - if (logical_cpu == NR_CPUS) - return 0; - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - rc = sclp_get_cpu_info(info); - if (rc) - goto out; - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + cpu = cpumask_first(&avail); + for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) continue; - cpu_id = info->cpu[cpu].address; - if (cpu_known(cpu_id)) + if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) continue; - __cpu_logical_map[logical_cpu] = cpu_id; - cpu_set(logical_cpu, cpu_present_map); - if (cpu >= info->configured) - smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; + pcpu = pcpu_devices + cpu; + pcpu->address = info->cpu[i].address; + pcpu->state = (i >= info->configured) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); else - smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; - logical_cpu = next_cpu(logical_cpu, avail); - if (logical_cpu == NR_CPUS) - break; + nr++; + cpu = cpumask_next(cpu, &avail); } -out: - kfree(info); - return rc; -} - -static int smp_rescan_cpus(void) -{ - cpumask_t avail; - - cpus_xor(avail, cpu_possible_map, cpu_present_map); - if (smp_use_sigp_detection) - return smp_rescan_cpus_sigp(avail); - else - return smp_rescan_cpus_sclp(avail); + return nr; } static void __init smp_detect_cpus(void) { unsigned int cpu, c_cpus, s_cpus; struct sclp_cpu_info *info; - u16 boot_cpu_addr, cpu_addr; - c_cpus = 1; - s_cpus = 0; - boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = smp_get_cpu_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); - /* Use sigp detection algorithm if sclp doesn't work. */ - if (sclp_get_cpu_info(info)) { - smp_use_sigp_detection = 1; - for (cpu = 0; cpu <= 65535; cpu++) { - if (cpu == boot_cpu_addr) - continue; - __cpu_logical_map[CPU_INIT_NO] = cpu; - if (!cpu_stopped(CPU_INIT_NO)) - continue; - smp_get_save_area(c_cpus, cpu); - c_cpus++; - } - goto out; - } - if (info->has_cpu_type) { for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address == boot_cpu_addr) { - smp_cpu_type = info->cpu[cpu].type; - break; - } + if (info->cpu[cpu].address != boot_cpu_address) + continue; + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; + break; } } - + c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type) - continue; - cpu_addr = info->cpu[cpu].address; - if (cpu_addr == boot_cpu_addr) + if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) continue; - __cpu_logical_map[CPU_INIT_NO] = cpu_addr; - if (!cpu_stopped(CPU_INIT_NO)) { + if (cpu < info->configured) { + smp_get_save_area(c_cpus, info->cpu[cpu].address); + c_cpus++; + } else s_cpus++; - continue; - } - smp_get_save_area(c_cpus, cpu_addr); - c_cpus++; } -out: - kfree(info); - printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus); + pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); get_online_cpus(); - smp_rescan_cpus(); + __smp_rescan_cpus(info, 0); put_online_cpus(); + kfree(info); } /* * Activate a secondary processor. */ -int __cpuinit start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { - /* Setup the cpu */ + S390_lowcore.last_update_clock = get_tod_clock(); + S390_lowcore.restart_stack = (unsigned long) restart_stack; + S390_lowcore.restart_fn = (unsigned long) do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1UL; + restore_access_regs(S390_lowcore.access_regs_save_area); + __ctl_load(S390_lowcore.cregs_save_area, 0, 15); + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); cpu_init(); preempt_disable(); - /* Enable TOD clock interrupts on the secondary cpu. */ init_cpu_timer(); -#ifdef CONFIG_VIRT_TIMER - /* Enable cpu timer interrupts on the secondary cpu. */ init_cpu_vtimer(); -#endif - /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); - - /* Mark this cpu as online */ - cpu_set(smp_processor_id(), cpu_online_map); - /* Switch on interrupts */ + notify_cpu_starting(smp_processor_id()); + set_cpu_online(smp_processor_id(), true); + inc_irq_stat(CPU_RST); local_irq_enable(); - /* Print info about this processor */ - print_cpu_info(&S390_lowcore.cpu_data); - /* cpu_idle will call schedule for us */ - cpu_idle(); - return 0; + cpu_startup_entry(CPUHP_ONLINE); } -static void __init smp_create_idle(unsigned int cpu) -{ - struct task_struct *p; - - /* - * don't care about the psw and regs settings since we'll never - * reschedule the forked task. - */ - p = fork_idle(cpu); - if (IS_ERR(p)) - panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); - current_set[cpu] = p; - spin_lock_init(&(&per_cpu(s390_idle, cpu))->lock); -} - -static int __cpuinit smp_alloc_lowcore(int cpu) -{ - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; - int lc_order; - - lc_order = sizeof(long) == 8 ? 1 : 0; - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); - if (!lowcore) - return -ENOMEM; - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - panic_stack = __get_free_page(GFP_KERNEL); - if (!panic_stack || !async_stack) - goto out; - memcpy(lowcore, &S390_lowcore, 512); - memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); - lowcore->async_stack = async_stack + ASYNC_SIZE; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) { - unsigned long save_area; - - save_area = get_zeroed_page(GFP_KERNEL); - if (!save_area) - goto out_save_area; - lowcore->extended_save_area_addr = (u32) save_area; - } -#endif - lowcore_ptr[cpu] = lowcore; - return 0; - -#ifndef CONFIG_64BIT -out_save_area: - free_page(panic_stack); -#endif -out: - free_pages(async_stack, ASYNC_ORDER); - free_pages((unsigned long) lowcore, lc_order); - return -ENOMEM; -} - -#ifdef CONFIG_HOTPLUG_CPU -static void smp_free_lowcore(int cpu) -{ - struct _lowcore *lowcore; - int lc_order; - - lc_order = sizeof(long) == 8 ? 1 : 0; - lowcore = lowcore_ptr[cpu]; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - free_page((unsigned long) lowcore->extended_save_area_addr); -#endif - free_page(lowcore->panic_stack - PAGE_SIZE); - free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); - free_pages((unsigned long) lowcore, lc_order); - lowcore_ptr[cpu] = NULL; -} -#endif /* CONFIG_HOTPLUG_CPU */ - /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct task_struct *idle; - struct _lowcore *cpu_lowcore; - struct stack_frame *sf; - sigp_ccode ccode; + struct pcpu *pcpu; + int rc; - if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) + pcpu = pcpu_devices + cpu; + if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - if (smp_alloc_lowcore(cpu)) - return -ENOMEM; - - ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]), - cpu, sigp_set_prefix); - if (ccode) { - printk("sigp_set_prefix failed for cpu %d " - "with condition code %d\n", - (int) cpu, (int) ccode); + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - } - - idle = current_set[cpu]; - cpu_lowcore = lowcore_ptr[cpu]; - cpu_lowcore->kernel_stack = (unsigned long) - task_stack_page(idle) + THREAD_SIZE; - cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle); - sf = (struct stack_frame *) (cpu_lowcore->kernel_stack - - sizeof(struct pt_regs) - - sizeof(struct stack_frame)); - memset(sf, 0, sizeof(struct stack_frame)); - sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->save_area[15] = (unsigned long) sf; - __ctl_store(cpu_lowcore->cregs_save_area[0], 0, 15); - asm volatile( - " stam 0,15,0(%0)" - : : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); - cpu_lowcore->percpu_offset = __per_cpu_offset[cpu]; - cpu_lowcore->current_task = (unsigned long) idle; - cpu_lowcore->cpu_data.cpu_nr = cpu; - cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; - cpu_lowcore->ipl_device = S390_lowcore.ipl_device; - eieio(); - - while (signal_processor(cpu, sigp_restart) == sigp_busy) - udelay(10); + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, tidle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); while (!cpu_online(cpu)) cpu_relax(); return 0; } -static int __init setup_possible_cpus(char *s) -{ - int pcpus, cpu; +static unsigned int setup_possible_cpus __initdata; - pcpus = simple_strtoul(s, NULL, 0); - cpu_possible_map = cpumask_of_cpu(0); - for (cpu = 1; cpu < pcpus && cpu < NR_CPUS; cpu++) - cpu_set(cpu, cpu_possible_map); +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); return 0; } -early_param("possible_cpus", setup_possible_cpus); +early_param("possible_cpus", _setup_possible_cpus); #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { - struct ec_creg_mask_parms cr_parms; - int cpu = smp_processor_id(); - - cpu_clear(cpu, cpu_online_map); + unsigned long cregs[16]; - /* Disable pfault pseudo page faults on this cpu. */ + /* Handle possible pending IPIs */ + smp_handle_ext_call(); + set_cpu_online(smp_processor_id(), false); + /* Disable pseudo page faults on this cpu. */ pfault_fini(); - - memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals)); - memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals)); - - /* disable all external interrupts */ - cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 | - 1 << 11 | 1 << 10 | 1 << 6 | 1 << 4); - /* disable all I/O interrupts */ - cr_parms.orvals[6] = 0; - cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | - 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24); - /* disable most machine checks */ - cr_parms.orvals[14] = 0; - cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 | - 1 << 25 | 1 << 24); - - smp_ctl_bit_callback(&cr_parms); - + /* Disable interrupt sources via control register. */ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); return 0; } void __cpu_die(unsigned int cpu) { + struct pcpu *pcpu; + /* Wait until target cpu is down */ - while (!smp_cpu_not_running(cpu)) + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) cpu_relax(); - smp_free_lowcore(cpu); - printk(KERN_INFO "Processor %d spun down\n", cpu); + pcpu_free_lowcore(pcpu); + atomic_dec(&init_mm.context.attach_count); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } -void cpu_die(void) +void __noreturn cpu_die(void) { idle_task_exit(); - signal_processor(smp_processor_id(), sigp_stop); - BUG(); - for (;;); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + for (;;) ; } #endif /* CONFIG_HOTPLUG_CPU */ -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init smp_fill_possible_mask(void) { -#ifndef CONFIG_64BIT - unsigned long save_area = 0; -#endif - unsigned long async_stack, panic_stack; - struct _lowcore *lowcore; - unsigned int cpu; - int lc_order; + unsigned int possible, sclp, cpu; - smp_detect_cpus(); + sclp = sclp_get_max_cpu() ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp); + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} +void __init smp_prepare_cpus(unsigned int max_cpus) +{ /* request the 0x1201 emergency signal external interrupt */ - if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) + if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); - print_cpu_info(&S390_lowcore.cpu_data); - - /* Reallocate current lowcore, but keep its contents. */ - lc_order = sizeof(long) == 8 ? 1 : 0; - lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); - panic_stack = __get_free_page(GFP_KERNEL); - async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - save_area = get_zeroed_page(GFP_KERNEL); -#endif - local_irq_disable(); - local_mcck_disable(); - lowcore_ptr[smp_processor_id()] = lowcore; - *lowcore = S390_lowcore; - lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->async_stack = async_stack + ASYNC_SIZE; -#ifndef CONFIG_64BIT - if (MACHINE_HAS_IEEE) - lowcore->extended_save_area_addr = (u32) save_area; -#endif - set_prefix((u32)(unsigned long) lowcore); - local_mcck_enable(); - local_irq_enable(); - for_each_possible_cpu(cpu) - if (cpu != smp_processor_id()) - smp_create_idle(cpu); + /* request the 0x1202 external call external interrupt */ + if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) + panic("Couldn't request external interrupt 0x1202"); + smp_detect_cpus(); } void __init smp_prepare_boot_cpu(void) { - BUG_ON(smp_processor_id() != 0); - - current_thread_info()->cpu = 0; - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_online_map); + struct pcpu *pcpu = pcpu_devices; + + boot_cpu_address = stap(); + pcpu->state = CPU_STATE_CONFIGURED; + pcpu->address = boot_cpu_address; + pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE + + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; - current_set[0] = current; - smp_cpu_state[0] = CPU_STATE_CONFIGURED; - spin_lock_init(&(&__get_cpu_var(s390_idle))->lock); + smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); + set_cpu_present(0, true); + set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) { } +void __init smp_setup_processor_id(void) +{ + S390_lowcore.cpu_nr = 0; + S390_lowcore.spinlock_lockval = arch_spin_lockval(0); +} + /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. @@ -865,72 +815,82 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t cpu_configure_show(struct sys_device *dev, char *buf) +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); mutex_unlock(&smp_cpu_state_mutex); return count; } -static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf, - size_t count) +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - int cpu = dev->id; - int val, rc; + struct pcpu *pcpu; + int cpu, val, rc; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - - mutex_lock(&smp_cpu_state_mutex); get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); rc = -EBUSY; - if (cpu_online(cpu)) + /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; + if (cpu_online(cpu) || cpu == 0) goto out; + pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: - if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) { - rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); - if (!rc) - smp_cpu_state[cpu] = CPU_STATE_STANDBY; - } + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_cpu_deconfigure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; case 1: - if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) { - rc = sclp_cpu_configure(__cpu_logical_map[cpu]); - if (!rc) - smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; - } + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = sclp_cpu_configure(pcpu->address); + if (rc) + break; + pcpu->state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); break; default: break; } out: - put_online_cpus(); mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t show_cpu_address(struct sys_device *dev, char *buf) +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); } -static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL); - +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { #ifdef CONFIG_HOTPLUG_CPU - &attr_configure.attr, + &dev_attr_configure.attr, #endif - &attr_address.attr, + &dev_attr_address.attr, NULL, }; @@ -938,53 +898,45 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_capability(struct sys_device *dev, char *buf) +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) { - unsigned int capability; - int rc; - - rc = get_cpu_capability(&capability); - if (rc) - return rc; - return sprintf(buf, "%u\n", capability); -} -static SYSDEV_ATTR(capability, 0444, show_capability, NULL); - -static ssize_t show_idle_count(struct sys_device *dev, char *buf) -{ - struct s390_idle_data *idle; + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned long long idle_count; - - idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); - idle_count = idle->idle_count; - spin_unlock_irq(&idle->lock); + unsigned int sequence; + + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->clock_idle_enter)) + idle_count++; + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); return sprintf(buf, "%llu\n", idle_count); } -static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); -static ssize_t show_idle_time(struct sys_device *dev, char *buf) +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) { - struct s390_idle_data *idle; - unsigned long long new_time; - - idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); - if (idle->in_idle) { - new_time = get_clock(); - idle->idle_time += new_time - idle->idle_enter; - idle->idle_enter = new_time; - } - new_time = idle->idle_time; - spin_unlock_irq(&idle->lock); - return sprintf(buf, "%llu\n", new_time >> 12); + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + return sprintf(buf, "%llu\n", idle_time >> 12); } -static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); static struct attribute *cpu_online_attrs[] = { - &attr_capability.attr, - &attr_idle_count.attr, - &attr_idle_time_us.attr, + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, NULL, }; @@ -992,44 +944,36 @@ static struct attribute_group cpu_online_attr_group = { .attrs = cpu_online_attrs, }; -static int __cpuinit smp_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int smp_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - struct s390_idle_data *idle; + struct cpu *c = pcpu_devices[cpu].cpu; + struct device *s = &c->dev; + int err = 0; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - idle = &per_cpu(s390_idle, cpu); - spin_lock_irq(&idle->lock); - idle->idle_enter = 0; - idle->idle_time = 0; - idle->idle_count = 0; - spin_unlock_irq(&idle->lock); - if (sysfs_create_group(&s->kobj, &cpu_online_attr_group)) - return NOTIFY_BAD; + err = sysfs_create_group(&s->kobj, &cpu_online_attr_group); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: sysfs_remove_group(&s->kobj, &cpu_online_attr_group); break; } - return NOTIFY_OK; + return notifier_from_errno(err); } -static struct notifier_block __cpuinitdata smp_cpu_nb = { - .notifier_call = smp_cpu_notify, -}; - -static int __devinit smp_add_present_cpu(int cpu) +static int smp_add_present_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s; + struct cpu *c; int rc; + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + pcpu_devices[cpu].cpu = c; + s = &c->dev; c->hotpluggable = 1; rc = register_cpu(c, cpu); if (rc) @@ -1037,11 +981,20 @@ static int __devinit smp_add_present_cpu(int cpu) rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); if (rc) goto out_cpu; - if (!cpu_online(cpu)) - goto out; - rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); - if (!rc) - return 0; + if (cpu_online(cpu)) { + rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + if (rc) + goto out_online; + } + rc = topology_cpu_init(c); + if (rc) + goto out_topology; + return 0; + +out_topology: + if (cpu_online(cpu)) + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); +out_online: sysfs_remove_group(&s->kobj, &cpu_common_attr_group); out_cpu: #ifdef CONFIG_HOTPLUG_CPU @@ -1052,52 +1005,59 @@ out: } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t __ref rescan_store(struct sys_device *dev, - const char *buf, size_t count) + +int __ref smp_rescan_cpus(void) { - cpumask_t newcpus; - int cpu; - int rc; + struct sclp_cpu_info *info; + int nr; - mutex_lock(&smp_cpu_state_mutex); + info = smp_get_cpu_info(); + if (!info) + return -ENOMEM; get_online_cpus(); - newcpus = cpu_present_map; - rc = smp_rescan_cpus(); - if (rc) - goto out; - cpus_andnot(newcpus, cpu_present_map, newcpus); - for_each_cpu_mask(cpu, newcpus) { - rc = smp_add_present_cpu(cpu); - if (rc) - cpu_clear(cpu, cpu_present_map); - } - rc = 0; -out: - put_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + nr = __smp_rescan_cpus(info, 1); mutex_unlock(&smp_cpu_state_mutex); - return rc ? rc : count; + put_online_cpus(); + kfree(info); + if (nr) + topology_schedule_update(); + return 0; } -static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store); -#endif /* CONFIG_HOTPLUG_CPU */ -static int __init topology_init(void) +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) { - int cpu; int rc; - register_cpu_notifier(&smp_cpu_nb); + rc = smp_rescan_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __init s390_smp_init(void) +{ + int cpu, rc = 0; #ifdef CONFIG_HOTPLUG_CPU - rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj, - &attr_rescan.attr); + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; #endif + cpu_notifier_register_begin(); for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) - return rc; + goto out; } - return 0; + + __hotcpu_notifier(smp_cpu_notify, 0); + +out: + cpu_notifier_register_done(); + return rc; } -subsys_initcall(topology_init); +subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 85e46a5d0e0..1785cd82253 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,15 +1,14 @@ /* - * arch/s390/kernel/stacktrace.c - * * Stack trace management functions * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ #include <linux/sched.h> #include <linux/stacktrace.h> #include <linux/kallsyms.h> +#include <linux/module.h> static unsigned long save_context_stack(struct stack_trace *trace, unsigned long sp, @@ -81,6 +80,7 @@ void save_stack_trace(struct stack_trace *trace) S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE, 1); } +EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { @@ -93,3 +93,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c new file mode 100644 index 00000000000..a7a7537ce1e --- /dev/null +++ b/arch/s390/kernel/suspend.c @@ -0,0 +1,225 @@ +/* + * Suspend support specific for s390. + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + */ + +#include <linux/pfn.h> +#include <linux/suspend.h> +#include <linux/mm.h> +#include <asm/ctl_reg.h> +#include <asm/ipl.h> +#include <asm/cio.h> +#include <asm/pci.h> +#include "entry.h" + +/* + * References to section boundaries + */ +extern const void __nosave_begin, __nosave_end; + +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; +unsigned long suspend_zero_pages; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); + + /* Always save lowcore pages (LC protection might be enabled). */ + if (pfn <= LC_PAGES) + return 0; + if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) + return 1; + /* Skip memory holes and read-only pages (NSS, DCSS, ...). */ + if (tprot(PFN_PHYS(pfn))) + return 1; + return 0; +} + +/* + * PM notifier callback for suspend + */ +static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); + if (!suspend_zero_pages) + return NOTIFY_BAD; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + free_pages(suspend_zero_pages, LC_ORDER); + break; + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static int __init suspend_pm_init(void) +{ + pm_notifier(suspend_pm_cb, 0); + return 0; +} +arch_initcall(suspend_pm_init); + +void save_processor_state(void) +{ + /* swsusp_arch_suspend() actually saves all cpu register contents. + * Machine checks must be disabled since swsusp_arch_suspend() stores + * register contents to their lowcore save areas. That's the same + * place where register contents on machine checks would be saved. + * To avoid register corruption disable machine checks. + * We must also disable machine checks in the new psw mask for + * program checks, since swsusp_arch_suspend() may generate program + * checks. Disabling machine checks for all other new psw masks is + * just paranoia. + */ + local_mcck_disable(); + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; +} + +void restore_processor_state(void) +{ + S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; + /* Enable lowcore protection */ + __ctl_set_bit(0,28); + local_mcck_enable(); +} + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); + zpci_rescan(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S new file mode 100644 index 00000000000..6b09fdffbd2 --- /dev/null +++ b/arch/s390/kernel/swsusp_asm64.S @@ -0,0 +1,306 @@ +/* + * S390 64-bit swsusp implementation + * + * Copyright IBM Corp. 2009 + * + * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/sigp.h> + +/* + * Save register context in absolute 0 lowcore and call swsusp_save() to + * create in-memory kernel image. The context is saved in the designated + * "store status" memory locations (see POP). + * We return from this function twice. The first time during the suspend to + * disk process. The second time via the swsusp_arch_resume() function + * (see below) in the resume process. + * This function runs with disabled interrupts. + */ + .section .text +ENTRY(swsusp_arch_suspend) + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Store prefix register on stack */ + stpx __SF_EMPTY(%r15) + + /* Save prefix register contents for lowcore copy */ + llgf %r10,__SF_EMPTY(%r15) + + /* Get pointer to save area */ + lghi %r1,0x1000 + + /* Save CPU address */ + stap __LC_EXT_CPU_ADDR(%r0) + + /* Store registers */ + mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ + stfpc 0x31c(%r1) /* store fpu control */ + std 0,0x200(%r1) /* store f0 */ + std 1,0x208(%r1) /* store f1 */ + std 2,0x210(%r1) /* store f2 */ + std 3,0x218(%r1) /* store f3 */ + std 4,0x220(%r1) /* store f4 */ + std 5,0x228(%r1) /* store f5 */ + std 6,0x230(%r1) /* store f6 */ + std 7,0x238(%r1) /* store f7 */ + std 8,0x240(%r1) /* store f8 */ + std 9,0x248(%r1) /* store f9 */ + std 10,0x250(%r1) /* store f10 */ + std 11,0x258(%r1) /* store f11 */ + std 12,0x260(%r1) /* store f12 */ + std 13,0x268(%r1) /* store f13 */ + std 14,0x270(%r1) /* store f14 */ + std 15,0x278(%r1) /* store f15 */ + stam %a0,%a15,0x340(%r1) /* store access registers */ + stctg %c0,%c15,0x380(%r1) /* store control registers */ + stmg %r0,%r15,0x280(%r1) /* store general registers */ + + stpt 0x328(%r1) /* store timer */ + stck __SF_EMPTY(%r15) /* store clock */ + stckc 0x330(%r1) /* store clock comparator */ + + /* Update cputime accounting before going to sleep */ + lg %r0,__LC_LAST_UPDATE_TIMER + slg %r0,0x328(%r1) + alg %r0,__LC_SYSTEM_TIMER + stg %r0,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1) + lg %r0,__LC_LAST_UPDATE_CLOCK + slg %r0,__SF_EMPTY(%r15) + alg %r0,__LC_STEAL_TIMER + stg %r0,__LC_STEAL_TIMER + mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Save absolute zero pages */ + larl %r2,suspend_zero_pages + lg %r2,0(%r2) + lghi %r4,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Copy lowcore to absolute zero lowcore */ + lghi %r2,0 + lgr %r4,%r10 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Save image */ + brasl %r14,swsusp_save + + /* Restore prefix register and return */ + lghi %r1,0x1000 + spx 0x318(%r1) + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 + +/* + * Restore saved memory image to correct place and restore register context. + * Then we return to the function that called swsusp_arch_suspend(). + * swsusp_arch_resume() runs with disabled interrupts. + */ +ENTRY(swsusp_arch_resume) + stmg %r6,%r15,__SF_GPRS(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + + /* Make all free pages stable */ + lghi %r2,1 + brasl %r14,arch_set_page_states + + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + + /* Set prefix page to zero */ + xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) + spx __SF_EMPTY(%r15) + + /* Restore saved image */ + larl %r1,restore_pblist + lg %r1,0(%r1) + ltgr %r1,%r1 + jz 2f +0: + lg %r2,8(%r1) + lg %r4,0(%r1) + iske %r0,%r4 + lghi %r3,PAGE_SIZE + lghi %r5,PAGE_SIZE +1: + mvcle %r2,%r4,0 + jo 1b + lg %r2,8(%r1) + sske %r0,%r2 + lg %r1,16(%r1) + ltgr %r1,%r1 + jnz 0b +2: + ptlb /* flush tlb */ + + /* Reset System */ + larl %r1,restart_entry + larl %r2,.Lrestart_diag308_psw + og %r1,0(%r2) + stg %r1,0(%r0) + larl %r1,.Lnew_pgm_check_psw + epsw %r2,%r3 + stm %r2,%r3,0(%r1) + mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1) + lghi %r0,0 + diag %r0,%r0,0x308 +restart_entry: + lhi %r1,1 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + sam64 + larl %r1,.Lnew_pgm_check_psw + lpswe 0(%r1) +pgm_check_entry: + + /* Switch to original suspend CPU */ + larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ + stap 0(%r1) + llgh %r2,0(%r1) + llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ + cgr %r1,%r2 + je restore_registers /* r1 = r2 -> nothing to do */ + larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ + mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) +3: + sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */ + brc 8,4f /* accepted */ + brc 2,3b /* busy, try again */ + + /* Suspend CPU not available -> panic */ + larl %r15,init_thread_union + ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) + larl %r2,.Lpanic_string + larl %r3,_sclp_print_early + lghi %r1,0 + sam31 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + basr %r14,%r3 + larl %r3,.Ldisabled_wait_31 + lpsw 0(%r3) +4: + /* Switch to suspend CPU */ + sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */ + brc 2,4b /* busy, try again */ +5: + sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */ + brc 2,5b /* busy, try again */ +6: j 6b + +restart_suspend: + larl %r1,.Lresume_cpu + llgh %r2,0(%r1) +7: + sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */ + brc 8,7b /* accepted, status 0, still running */ + brc 2,7b /* busy, try again */ + tmll %r9,0x40 /* Test if resume CPU is stopped */ + jz 7b + +restore_registers: + /* Restore registers */ + lghi %r13,0x1000 /* %r1 = pointer to save area */ + + /* Ignore time spent in suspended state. */ + llgf %r1,0x318(%r13) + stck __LC_LAST_UPDATE_CLOCK(%r1) + spt 0x328(%r13) /* reprogram timer */ + //sckc 0x330(%r13) /* set clock comparator */ + + lctlg %c0,%c15,0x380(%r13) /* load control registers */ + lam %a0,%a15,0x340(%r13) /* load access registers */ + + lfpc 0x31c(%r13) /* load fpu control */ + ld 0,0x200(%r13) /* load f0 */ + ld 1,0x208(%r13) /* load f1 */ + ld 2,0x210(%r13) /* load f2 */ + ld 3,0x218(%r13) /* load f3 */ + ld 4,0x220(%r13) /* load f4 */ + ld 5,0x228(%r13) /* load f5 */ + ld 6,0x230(%r13) /* load f6 */ + ld 7,0x238(%r13) /* load f7 */ + ld 8,0x240(%r13) /* load f8 */ + ld 9,0x248(%r13) /* load f9 */ + ld 10,0x250(%r13) /* load f10 */ + ld 11,0x258(%r13) /* load f11 */ + ld 12,0x260(%r13) /* load f12 */ + ld 13,0x268(%r13) /* load f13 */ + ld 14,0x270(%r13) /* load f14 */ + ld 15,0x278(%r13) /* load f15 */ + + /* Load old stack */ + lg %r15,0x2f8(%r13) + + /* Save prefix register */ + mvc __SF_EMPTY(4,%r15),0x318(%r13) + + /* Restore absolute zero pages */ + lghi %r2,0 + larl %r4,suspend_zero_pages + lg %r4,0(%r4) + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b + + /* Restore prefix register */ + spx __SF_EMPTY(%r15) + + /* Activate DAT */ + stosm __SF_EMPTY(%r15),0x04 + + /* Make all free pages unstable */ + lghi %r2,0 + brasl %r14,arch_set_page_states + + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume + + /* Return 0 */ + lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) + lghi %r2,0 + br %r14 + + .section .data..nosave,"aw",@progbits + .align 8 +.Ldisabled_wait_31: + .long 0x000a0000,0x00000000 +.Lpanic_string: + .asciz "Resume not possible because suspend CPU is no longer available" + .align 8 +.Lrestart_diag308_psw: + .long 0x00080000,0x80000000 +.Lrestart_suspend_psw: + .quad 0x0000000180000000,restart_suspend +.Lnew_pgm_check_psw: + .quad 0,pgm_check_entry +.Lresume_cpu: + .byte 0,0 diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index fefee99f28a..23eb222c165 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/sys_s390.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Thomas Spatzier (tspat@de.ibm.com) * @@ -29,60 +27,16 @@ #include <linux/personality.h> #include <linux/unistd.h> #include <linux/ipc.h> - #include <asm/uaccess.h> +#include "entry.h" /* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(unsigned long __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - long error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. Linux for S/390 isn't able to handle more than 5 - * system call parameters, so these system calls used a memory - * block for parameter passing.. + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. */ -struct mmap_arg_struct { +struct s390_mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; @@ -91,146 +45,48 @@ struct mmap_arg_struct { unsigned long offset; }; -asmlinkage long sys_mmap2(struct mmap_arg_struct __user *arg) +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) { - struct mmap_arg_struct a; + struct s390_mmap_arg_struct a; int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) goto out; - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -out: - return error; -} - -asmlinkage long old_mmap(struct mmap_arg_struct __user *arg) -{ - struct mmap_arg_struct a; - long error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); out: return error; } -#ifndef CONFIG_64BIT -struct sel_arg_struct { - unsigned long n; - fd_set __user *inp, *outp, *exp; - struct timeval __user *tvp; -}; - -asmlinkage long old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); - -} -#endif /* CONFIG_64BIT */ - /* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. + * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ -asmlinkage long sys_ipc(uint call, int first, unsigned long second, - unsigned long third, void __user *ptr) +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) { - struct ipc_kludge tmp; - int ret; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) third); - case SEMGET: - return sys_semget(first, (int)second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, (int)second, third, fourth); - } - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - (size_t)second, third); - break; - case MSGRCV: - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, (struct ipc_kludge __user *) ptr, - sizeof (struct ipc_kludge))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, - (size_t)second, tmp.msgtyp, third); - case MSGGET: - return sys_msgget((key_t)first, (int)second); - case MSGCTL: - return sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, - (int)second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - break; - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget(first, (size_t)second, third); - case SHMCTL: - return sys_shmctl(first, (int)second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - - } - - return -EINVAL; + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return sys_ipc(call, first, second, third, ptr, third); } #ifdef CONFIG_64BIT -asmlinkage long s390x_newuname(struct new_utsname __user *name) +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) { - int ret = sys_newuname(name); + unsigned int ret; - if (current->personality == PER_LINUX32 && !ret) { - ret = copy_to_user(name->machine, "s390\0\0\0\0", 8); - if (ret) ret = -EFAULT; - } - return ret; -} - -asmlinkage long s390x_personality(unsigned long personality) -{ - int ret; - - if (current->personality == PER_LINUX32 && personality == PER_LINUX) - personality = PER_LINUX32; + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; return ret; } @@ -241,15 +97,13 @@ asmlinkage long s390x_personality(unsigned long personality) */ #ifndef CONFIG_64BIT -asmlinkage long -s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice) +SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low, + size_t, len, int, advice) { return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low, len, advice); } -#endif - struct fadvise64_64_args { int fd; long long offset; @@ -257,8 +111,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -s390_fadvise64_64(struct fadvise64_64_args __user *args) +SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; @@ -267,7 +120,6 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args) return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } -#ifndef CONFIG_64BIT /* * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last * 64 bit argument "len" is split into the upper and lower 32 bits. The @@ -280,8 +132,8 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args) * to * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len */ -asmlinkage long s390_fallocate(int fd, int mode, loff_t offset, - u32 len_high, u32 len_low) +SYSCALL_DEFINE5(s390_fallocate, int, fd, int, mode, loff_t, offset, + u32, len_high, u32, len_low) { return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); } diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index c87ec687d4c..fe5cdf29a00 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -9,324 +9,350 @@ #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall) NI_SYSCALL /* 0 */ -SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper) +SYSCALL(sys_exit,sys_exit,compat_sys_exit) SYSCALL(sys_fork,sys_fork,sys_fork) -SYSCALL(sys_read,sys_read,sys32_read_wrapper) -SYSCALL(sys_write,sys_write,sys32_write_wrapper) -SYSCALL(sys_open,sys_open,sys32_open_wrapper) /* 5 */ -SYSCALL(sys_close,sys_close,sys32_close_wrapper) +SYSCALL(sys_read,sys_read,compat_sys_s390_read) +SYSCALL(sys_write,sys_write,compat_sys_s390_write) +SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */ +SYSCALL(sys_close,sys_close,compat_sys_close) SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) -SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper) -SYSCALL(sys_link,sys_link,sys32_link_wrapper) -SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */ -SYSCALL(sys_execve,sys_execve,sys32_execve) -SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper) -SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ -SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper) -SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ -SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ +SYSCALL(sys_creat,sys_creat,compat_sys_creat) +SYSCALL(sys_link,sys_link,compat_sys_link) +SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */ +SYSCALL(sys_execve,sys_execve,compat_sys_execve) +SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir) +SYSCALL(sys_time,sys_ni_syscall,compat_sys_time) /* old time syscall */ +SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod) +SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */ +SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ -SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper) +SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ -SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) -SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) -SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/ -SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ -SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ -SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) -SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) +SYSCALL(sys_mount,sys_mount,compat_sys_mount) +SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount) +SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ +SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ +SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */ +SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace) +SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm) NI_SYSCALL /* old fstat syscall */ -SYSCALL(sys_pause,sys_pause,sys32_pause) -SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ +SYSCALL(sys_pause,sys_pause,sys_pause) +SYSCALL(sys_utime,sys_utime,compat_sys_utime) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ -SYSCALL(sys_access,sys_access,sys32_access_wrapper) -SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper) +SYSCALL(sys_access,sys_access,compat_sys_access) +SYSCALL(sys_nice,sys_nice,compat_sys_nice) NI_SYSCALL /* 35 old ftime syscall */ SYSCALL(sys_sync,sys_sync,sys_sync) -SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) -SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper) -SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper) -SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */ -SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper) -SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) -SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) +SYSCALL(sys_kill,sys_kill,compat_sys_kill) +SYSCALL(sys_rename,sys_rename,compat_sys_rename) +SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir) +SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */ +SYSCALL(sys_dup,sys_dup,compat_sys_dup) +SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe) +SYSCALL(sys_times,sys_times,compat_sys_times) NI_SYSCALL /* old prof syscall */ -SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ -SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/ -SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ -SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) -SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ -SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */ -SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) -SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) +SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */ +SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ +SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/ +SYSCALL(sys_signal,sys_signal,compat_sys_signal) +SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */ +SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */ +SYSCALL(sys_acct,sys_acct,compat_sys_acct) +SYSCALL(sys_umount,sys_umount,compat_sys_umount) NI_SYSCALL /* old lock syscall */ -SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) -SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ +SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl) +SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl) /* 55 */ NI_SYSCALL /* intel mpx syscall */ -SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper) +SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid) NI_SYSCALL /* old ulimit syscall */ NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */ -SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper) -SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) -SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper) +SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */ +SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot) +SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat) +SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2) SYSCALL(sys_getppid,sys_getppid,sys_getppid) SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ SYSCALL(sys_setsid,sys_setsid,sys_setsid) -SYSCALL(sys_sigaction,sys_sigaction,sys32_sigaction_wrapper) +SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction) NI_SYSCALL /* old sgetmask syscall*/ NI_SYSCALL /* old ssetmask syscall*/ -SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ -SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */ -SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) -SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) -SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) -SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ -SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) -SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper) -SYSCALL(sys_gettimeofday,sys_gettimeofday,sys32_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,sys32_settimeofday_wrapper) -SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ -SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ +SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */ +SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ +SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend) +SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending) +SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname) +SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit) /* 75 */ +SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit) +SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday) +SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */ +SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ -SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) +SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink) NI_SYSCALL /* old lstat syscall */ -SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */ -SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) -SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) -SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) -SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(old_mmap,old_mmap,old32_mmap_wrapper) /* 90 */ -SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) -SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) -SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) -SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) -SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ -SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) -SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) +SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */ +SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib) +SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon) +SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot) +SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */ +SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ +SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap) +SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) +SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) +SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod) +SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/ +SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority) +SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority) NI_SYSCALL /* old profil syscall */ -SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) -SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ +SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs) +SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs) /* 100 */ NI_SYSCALL /* ioperm for i386 */ -SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) -SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper) -SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer_wrapper) -SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer_wrapper) /* 105 */ -SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) -SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) -SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) +SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall) +SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog) +SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer) +SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */ +SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat) +SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat) +SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat) NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,sys32_lookup_dcookie_wrapper) /* 110 */ +SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ -SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper) -SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ -SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) -SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper) -SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) -SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) -SYSCALL(sys_clone,sys_clone,sys32_clone) /* 120 */ -SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) -SYSCALL(sys_newuname,s390x_newuname,sys32_newuname_wrapper) +SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) +SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */ +SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo) +SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) +SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync) +SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn) +SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */ +SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) +SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ -SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) -SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ -SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask_wrapper) +SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex) +SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */ +SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask) NI_SYSCALL /* old "create module" */ -SYSCALL(sys_init_module,sys_init_module,sys32_init_module_wrapper) -SYSCALL(sys_delete_module,sys_delete_module,sys32_delete_module_wrapper) +SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module) +SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module) NI_SYSCALL /* 130: old get_kernel_syms */ -SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper) -SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) -SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) -SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) -SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ -SYSCALL(sys_personality,s390x_personality,sys32_personality_wrapper) +SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl) +SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid) +SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir) +SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush) +SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs) /* 135 */ +SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality) NI_SYSCALL /* for afs_syscall */ -SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ -SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ -SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ -SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) -SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) -SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) -SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) -SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ -SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) -SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) -SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) -SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper) -SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */ -SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper) -SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper) +SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ +SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ +SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */ +SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents) +SYSCALL(sys_select,sys_select,compat_sys_select) +SYSCALL(sys_flock,sys_flock,compat_sys_flock) +SYSCALL(sys_msync,sys_msync,compat_sys_msync) +SYSCALL(sys_readv,sys_readv,compat_sys_readv) /* 145 */ +SYSCALL(sys_writev,sys_writev,compat_sys_writev) +SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid) +SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync) +SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl) +SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock) /* 150 */ +SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock) +SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall) SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) -SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper) -SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */ -SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper) -SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) +SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam) +SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */ +SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler) +SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler) SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) -SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) -SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */ -SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,sys32_sched_rr_get_interval_wrapper) -SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) -SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) -SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */ -SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */ +SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max) +SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */ +SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) +SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep) +SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap) +SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */ +SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */ NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ -SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) -SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper) -SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ -SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ -SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) -SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) -SYSCALL(sys_rt_sigaction,sys_rt_sigaction,sys32_rt_sigaction_wrapper) -SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,sys32_rt_sigprocmask_wrapper) /* 175 */ -SYSCALL(sys_rt_sigpending,sys_rt_sigpending,sys32_rt_sigpending_wrapper) -SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait_wrapper) -SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,sys32_rt_sigqueueinfo_wrapper) -SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend_wrapper) -SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ -SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) -SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */ -SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) -SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) -SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ -SYSCALL(sys_sigaltstack,sys_sigaltstack,sys32_sigaltstack) -SYSCALL(sys_sendfile,sys_sendfile64,sys32_sendfile_wrapper) +SYSCALL(sys_poll,sys_poll,compat_sys_poll) +NI_SYSCALL /* old nfsservctl */ +SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ +SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */ +SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl) +SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn) +SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) +SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */ +SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending) +SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait) +SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo) +SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) +SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */ +SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64) +SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */ +SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd) +SYSCALL(sys_capget,sys_capget,compat_sys_capget) +SYSCALL(sys_capset,sys_capset,compat_sys_capset) /* 185 */ +SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) +SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ -SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) -SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) -SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper) -SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) -SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ -SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) -SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) -SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) +SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit) +SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2) +SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) +SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) +SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */ +SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64) +SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64) +SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown) SYSCALL(sys_getuid,sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) SYSCALL(sys_getegid,sys_getegid,sys_getegid) -SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper) -SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper) -SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */ -SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper) -SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper) -SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper) -SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper) -SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */ -SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper) -SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper) -SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) -SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) -SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ -SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper) -SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper) -SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) -SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) -SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ -SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) -SYSCALL(sys_readahead,sys_readahead,sys32_readahead) -SYSCALL(sys_sendfile64,sys_ni_syscall,sys32_sendfile64) -SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) -SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) -SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) -SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) -SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) -SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper) -SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper) -SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper) -SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper) -SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */ +SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid) +SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid) +SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups) /* 205 */ +SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups) +SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown) +SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid) +SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid) +SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */ +SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid) +SYSCALL(sys_chown,sys_chown,compat_sys_chown) +SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid) +SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid) +SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid) /* 215 */ +SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid) +SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root) +SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore) +SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise) +SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */ +SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64) +SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) +SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) +SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr) +SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */ +SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr) +SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr) +SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr) +SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr) +SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr) /* 230 */ +SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr) +SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr) +SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr) +SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr) +SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid,sys_gettid) -SYSCALL(sys_tkill,sys_tkill,sys_tkill) -SYSCALL(sys_futex,sys_futex,compat_sys_futex_wrapper) -SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) -SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ -SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill) +SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill) +SYSCALL(sys_futex,sys_futex,compat_sys_futex) +SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity) +SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */ +SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill) NI_SYSCALL /* reserved for TUX */ -SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) -SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper) -SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ -SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) -SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper) -SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper) -SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) -SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ -SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) -SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) -SYSCALL(s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) -SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) -SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ -SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) -SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper) -SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper) -SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) -SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ -SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) -SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup) +SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy) +SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents) /* 245 */ +SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit) +SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel) +SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group) +SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create) +SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */ +SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait) +SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address) +SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64) +SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create) +SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime) /* 255 */ +SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime) +SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun) +SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete) +SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime) +SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime) /* 260 */ +SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres) +SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep) NI_SYSCALL /* reserved for vserver */ -SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) -SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) -SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) -SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) +SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64) +SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64) +SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64) +SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages) NI_SYSCALL /* 268 sys_mbind */ NI_SYSCALL /* 269 sys_get_mempolicy */ NI_SYSCALL /* 270 sys_set_mempolicy */ -SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) -SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) -SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) -SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) -SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ -SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) -SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) -SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) -SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) +SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open) +SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink) +SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend) +SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive) +SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */ +SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr) +SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load) +SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key) +SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key) SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */ -SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid_wrapper) -SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper) -SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper) +SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid) +SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set) +SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get) SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init) -SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */ -SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper) +SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */ +SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch) NI_SYSCALL /* 287 sys_migrate_pages */ -SYSCALL(sys_openat,sys_openat,compat_sys_openat_wrapper) -SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) -SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ -SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) -SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) -SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper) -SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) -SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ -SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) -SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper) -SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper) -SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper) -SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */ -SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) -SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) -SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) -SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list_wrapper) -SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list_wrapper) -SYSCALL(sys_splice,sys_splice,sys_splice_wrapper) -SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) -SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) -SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) +SYSCALL(sys_openat,sys_openat,compat_sys_openat) +SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat) +SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */ +SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat) +SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat) +SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) +SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat) +SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */ +SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat) +SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat) +SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat) +SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat) +SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */ +SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6) +SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll) +SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare) +SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) +SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) +SYSCALL(sys_splice,sys_splice,compat_sys_splice) +SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range) +SYSCALL(sys_tee,sys_tee,compat_sys_tee) +SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ -SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) -SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper) -SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) -SYSCALL(s390_fallocate,sys_fallocate,sys_fallocate_wrapper) -SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ -SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper) +SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) +SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes) +SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate) +SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat) /* 315 */ +SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ -SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) -SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) -SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime_wrapper) /* 320 */ -SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime_wrapper) +SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd) +SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create) +SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ +SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime) +SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4) +SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2) +SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1) +SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */ +SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3) +SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1) +SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) +SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) +SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ +SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open) +SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init) +SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark) +SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64) +SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */ +SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at) +SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime) +SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs) +SYSCALL(sys_setns,sys_setns,compat_sys_setns) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev) +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr) +SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp) +SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) +SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ +SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) +SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c new file mode 100644 index 00000000000..811f542b8ed --- /dev/null +++ b/arch/s390/kernel/sysinfo.c @@ -0,0 +1,428 @@ +/* + * Copyright IBM Corp. 2001, 2009 + * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <asm/ebcdic.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> +#include <asm/topology.h> + +/* Sigh, math-emu. Don't ask. */ +#include <asm/sfp-util.h> +#include <math-emu/soft-fp.h> +#include <math-emu/single.h> + +int topology_max_mnest; + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + register int r0 asm("0") = (fc << 28) | sel1; + register int r1 asm("1") = sel2; + int rc = 0; + + asm volatile( + " stsi 0(%3)\n" + "0: jz 2f\n" + "1: lhi %1,%4\n" + "2:\n" + EX_TABLE(0b, 1b) + : "+d" (r0), "+d" (rc) + : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP) + : "cc", "memory"); + if (rc) + return rc; + return fc ? 0 : ((unsigned int) r0) >> 28; +} +EXPORT_SYMBOL(stsi); + +static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) +{ + int i; + + if (stsi(info, 1, 1, 1)) + return; + EBCASC(info->manufacturer, sizeof(info->manufacturer)); + EBCASC(info->type, sizeof(info->type)); + EBCASC(info->model, sizeof(info->model)); + EBCASC(info->sequence, sizeof(info->sequence)); + EBCASC(info->plant, sizeof(info->plant)); + EBCASC(info->model_capacity, sizeof(info->model_capacity)); + EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); + EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); + seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); + seq_printf(m, "Type: %-4.4s\n", info->type); + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + seq_printf(m, "Model: %-16.16s", info->model_capacity); + if (info->model[0] != '\0') + seq_printf(m, " %-16.16s", info->model); + seq_putc(m, '\n'); + seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence); + seq_printf(m, "Plant: %-4.4s\n", info->plant); + seq_printf(m, "Model Capacity: %-16.16s %08u\n", + info->model_capacity, info->model_cap_rating); + if (info->model_perm_cap_rating) + seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n", + info->model_perm_cap, + info->model_perm_cap_rating); + if (info->model_temp_cap_rating) + seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n", + info->model_temp_cap, + info->model_temp_cap_rating); + if (info->ncr) + seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr); + if (info->npr) + seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr); + if (info->ntr) + seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr); + if (info->cai) { + seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai); + seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr); + seq_printf(m, "Capacity Transient: %d\n", info->t); + } + if (info->p) { + for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) { + seq_printf(m, "Type %d Percentage: %d\n", + i, info->typepct[i - 1]); + } + } +} + +static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) +{ + static int max_mnest; + int i, rc; + + seq_putc(m, '\n'); + if (!MACHINE_HAS_TOPOLOGY) + return; + if (stsi(info, 15, 1, topology_max_mnest)) + return; + seq_printf(m, "CPU Topology HW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); +#ifdef CONFIG_SCHED_MC + store_topology(info); + seq_printf(m, "CPU Topology SW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); +#endif +} + +static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) +{ + struct sysinfo_1_2_2_extension *ext; + int i; + + if (stsi(info, 1, 2, 2)) + return; + ext = (struct sysinfo_1_2_2_extension *) + ((unsigned long) info + info->acc_offset); + seq_printf(m, "CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the strange + * encoding of the alternate cpu capability. + */ + seq_printf(m, "Capability: %u", info->capability); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_capability); + seq_putc(m, '\n'); + if (info->nominal_cap) + seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap); + if (info->secondary_cap) + seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap); + for (i = 2; i <= info->cpus_total; i++) { + seq_printf(m, "Adjustment %02d-way: %u", + i, info->adjustment[i-2]); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_adjustment[i-2]); + seq_putc(m, '\n'); + } +} + +static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) +{ + if (stsi(info, 2, 2, 2)) + return; + EBCASC(info->name, sizeof(info->name)); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Number: %d\n", info->lpar_number); + seq_printf(m, "LPAR Characteristics: "); + if (info->characteristics & LPAR_CHAR_DEDICATED) + seq_printf(m, "Dedicated "); + if (info->characteristics & LPAR_CHAR_SHARED) + seq_printf(m, "Shared "); + if (info->characteristics & LPAR_CHAR_LIMITED) + seq_printf(m, "Limited "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Name: %-8.8s\n", info->name); + seq_printf(m, "LPAR Adjustment: %d\n", info->caf); + seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); + seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); + seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); +} + +static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) +{ + int i; + + if (stsi(info, 3, 2, 2)) + return; + for (i = 0; i < info->count; i++) { + EBCASC(info->vm[i].name, sizeof(info->vm[i].name)); + EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi)); + seq_putc(m, '\n'); + seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name); + seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi); + seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf); + seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total); + seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); + seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); + seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); + } +} + +static int sysinfo_show(struct seq_file *m, void *v) +{ + void *info = (void *)get_zeroed_page(GFP_KERNEL); + int level; + + if (!info) + return 0; + level = stsi(NULL, 0, 0, 0); + if (level >= 1) + stsi_1_1_1(m, info); + if (level >= 1) + stsi_15_1_x(m, info); + if (level >= 1) + stsi_1_2_2(m, info); + if (level >= 2) + stsi_2_2_2(m, info); + if (level >= 3) + stsi_3_2_2(m, info); + free_page((unsigned long)info); + return 0; +} + +static int sysinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, sysinfo_show, NULL); +} + +static const struct file_operations sysinfo_fops = { + .open = sysinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init sysinfo_create_proc(void) +{ + proc_create("sysinfo", 0444, NULL, &sysinfo_fops); + return 0; +} +device_initcall(sysinfo_create_proc); + +/* + * Service levels interface. + */ + +static DECLARE_RWSEM(service_level_sem); +static LIST_HEAD(service_level_list); + +int register_service_level(struct service_level *slr) +{ + struct service_level *ptr; + + down_write(&service_level_sem); + list_for_each_entry(ptr, &service_level_list, list) + if (ptr == slr) { + up_write(&service_level_sem); + return -EEXIST; + } + list_add_tail(&slr->list, &service_level_list); + up_write(&service_level_sem); + return 0; +} +EXPORT_SYMBOL(register_service_level); + +int unregister_service_level(struct service_level *slr) +{ + struct service_level *ptr, *next; + int rc = -ENOENT; + + down_write(&service_level_sem); + list_for_each_entry_safe(ptr, next, &service_level_list, list) { + if (ptr != slr) + continue; + list_del(&ptr->list); + rc = 0; + break; + } + up_write(&service_level_sem); + return rc; +} +EXPORT_SYMBOL(unregister_service_level); + +static void *service_level_start(struct seq_file *m, loff_t *pos) +{ + down_read(&service_level_sem); + return seq_list_start(&service_level_list, *pos); +} + +static void *service_level_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &service_level_list, pos); +} + +static void service_level_stop(struct seq_file *m, void *p) +{ + up_read(&service_level_sem); +} + +static int service_level_show(struct seq_file *m, void *p) +{ + struct service_level *slr; + + slr = list_entry(p, struct service_level, list); + slr->seq_print(m, slr); + return 0; +} + +static const struct seq_operations service_level_seq_ops = { + .start = service_level_start, + .next = service_level_next, + .stop = service_level_stop, + .show = service_level_show +}; + +static int service_level_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &service_level_seq_ops); +} + +static const struct file_operations service_level_ops = { + .open = service_level_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void service_level_vm_print(struct seq_file *m, + struct service_level *slr) +{ + char *query_buffer, *str; + + query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA); + if (!query_buffer) + return; + cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL); + str = strchr(query_buffer, '\n'); + if (str) + *str = 0; + seq_printf(m, "VM: %s\n", query_buffer); + kfree(query_buffer); +} + +static struct service_level service_level_vm = { + .seq_print = service_level_vm_print +}; + +static __init int create_proc_service_level(void) +{ + proc_create("service_levels", 0, NULL, &service_level_ops); + if (MACHINE_IS_VM) + register_service_level(&service_level_vm); + return 0; +} +subsys_initcall(create_proc_service_level); + +/* + * CPU capability might have changed. Therefore recalculate loops_per_jiffy. + */ +void s390_adjust_jiffies(void) +{ + struct sysinfo_1_2_2 *info; + const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */ + FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); + FP_DECL_EX; + unsigned int capability; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return; + + if (stsi(info, 1, 2, 2) == 0) { + /* + * Major sigh. The cpu capability encoding is "special". + * If the first 9 bits of info->capability are 0 then it + * is a 32 bit unsigned integer in the range 0 .. 2^23. + * If the first 9 bits are != 0 then it is a 32 bit float. + * In addition a lower value indicates a proportionally + * higher cpu capacity. Bogomips are the other way round. + * To get to a halfway suitable number we divide 1e7 + * by the cpu capability number. Yes, that means a floating + * point division .. math-emu here we come :-) + */ + FP_UNPACK_SP(SA, &fmil); + if ((info->capability >> 23) == 0) + FP_FROM_INT_S(SB, (long) info->capability, 64, long); + else + FP_UNPACK_SP(SB, &info->capability); + FP_DIV_S(SR, SA, SB); + FP_TO_INT_S(capability, SR, 32, 0); + } else + /* + * Really old machine without stsi block for basic + * cpu information. Report 42.0 bogomips. + */ + capability = 42; + loops_per_jiffy = capability * (500000/HZ); + free_page((unsigned long) info); +} + +/* + * calibrate the delay loop + */ +void calibrate_delay(void) +{ + s390_adjust_jiffies(); + /* Print the good old Bogomips line .. */ + printk(KERN_DEBUG "Calibrating delay loop (skipped)... " + "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index cb232c15536..0931b110c82 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,9 +1,8 @@ /* - * arch/s390/kernel/time.c * Time of day based timer functions. * * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2008 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) @@ -12,6 +11,10 @@ * Copyright (C) 1991, 1992, 1995 Linus Torvalds */ +#define KMSG_COMPONENT "time" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel_stat.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/sched.h> @@ -20,8 +23,10 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/stop_machine.h> #include <linux/time.h> -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/smp.h> @@ -29,44 +34,36 @@ #include <linux/profile.h> #include <linux/timex.h> #include <linux/notifier.h> -#include <linux/clocksource.h> - +#include <linux/timekeeper_internal.h> +#include <linux/clockchips.h> +#include <linux/gfp.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/delay.h> -#include <asm/s390_ext.h> #include <asm/div64.h> +#include <asm/vdso.h> #include <asm/irq.h> #include <asm/irq_regs.h> -#include <asm/timer.h> +#include <asm/vtimer.h> #include <asm/etr.h> +#include <asm/cio.h> +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) #define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) -/* The value of the TOD clock for 1.1.1970. */ -#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL +u64 sched_clock_base_cc = -1; /* Force to data section. */ +EXPORT_SYMBOL_GPL(sched_clock_base_cc); -/* - * Create a small time difference between the timer interrupts - * on the different cpus to avoid lock contention. - */ -#define CPU_DEVIATION (smp_processor_id() << 12) - -#define TICK_SIZE tick - -static ext_int_info_t ext_int_info_cc; -static ext_int_info_t ext_int_etr_cc; -static u64 init_timer_cc; -static u64 jiffies_timer_cc; -static u64 xtime_cc; +static DEFINE_PER_CPU(struct clock_event_device, comparators); /* * Scheduler clock - returns current time in nanosec units. */ -unsigned long long sched_clock(void) +unsigned long long notrace __kprobes sched_clock(void) { - return ((get_clock() - jiffies_timer_cc) * 125) >> 9; + return tod_to_ns(get_tod_clock_monotonic()); } /* @@ -78,179 +75,50 @@ unsigned long long monotonic_clock(void) } EXPORT_SYMBOL(monotonic_clock); -void tod_to_timeval(__u64 todval, struct timespec *xtime) +void tod_to_timeval(__u64 todval, struct timespec *xt) { unsigned long long sec; sec = todval >> 12; do_div(sec, 1000000); - xtime->tv_sec = sec; + xt->tv_sec = sec; todval -= (sec * 1000000) << 12; - xtime->tv_nsec = ((todval * 1000) >> 12); + xt->tv_nsec = ((todval * 1000) >> 12); } +EXPORT_SYMBOL(tod_to_timeval); -#ifdef CONFIG_PROFILING -#define s390_do_profile() profile_tick(CPU_PROFILING) -#else -#define s390_do_profile() do { ; } while(0) -#endif /* CONFIG_PROFILING */ - -/* - * Advance the per cpu tick counter up to the time given with the - * "time" argument. The per cpu update consists of accounting - * the virtual cpu time, calling update_process_times and calling - * the profiling hook. If xtime is before time it is advanced as well. - */ -void account_ticks(u64 time) -{ - __u32 ticks; - __u64 tmp; - - /* Calculate how many ticks have passed. */ - if (time < S390_lowcore.jiffy_timer) - return; - tmp = time - S390_lowcore.jiffy_timer; - if (tmp >= 2*CLK_TICKS_PER_JIFFY) { /* more than two ticks ? */ - ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1; - S390_lowcore.jiffy_timer += - CLK_TICKS_PER_JIFFY * (__u64) ticks; - } else if (tmp >= CLK_TICKS_PER_JIFFY) { - ticks = 2; - S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY; - } else { - ticks = 1; - S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; - } - -#ifdef CONFIG_SMP - /* - * Do not rely on the boot cpu to do the calls to do_timer. - * Spread it over all cpus instead. - */ - write_seqlock(&xtime_lock); - if (S390_lowcore.jiffy_timer > xtime_cc) { - __u32 xticks; - tmp = S390_lowcore.jiffy_timer - xtime_cc; - if (tmp >= 2*CLK_TICKS_PER_JIFFY) { - xticks = __div(tmp, CLK_TICKS_PER_JIFFY); - xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY; - } else { - xticks = 1; - xtime_cc += CLK_TICKS_PER_JIFFY; - } - do_timer(xticks); - } - write_sequnlock(&xtime_lock); -#else - do_timer(ticks); -#endif - - while (ticks--) - update_process_times(user_mode(get_irq_regs())); - - s390_do_profile(); -} - -#ifdef CONFIG_NO_IDLE_HZ - -#ifdef CONFIG_NO_IDLE_HZ_INIT -int sysctl_hz_timer = 0; -#else -int sysctl_hz_timer = 1; -#endif - -/* - * Stop the HZ tick on the current CPU. - * Only cpu_idle may call this function. - */ -static void stop_hz_timer(void) +void clock_comparator_work(void) { - unsigned long flags; - unsigned long seq, next; - __u64 timer, todval; - int cpu = smp_processor_id(); - - if (sysctl_hz_timer != 0) - return; - - cpu_set(cpu, nohz_cpu_mask); - - /* - * Leave the clock comparator set up for the next timer - * tick if either rcu or a softirq is pending. - */ - if (rcu_needs_cpu(cpu) || local_softirq_pending()) { - cpu_clear(cpu, nohz_cpu_mask); - return; - } + struct clock_event_device *cd; - /* - * This cpu is going really idle. Set up the clock comparator - * for the next event. - */ - next = next_timer_interrupt(); - do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); - timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); - todval = -1ULL; - /* Be careful about overflows. */ - if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) { - timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY; - if (timer >= jiffies_timer_cc) - todval = timer; - } - set_clock_comparator(todval); + S390_lowcore.clock_comparator = -1ULL; + cd = &__get_cpu_var(comparators); + cd->event_handler(cd); } /* - * Start the HZ tick on the current CPU. - * Only cpu_idle may call this function. + * Fixup the clock comparator. */ -static void start_hz_timer(void) +static void fixup_clock_comparator(unsigned long long delta) { - if (!cpu_isset(smp_processor_id(), nohz_cpu_mask)) + /* If nobody is waiting there's nothing to fix. */ + if (S390_lowcore.clock_comparator == -1ULL) return; - account_ticks(get_clock()); - set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION); - cpu_clear(smp_processor_id(), nohz_cpu_mask); + S390_lowcore.clock_comparator += delta; + set_clock_comparator(S390_lowcore.clock_comparator); } -static int nohz_idle_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int s390_next_event(unsigned long delta, + struct clock_event_device *evt) { - switch (action) { - case S390_CPU_IDLE: - stop_hz_timer(); - break; - case S390_CPU_NOT_IDLE: - start_hz_timer(); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block nohz_idle_nb = { - .notifier_call = nohz_idle_notify, -}; - -static void __init nohz_init(void) -{ - if (register_idle_notifier(&nohz_idle_nb)) - panic("Couldn't register idle notifier"); + S390_lowcore.clock_comparator = get_tod_clock() + delta; + set_clock_comparator(S390_lowcore.clock_comparator); + return 0; } -#endif - -/* - * Set up per cpu jiffy timer and set the clock comparator. - */ -static void setup_jiffy_timer(void) +static void s390_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) { - /* Set up clock comparator to next jiffy. */ - S390_lowcore.jiffy_timer = - jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY; - set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION); } /* @@ -259,44 +127,72 @@ static void setup_jiffy_timer(void) */ void init_cpu_timer(void) { - setup_jiffy_timer(); + struct clock_event_device *cd; + int cpu; + + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + + cpu = smp_processor_id(); + cd = &per_cpu(comparators, cpu); + cd->name = "comparator"; + cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->mult = 16777; + cd->shift = 12; + cd->min_delta_ns = 1; + cd->max_delta_ns = LONG_MAX; + cd->rating = 400; + cd->cpumask = cpumask_of(cpu); + cd->set_next_event = s390_next_event; + cd->set_mode = s390_set_mode; + + clockevents_register_device(cd); /* Enable clock comparator timer interrupt. */ __ctl_set_bit(0,11); - /* Always allow ETR external interrupts, even without an ETR. */ + /* Always allow the timing alert external interrupt. */ __ctl_set_bit(0, 4); } -static void clock_comparator_interrupt(__u16 code) +static void clock_comparator_interrupt(struct ext_code ext_code, + unsigned int param32, + unsigned long param64) { - /* set clock comparator for next tick */ - set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION); + inc_irq_stat(IRQEXT_CLK); + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); } -static void etr_reset(void); -static void etr_ext_handler(__u16); +static void etr_timing_alert(struct etr_irq_parm *); +static void stp_timing_alert(struct stp_irq_parm *); -/* - * Get the TOD clock running. - */ -static u64 __init reset_tod_clock(void) +static void timing_alert_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) { - u64 time; + inc_irq_stat(IRQEXT_TLA); + if (param32 & 0x00c40000) + etr_timing_alert((struct etr_irq_parm *) ¶m32); + if (param32 & 0x00038000) + stp_timing_alert((struct stp_irq_parm *) ¶m32); +} - etr_reset(); - if (store_clock(&time) == 0) - return time; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) - panic("TOD clock not operational."); +static void etr_reset(void); +static void stp_reset(void); - return TOD_UNIX_EPOCH; +void read_persistent_clock(struct timespec *ts) +{ + tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); } -static cycle_t read_tod_clock(void) +void read_boot_clock(struct timespec *ts) { - return get_clock(); + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + +static cycle_t read_tod_clock(struct clocksource *cs) +{ + return get_tod_clock(); } static struct clocksource clocksource_tod = { @@ -309,6 +205,51 @@ static struct clocksource clocksource_tod = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} + +void update_vsyscall(struct timekeeper *tk) +{ + u64 nsecps; + + if (tk->clock != &clocksource_tod) + return; + + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = tk->clock->cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->wtom_clock_sec = + tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = tk->xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); + nsecps = (u64) NSEC_PER_SEC << tk->shift; + while (vdso_data->wtom_clock_nsec >= nsecps) { + vdso_data->wtom_clock_nsec -= nsecps; + vdso_data->wtom_clock_sec++; + } + vdso_data->tk_mult = tk->mult; + vdso_data->tk_shift = tk->shift; + smp_wmb(); + ++vdso_data->tb_update_count; +} + +extern struct timezone sys_tz; + +void update_vsyscall_tz(void) +{ + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tb_update_count; +} /* * Initialize the TOD clock and the CPU timer of @@ -316,39 +257,152 @@ static struct clocksource clocksource_tod = { */ void __init time_init(void) { - init_timer_cc = reset_tod_clock(); - xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY; - jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY; - - /* set xtime */ - tod_to_timeval(init_timer_cc - TOD_UNIX_EPOCH, &xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + /* Reset time synchronization interfaces. */ + etr_reset(); + stp_reset(); /* request the clock comparator external interrupt */ - if (register_early_external_interrupt(0x1004, - clock_comparator_interrupt, - &ext_int_info_cc) != 0) - panic("Couldn't request external interrupt 0x1004"); + if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt)) + panic("Couldn't request external interrupt 0x1004"); + + /* request the timing alert external interrupt */ + if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) + panic("Couldn't request external interrupt 0x1406"); if (clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); - /* request the etr external interrupt */ - if (register_early_external_interrupt(0x1406, etr_ext_handler, - &ext_int_etr_cc) != 0) - panic("Couldn't request external interrupt 0x1406"); - /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); -#ifdef CONFIG_NO_IDLE_HZ - nohz_init(); -#endif - -#ifdef CONFIG_VIRT_TIMER + /* Enable cpu timer interrupts on the boot cpu. */ vtime_init(); -#endif +} + +/* + * The time is "clock". old is what we think the time is. + * Adjust the value by a multiple of jiffies and add the delta to ntp. + * "delay" is an approximation how long the synchronization took. If + * the time correction is positive, then "delay" is subtracted from + * the time difference and only the remaining part is passed to ntp. + */ +static unsigned long long adjust_time(unsigned long long old, + unsigned long long clock, + unsigned long long delay) +{ + unsigned long long delta, ticks; + struct timex adjust; + + if (clock > old) { + /* It is later than we thought. */ + delta = ticks = clock - old; + delta = ticks = (delta < delay) ? 0 : delta - delay; + delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); + adjust.offset = ticks * (1000000 / HZ); + } else { + /* It is earlier than we thought. */ + delta = ticks = old - clock; + delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); + delta = -delta; + adjust.offset = -ticks * (1000000 / HZ); + } + sched_clock_base_cc += delta; + if (adjust.offset != 0) { + pr_notice("The ETR interface has adjusted the clock " + "by %li microseconds\n", adjust.offset); + adjust.modes = ADJ_OFFSET_SINGLESHOT; + do_adjtimex(&adjust); + } + return delta; +} + +static DEFINE_PER_CPU(atomic_t, clock_sync_word); +static DEFINE_MUTEX(clock_sync_mutex); +static unsigned long clock_sync_flags; + +#define CLOCK_SYNC_HAS_ETR 0 +#define CLOCK_SYNC_HAS_STP 1 +#define CLOCK_SYNC_ETR 2 +#define CLOCK_SYNC_STP 3 + +/* + * The synchronous get_clock function. It will write the current clock + * value to the clock pointer and return 0 if the clock is in sync with + * the external time source. If the clock mode is local it will return + * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external + * reference. + */ +int get_sync_clock(unsigned long long *clock) +{ + atomic_t *sw_ptr; + unsigned int sw0, sw1; + + sw_ptr = &get_cpu_var(clock_sync_word); + sw0 = atomic_read(sw_ptr); + *clock = get_tod_clock(); + sw1 = atomic_read(sw_ptr); + put_cpu_var(clock_sync_word); + if (sw0 == sw1 && (sw0 & 0x80000000U)) + /* Success: time is in sync. */ + return 0; + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) && + !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) && + !test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) + return -EACCES; + return -EAGAIN; +} +EXPORT_SYMBOL(get_sync_clock); + +/* + * Make get_sync_clock return -EAGAIN. + */ +static void disable_sync_clock(void *dummy) +{ + atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + /* + * Clear the in-sync bit 2^31. All get_sync_clock calls will + * fail until the sync bit is turned back on. In addition + * increase the "sequence" counter to avoid the race of an + * etr event and the complete recovery against get_sync_clock. + */ + atomic_clear_mask(0x80000000, sw_ptr); + atomic_inc(sw_ptr); +} + +/* + * Make get_sync_clock return 0 again. + * Needs to be called from a context disabled for preemption. + */ +static void enable_sync_clock(void) +{ + atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_set_mask(0x80000000, sw_ptr); +} + +/* + * Function to check if the clock is in sync. + */ +static inline int check_sync_clock(void) +{ + atomic_t *sw_ptr; + int rc; + + sw_ptr = &get_cpu_var(clock_sync_word); + rc = (atomic_read(sw_ptr) & 0x80000000U) != 0; + put_cpu_var(clock_sync_word); + return rc; +} + +/* Single threaded workqueue used for etr and stp sync events */ +static struct workqueue_struct *time_sync_wq; + +static void __init time_init_wq(void) +{ + if (time_sync_wq) + return; + time_sync_wq = create_singlethread_workqueue("timesync"); } /* @@ -356,6 +410,7 @@ void __init time_init(void) */ static int etr_port0_online; static int etr_port1_online; +static int etr_steai_available; static int __init early_parse_etr(char *p) { @@ -380,12 +435,6 @@ enum etr_event { ETR_EVENT_UPDATE, }; -enum etr_flags { - ETR_FLAG_ENOSYS, - ETR_FLAG_EACCES, - ETR_FLAG_STEAI, -}; - /* * Valid bit combinations of the eacr register are (x = don't care): * e0 e1 dp p0 p1 ea es sl @@ -412,74 +461,19 @@ enum etr_flags { */ static struct etr_eacr etr_eacr; static u64 etr_tolec; /* time of last eacr update */ -static unsigned long etr_flags; static struct etr_aib etr_port0; static int etr_port0_uptodate; static struct etr_aib etr_port1; static int etr_port1_uptodate; static unsigned long etr_events; static struct timer_list etr_timer; -static DEFINE_PER_CPU(atomic_t, etr_sync_word); static void etr_timeout(unsigned long dummy); static void etr_work_fn(struct work_struct *work); +static DEFINE_MUTEX(etr_work_mutex); static DECLARE_WORK(etr_work, etr_work_fn); /* - * The etr get_clock function. It will write the current clock value - * to the clock pointer and return 0 if the clock is in sync with the - * external time source. If the clock mode is local it will return - * -ENOSYS and -EAGAIN if the clock is not in sync with the external - * reference. This function is what ETR is all about.. - */ -int get_sync_clock(unsigned long long *clock) -{ - atomic_t *sw_ptr; - unsigned int sw0, sw1; - - sw_ptr = &get_cpu_var(etr_sync_word); - sw0 = atomic_read(sw_ptr); - *clock = get_clock(); - sw1 = atomic_read(sw_ptr); - put_cpu_var(etr_sync_sync); - if (sw0 == sw1 && (sw0 & 0x80000000U)) - /* Success: time is in sync. */ - return 0; - if (test_bit(ETR_FLAG_ENOSYS, &etr_flags)) - return -ENOSYS; - if (test_bit(ETR_FLAG_EACCES, &etr_flags)) - return -EACCES; - return -EAGAIN; -} -EXPORT_SYMBOL(get_sync_clock); - -/* - * Make get_sync_clock return -EAGAIN. - */ -static void etr_disable_sync_clock(void *dummy) -{ - atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word); - /* - * Clear the in-sync bit 2^31. All get_sync_clock calls will - * fail until the sync bit is turned back on. In addition - * increase the "sequence" counter to avoid the race of an - * etr event and the complete recovery against get_sync_clock. - */ - atomic_clear_mask(0x80000000, sw_ptr); - atomic_inc(sw_ptr); -} - -/* - * Make get_sync_clock return 0 again. - * Needs to be called from a context disabled for preemption. - */ -static void etr_enable_sync_clock(void) -{ - atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word); - atomic_set_mask(0x80000000, sw_ptr); -} - -/* * Reset ETR attachment. */ static void etr_reset(void) @@ -488,15 +482,15 @@ static void etr_reset(void) .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, .es = 0, .sl = 0 }; - if (etr_setr(&etr_eacr) == 0) - etr_tolec = get_clock(); - else { - set_bit(ETR_FLAG_ENOSYS, &etr_flags); - if (etr_port0_online || etr_port1_online) { - printk(KERN_WARNING "Running on non ETR capable " - "machine, only local mode available.\n"); - etr_port0_online = etr_port1_online = 0; - } + if (etr_setr(&etr_eacr) == 0) { + etr_tolec = get_tod_clock(); + set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + } else if (etr_port0_online || etr_port1_online) { + pr_warning("The real or virtual hardware system does " + "not provide an ETR interface\n"); + etr_port0_online = etr_port1_online = 0; } } @@ -504,21 +498,20 @@ static int __init etr_init(void) { struct etr_aib aib; - if (test_bit(ETR_FLAG_ENOSYS, &etr_flags)) + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) return 0; + time_init_wq(); /* Check if this machine has the steai instruction. */ if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) - set_bit(ETR_FLAG_STEAI, &etr_flags); + etr_steai_available = 1; setup_timer(&etr_timer, etr_timeout, 0UL); - if (!etr_port0_online && !etr_port1_online) - set_bit(ETR_FLAG_EACCES, &etr_flags); if (etr_port0_online) { set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } if (etr_port1_online) { set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } return 0; } @@ -542,9 +535,12 @@ void etr_switch_to_local(void) { if (!etr_eacr.sl) return; - etr_disable_sync_clock(NULL); - set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events); - schedule_work(&etr_work); + disable_sync_clock(NULL); + if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { + etr_eacr.es = etr_eacr.sl = 0; + etr_setr(&etr_eacr); + queue_work(time_sync_wq, &etr_work); + } } /* @@ -557,23 +553,23 @@ void etr_sync_check(void) { if (!etr_eacr.es) return; - etr_disable_sync_clock(NULL); - set_bit(ETR_EVENT_SYNC_CHECK, &etr_events); - schedule_work(&etr_work); + disable_sync_clock(NULL); + if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { + etr_eacr.es = 0; + etr_setr(&etr_eacr); + queue_work(time_sync_wq, &etr_work); + } } /* - * ETR external interrupt. There are two causes: + * ETR timing alert. There are two causes: * 1) port state change, check the usability of the port * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3) * or ETR-data word 4 (edf4) has changed. */ -static void etr_ext_handler(__u16 code) +static void etr_timing_alert(struct etr_irq_parm *intparm) { - struct etr_interruption_parameter *intparm = - (struct etr_interruption_parameter *) &S390_lowcore.ext_params; - if (intparm->pc0) /* ETR port 0 state change. */ set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); @@ -586,13 +582,13 @@ static void etr_ext_handler(__u16 code) * Both ports are not up-to-date now. */ set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } static void etr_timeout(unsigned long dummy) { set_bit(ETR_EVENT_UPDATE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -698,62 +694,25 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) return 1; } -/* - * The time is "clock". xtime is what we think the time is. - * Adjust the value by a multiple of jiffies and add the delta to ntp. - * "delay" is an approximation how long the synchronization took. If - * the time correction is positive, then "delay" is subtracted from - * the time difference and only the remaining part is passed to ntp. - */ -static void etr_adjust_time(unsigned long long clock, unsigned long long delay) -{ - unsigned long long delta, ticks; - struct timex adjust; - - /* - * We don't have to take the xtime lock because the cpu - * executing etr_adjust_time is running disabled in - * tasklet context and all other cpus are looping in - * etr_sync_cpu_start. - */ - if (clock > xtime_cc) { - /* It is later than we thought. */ - delta = ticks = clock - xtime_cc; - delta = ticks = (delta < delay) ? 0 : delta - delay; - delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - init_timer_cc = init_timer_cc + delta; - jiffies_timer_cc = jiffies_timer_cc + delta; - xtime_cc = xtime_cc + delta; - adjust.offset = ticks * (1000000 / HZ); - } else { - /* It is earlier than we thought. */ - delta = ticks = xtime_cc - clock; - delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); - init_timer_cc = init_timer_cc - delta; - jiffies_timer_cc = jiffies_timer_cc - delta; - xtime_cc = xtime_cc - delta; - adjust.offset = -ticks * (1000000 / HZ); - } - if (adjust.offset != 0) { - printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n", - adjust.offset); - adjust.modes = ADJ_OFFSET_SINGLESHOT; - do_adjtimex(&adjust); - } -} +struct clock_sync_data { + atomic_t cpus; + int in_sync; + unsigned long long fixup_cc; + int etr_port; + struct etr_aib *etr_aib; +}; -static void etr_sync_cpu_start(void *dummy) +static void clock_sync_cpu(struct clock_sync_data *sync) { - int *in_sync = dummy; - - etr_enable_sync_clock(); + atomic_dec(&sync->cpus); + enable_sync_clock(); /* * This looks like a busy wait loop but it isn't. etr_sync_cpus * is called on all other cpus while the TOD clocks is stopped. * __udelay will stop the cpu on an enabled wait psw until the * TOD is running again. */ - while (*in_sync == 0) { + while (sync->in_sync == 0) { __udelay(1); /* * A different cpu changes *in_sync. Therefore use @@ -761,55 +720,53 @@ static void etr_sync_cpu_start(void *dummy) */ barrier(); } - if (*in_sync != 1) + if (sync->in_sync != 1) /* Didn't work. Clear per-cpu in sync bit again. */ - etr_disable_sync_clock(NULL); + disable_sync_clock(NULL); /* * This round of TOD syncing is done. Set the clock comparator * to the next tick and let the processor continue. */ - setup_jiffy_timer(); -} - -static void etr_sync_cpu_end(void *dummy) -{ + fixup_clock_comparator(sync->fixup_cc); } /* - * Sync the TOD clock using the port refered to by aibp. This port + * Sync the TOD clock using the port referred to by aibp. This port * has to be enabled and the other port has to be disabled. The * last eacr update has to be more than 1.6 seconds in the past. */ -static int etr_sync_clock(struct etr_aib *aib, int port) +static int etr_sync_clock(void *data) { - struct etr_aib *sync_port; - unsigned long long clock, delay; - int in_sync, follows; + static int first; + unsigned long long clock, old_clock, delay, delta; + struct clock_sync_data *etr_sync; + struct etr_aib *sync_port, *aib; + int port; int rc; - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; + etr_sync = data; - /* - * Catch all other cpus and make them wait until we have - * successfully synced the clock. smp_call_function will - * return after all other cpus are in etr_sync_cpu_start. - */ - in_sync = 0; - preempt_disable(); - smp_call_function(etr_sync_cpu_start,&in_sync,0,0); - local_irq_disable(); - etr_enable_sync_clock(); + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(etr_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&etr_sync->cpus) != 0) + cpu_relax(); + + port = etr_sync->etr_port; + aib = etr_sync->etr_aib; + sync_port = (port == 0) ? &etr_port0 : &etr_port1; + enable_sync_clock(); /* Set clock to next OTE. */ __ctl_set_bit(14, 21); __ctl_set_bit(0, 29); clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32; - if (set_clock(clock) == 0) { + old_clock = get_tod_clock(); + if (set_tod_clock(clock) == 0) { __udelay(1); /* Wait for the clock to start. */ __ctl_clear_bit(0, 29); __ctl_clear_bit(14, 21); @@ -817,29 +774,51 @@ static int etr_sync_clock(struct etr_aib *aib, int port) /* Adjust Linux timing variables. */ delay = (unsigned long long) (aib->edf2.etv - sync_port->edf2.etv) << 32; - etr_adjust_time(clock, delay); - setup_jiffy_timer(); + delta = adjust_time(old_clock, clock, delay); + etr_sync->fixup_cc = delta; + fixup_clock_comparator(delta); /* Verify that the clock is properly set. */ if (!etr_aib_follows(sync_port, aib, port)) { /* Didn't work. */ - etr_disable_sync_clock(NULL); - in_sync = -EAGAIN; + disable_sync_clock(NULL); + etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } else { - in_sync = 1; + etr_sync->in_sync = 1; rc = 0; } } else { /* Could not set the clock ?!? */ __ctl_clear_bit(0, 29); __ctl_clear_bit(14, 21); - etr_disable_sync_clock(NULL); - in_sync = -EAGAIN; + disable_sync_clock(NULL); + etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } - local_irq_enable(); - smp_call_function(etr_sync_cpu_end,NULL,0,0); - preempt_enable(); + xchg(&first, 0); + return rc; +} + +static int etr_sync_clock_stop(struct etr_aib *aib, int port) +{ + struct clock_sync_data etr_sync; + struct etr_aib *sync_port; + int follows; + int rc; + + /* Check if the current aib is adjacent to the sync port aib. */ + sync_port = (port == 0) ? &etr_port0 : &etr_port1; + follows = etr_aib_follows(sync_port, aib, port); + memcpy(sync_port, aib, sizeof(*aib)); + if (!follows) + return -EAGAIN; + memset(&etr_sync, 0, sizeof(etr_sync)); + etr_sync.etr_aib = aib; + etr_sync.etr_port = port; + get_online_cpus(); + atomic_set(&etr_sync.cpus, num_online_cpus() - 1); + rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); + put_online_cpus(); return rc; } @@ -863,7 +842,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr) * assume that this can have caused an stepping * port switch. */ - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); eacr.p0 = etr_port0_online; if (!eacr.p0) eacr.e0 = 0; @@ -876,7 +855,7 @@ static struct etr_eacr etr_handle_events(struct etr_eacr eacr) * assume that this can have caused an stepping * port switch. */ - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); eacr.p1 = etr_port1_online; if (!eacr.p1) eacr.e1 = 0; @@ -941,7 +920,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib, * Do not try to get the alternate port aib if the clock * is not in sync yet. */ - if (!eacr.es) + if (!eacr.es || !check_sync_clock()) return eacr; /* @@ -949,7 +928,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib, * the other port immediately. If only stetr is available the * data-port bit toggle has to be used. */ - if (test_bit(ETR_FLAG_STEAI, &etr_flags)) { + if (etr_steai_available) { if (eacr.p0 && !etr_port0_uptodate) { etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0); etr_port0_uptodate = 1; @@ -992,11 +971,11 @@ static void etr_update_eacr(struct etr_eacr eacr) etr_eacr = eacr; etr_setr(&etr_eacr); if (dp_changed) - etr_tolec = get_clock(); + etr_tolec = get_tod_clock(); } /* - * ETR tasklet. In this function you'll find the main logic. In + * ETR work. In this function you'll find the main logic. In * particular this is the only function that calls etr_update_eacr(), * it "controls" the etr control register. */ @@ -1007,6 +986,9 @@ static void etr_work_fn(struct work_struct *work) struct etr_aib aib; int sync_port; + /* prevent multiple execution. */ + mutex_lock(&etr_work_mutex); + /* Create working copy of etr_eacr. */ eacr = etr_eacr; @@ -1018,17 +1000,16 @@ static void etr_work_fn(struct work_struct *work) if (!eacr.ea) { /* Both ports offline. Reset everything. */ eacr.dp = eacr.es = eacr.sl = 0; - on_each_cpu(etr_disable_sync_clock, NULL, 0, 1); + on_each_cpu(disable_sync_clock, NULL, 1); del_timer_sync(&etr_timer); etr_update_eacr(eacr); - set_bit(ETR_FLAG_EACCES, &etr_flags); - return; + goto out_unlock; } /* Store aib to get the current ETR status word. */ BUG_ON(etr_stetr(&aib) != 0); etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */ - now = get_clock(); + now = get_tod_clock(); /* * Update the port information if the last stepping port change @@ -1038,7 +1019,7 @@ static void etr_work_fn(struct work_struct *work) eacr = etr_handle_update(&aib, eacr); /* - * Select ports to enable. The prefered synchronization mode is PPS. + * Select ports to enable. The preferred synchronization mode is PPS. * If a port can be enabled depends on a number of things: * 1) The port needs to be online and uptodate. A port is not * disabled just because it is not uptodate, but it is only @@ -1062,7 +1043,6 @@ static void etr_work_fn(struct work_struct *work) eacr.e1 = 1; sync_port = (etr_port0_uptodate && etr_port_valid(&etr_port0, 0)) ? 0 : -1; - clear_bit(ETR_FLAG_EACCES, &etr_flags); } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) { eacr.sl = 0; eacr.e0 = 0; @@ -1071,7 +1051,6 @@ static void etr_work_fn(struct work_struct *work) eacr.es = 0; sync_port = (etr_port1_uptodate && etr_port_valid(&etr_port1, 1)) ? 1 : -1; - clear_bit(ETR_FLAG_EACCES, &etr_flags); } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) { eacr.sl = 1; eacr.e0 = 1; @@ -1085,7 +1064,6 @@ static void etr_work_fn(struct work_struct *work) eacr.e1 = 1; sync_port = (etr_port0_uptodate && etr_port_valid(&etr_port0, 0)) ? 0 : -1; - clear_bit(ETR_FLAG_EACCES, &etr_flags); } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) { eacr.sl = 1; eacr.e0 = 0; @@ -1094,22 +1072,20 @@ static void etr_work_fn(struct work_struct *work) eacr.es = 0; sync_port = (etr_port1_uptodate && etr_port_valid(&etr_port1, 1)) ? 1 : -1; - clear_bit(ETR_FLAG_EACCES, &etr_flags); } else { /* Both ports not usable. */ eacr.es = eacr.sl = 0; sync_port = -1; - set_bit(ETR_FLAG_EACCES, &etr_flags); } /* * If the clock is in sync just update the eacr and return. * If there is no valid sync port wait for a port update. */ - if (eacr.es || sync_port < 0) { + if ((eacr.es && check_sync_clock()) || sync_port < 0) { etr_update_eacr(eacr); etr_set_tolec_timeout(now); - return; + goto out_unlock; } /* @@ -1122,49 +1098,56 @@ static void etr_work_fn(struct work_struct *work) /* * Update eacr and try to synchronize the clock. If the update * of eacr caused a stepping port switch (or if we have to - * assume that a stepping port switch has occured) or the + * assume that a stepping port switch has occurred) or the * clock syncing failed, reset the sync check control bit * and set up a timer to try again after 0.5 seconds */ etr_update_eacr(eacr); if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock(&aib, sync_port) != 0) { + etr_sync_clock_stop(&aib, sync_port) != 0) { /* Sync failed. Try again in 1/2 second. */ eacr.es = 0; etr_update_eacr(eacr); etr_set_sync_timeout(); } else etr_set_tolec_timeout(now); +out_unlock: + mutex_unlock(&etr_work_mutex); } /* * Sysfs interface functions */ -static struct sysdev_class etr_sysclass = { - .name = "etr", +static struct bus_type etr_subsys = { + .name = "etr", + .dev_name = "etr", }; -static struct sys_device etr_port0_dev = { +static struct device etr_port0_dev = { .id = 0, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; -static struct sys_device etr_port1_dev = { +static struct device etr_port1_dev = { .id = 1, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; /* - * ETR class attributes + * ETR subsys attributes */ -static ssize_t etr_stepping_port_show(struct sysdev_class *class, char *buf) +static ssize_t etr_stepping_port_show(struct device *dev, + struct device_attribute *attr, + char *buf) { return sprintf(buf, "%i\n", etr_port0.esw.p); } -static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); +static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); -static ssize_t etr_stepping_mode_show(struct sysdev_class *class, char *buf) +static ssize_t etr_stepping_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) { char *mode_str; @@ -1177,12 +1160,12 @@ static ssize_t etr_stepping_mode_show(struct sysdev_class *class, char *buf) return sprintf(buf, "%s\n", mode_str); } -static SYSDEV_CLASS_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); +static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); /* * ETR port attributes */ -static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) +static inline struct etr_aib *etr_aib_from_dev(struct device *dev) { if (dev == &etr_port0_dev) return etr_port0_online ? &etr_port0 : NULL; @@ -1190,7 +1173,9 @@ static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) return etr_port1_online ? &etr_port1 : NULL; } -static ssize_t etr_online_show(struct sys_device *dev, char *buf) +static ssize_t etr_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) { unsigned int online; @@ -1198,43 +1183,58 @@ static ssize_t etr_online_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", online); } -static ssize_t etr_online_store(struct sys_device *dev, - const char *buf, size_t count) +static ssize_t etr_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { unsigned int value; value = simple_strtoul(buf, NULL, 0); if (value != 0 && value != 1) return -EINVAL; - if (test_bit(ETR_FLAG_ENOSYS, &etr_flags)) - return -ENOSYS; + if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); if (dev == &etr_port0_dev) { if (etr_port0_online == value) - return count; /* Nothing to do. */ + goto out; /* Nothing to do. */ etr_port0_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } else { if (etr_port1_online == value) - return count; /* Nothing to do. */ + goto out; /* Nothing to do. */ etr_port1_online = value; + if (etr_port0_online && etr_port1_online) + set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } +out: + mutex_unlock(&clock_sync_mutex); return count; } -static SYSDEV_ATTR(online, 0600, etr_online_show, etr_online_store); +static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); -static ssize_t etr_stepping_control_show(struct sys_device *dev, char *buf) +static ssize_t etr_stepping_control_show(struct device *dev, + struct device_attribute *attr, + char *buf) { return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? etr_eacr.e0 : etr_eacr.e1); } -static SYSDEV_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); +static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); -static ssize_t etr_mode_code_show(struct sys_device *dev, char *buf) +static ssize_t etr_mode_code_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!etr_port0_online && !etr_port1_online) /* Status word is not uptodate if both ports are offline. */ @@ -1243,9 +1243,10 @@ static ssize_t etr_mode_code_show(struct sys_device *dev, char *buf) etr_port0.esw.psc0 : etr_port0.esw.psc1); } -static SYSDEV_ATTR(state_code, 0400, etr_mode_code_show, NULL); +static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); -static ssize_t etr_untuned_show(struct sys_device *dev, char *buf) +static ssize_t etr_untuned_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1254,9 +1255,10 @@ static ssize_t etr_untuned_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf1.u); } -static SYSDEV_ATTR(untuned, 0400, etr_untuned_show, NULL); +static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); -static ssize_t etr_network_id_show(struct sys_device *dev, char *buf) +static ssize_t etr_network_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1265,9 +1267,10 @@ static ssize_t etr_network_id_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf1.net_id); } -static SYSDEV_ATTR(network, 0400, etr_network_id_show, NULL); +static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); -static ssize_t etr_id_show(struct sys_device *dev, char *buf) +static ssize_t etr_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1276,9 +1279,10 @@ static ssize_t etr_id_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf1.etr_id); } -static SYSDEV_ATTR(id, 0400, etr_id_show, NULL); +static DEVICE_ATTR(id, 0400, etr_id_show, NULL); -static ssize_t etr_port_number_show(struct sys_device *dev, char *buf) +static ssize_t etr_port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1287,9 +1291,10 @@ static ssize_t etr_port_number_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf1.etr_pn); } -static SYSDEV_ATTR(port, 0400, etr_port_number_show, NULL); +static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); -static ssize_t etr_coupled_show(struct sys_device *dev, char *buf) +static ssize_t etr_coupled_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1298,9 +1303,10 @@ static ssize_t etr_coupled_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf3.c); } -static SYSDEV_ATTR(coupled, 0400, etr_coupled_show, NULL); +static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); -static ssize_t etr_local_time_show(struct sys_device *dev, char *buf) +static ssize_t etr_local_time_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1309,9 +1315,10 @@ static ssize_t etr_local_time_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf3.blto); } -static SYSDEV_ATTR(local_time, 0400, etr_local_time_show, NULL); +static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); -static ssize_t etr_utc_offset_show(struct sys_device *dev, char *buf) +static ssize_t etr_utc_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1320,64 +1327,64 @@ static ssize_t etr_utc_offset_show(struct sys_device *dev, char *buf) return sprintf(buf, "%i\n", aib->edf3.buo); } -static SYSDEV_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); - -static struct sysdev_attribute *etr_port_attributes[] = { - &attr_online, - &attr_stepping_control, - &attr_state_code, - &attr_untuned, - &attr_network, - &attr_id, - &attr_port, - &attr_coupled, - &attr_local_time, - &attr_utc_offset, +static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); + +static struct device_attribute *etr_port_attributes[] = { + &dev_attr_online, + &dev_attr_stepping_control, + &dev_attr_state_code, + &dev_attr_untuned, + &dev_attr_network, + &dev_attr_id, + &dev_attr_port, + &dev_attr_coupled, + &dev_attr_local_time, + &dev_attr_utc_offset, NULL }; -static int __init etr_register_port(struct sys_device *dev) +static int __init etr_register_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_register(dev); + rc = device_register(dev); if (rc) goto out; for (attr = etr_port_attributes; *attr; attr++) { - rc = sysdev_create_file(dev, *attr); + rc = device_create_file(dev, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= etr_port_attributes; attr--) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); out: return rc; } -static void __init etr_unregister_port(struct sys_device *dev) +static void __init etr_unregister_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; for (attr = etr_port_attributes; *attr; attr++) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); } static int __init etr_init_sysfs(void) { int rc; - rc = sysdev_class_register(&etr_sysclass); + rc = subsys_system_register(&etr_subsys, NULL); if (rc) goto out; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_port); + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); if (rc) - goto out_unreg_class; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_mode); + goto out_unreg_subsys; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); if (rc) goto out_remove_stepping_port; rc = etr_register_port(&etr_port0_dev); @@ -1391,13 +1398,391 @@ static int __init etr_init_sysfs(void) out_remove_port0: etr_unregister_port(&etr_port0_dev); out_remove_stepping_mode: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_mode); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); out_remove_stepping_port: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_port); -out_unreg_class: - sysdev_class_unregister(&etr_sysclass); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); +out_unreg_subsys: + bus_unregister(&etr_subsys); out: return rc; } device_initcall(etr_init_sysfs); + +/* + * Server Time Protocol (STP) code. + */ +static int stp_online; +static struct stp_sstpi stp_info; +static void *stp_page; + +static void stp_work_fn(struct work_struct *work); +static DEFINE_MUTEX(stp_work_mutex); +static DECLARE_WORK(stp_work, stp_work_fn); +static struct timer_list stp_timer; + +static int __init early_parse_stp(char *p) +{ + if (strncmp(p, "off", 3) == 0) + stp_online = 0; + else if (strncmp(p, "on", 2) == 0) + stp_online = 1; + return 0; +} +early_param("stp", early_parse_stp); + +/* + * Reset STP attachment. + */ +static void __init stp_reset(void) +{ + int rc; + + stp_page = (void *) get_zeroed_page(GFP_ATOMIC); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + if (rc == 0) + set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); + else if (stp_online) { + pr_warning("The real or virtual hardware system does " + "not provide an STP interface\n"); + free_page((unsigned long) stp_page); + stp_page = NULL; + stp_online = 0; + } +} + +static void stp_timeout(unsigned long dummy) +{ + queue_work(time_sync_wq, &stp_work); +} + +static int __init stp_init(void) +{ + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return 0; + setup_timer(&stp_timer, stp_timeout, 0UL); + time_init_wq(); + if (!stp_online) + return 0; + queue_work(time_sync_wq, &stp_work); + return 0; +} + +arch_initcall(stp_init); + +/* + * STP timing alert. There are three causes: + * 1) timing status change + * 2) link availability change + * 3) time control parameter change + * In all three cases we are only interested in the clock source state. + * If a STP clock source is now available use it. + */ +static void stp_timing_alert(struct stp_irq_parm *intparm) +{ + if (intparm->tsc || intparm->lac || intparm->tcpc) + queue_work(time_sync_wq, &stp_work); +} + +/* + * STP sync check machine check. This is called when the timing state + * changes from the synchronized state to the unsynchronized state. + * After a STP sync check the clock is not in sync. The machine check + * is broadcasted to all cpus at the same time. + */ +void stp_sync_check(void) +{ + disable_sync_clock(NULL); + queue_work(time_sync_wq, &stp_work); +} + +/* + * STP island condition machine check. This is called when an attached + * server attempts to communicate over an STP link and the servers + * have matching CTN ids and have a valid stratum-1 configuration + * but the configurations do not match. + */ +void stp_island_check(void) +{ + disable_sync_clock(NULL); + queue_work(time_sync_wq, &stp_work); +} + + +static int stp_sync_clock(void *data) +{ + static int first; + unsigned long long old_clock, delta; + struct clock_sync_data *stp_sync; + int rc; + + stp_sync = data; + + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(stp_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&stp_sync->cpus) != 0) + cpu_relax(); + + enable_sync_clock(); + + rc = 0; + if (stp_info.todoff[0] || stp_info.todoff[1] || + stp_info.todoff[2] || stp_info.todoff[3] || + stp_info.tmd != 2) { + old_clock = get_tod_clock(); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + if (rc == 0) { + delta = adjust_time(old_clock, get_tod_clock(), 0); + fixup_clock_comparator(delta); + rc = chsc_sstpi(stp_page, &stp_info, + sizeof(struct stp_sstpi)); + if (rc == 0 && stp_info.tmd != 2) + rc = -EAGAIN; + } + } + if (rc) { + disable_sync_clock(NULL); + stp_sync->in_sync = -EAGAIN; + } else + stp_sync->in_sync = 1; + xchg(&first, 0); + return 0; +} + +/* + * STP work. Check for the STP state and take over the clock + * synchronization if the STP clock source is usable. + */ +static void stp_work_fn(struct work_struct *work) +{ + struct clock_sync_data stp_sync; + int rc; + + /* prevent multiple execution. */ + mutex_lock(&stp_work_mutex); + + if (!stp_online) { + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + del_timer_sync(&stp_timer); + goto out_unlock; + } + + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + if (rc) + goto out_unlock; + + rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + if (rc || stp_info.c == 0) + goto out_unlock; + + /* Skip synchronization if the clock is already in sync. */ + if (check_sync_clock()) + goto out_unlock; + + memset(&stp_sync, 0, sizeof(stp_sync)); + get_online_cpus(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); + put_online_cpus(); + + if (!check_sync_clock()) + /* + * There is a usable clock but the synchonization failed. + * Retry after a second. + */ + mod_timer(&stp_timer, jiffies + HZ); + +out_unlock: + mutex_unlock(&stp_work_mutex); +} + +/* + * STP subsys sysfs interface functions + */ +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", +}; + +static ssize_t stp_ctn_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); +} + +static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); + +static ssize_t stp_ctn_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.ctn); +} + +static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); + +static ssize_t stp_dst_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x2000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); +} + +static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); + +static ssize_t stp_leap_seconds_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x8000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); +} + +static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); + +static ssize_t stp_stratum_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); +} + +static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); + +static ssize_t stp_time_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x0800)) + return -ENODATA; + return sprintf(buf, "%i\n", (int) stp_info.tto); +} + +static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); + +static ssize_t stp_time_zone_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online || !(stp_info.vbits & 0x4000)) + return -ENODATA; + return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); +} + +static DEVICE_ATTR(time_zone_offset, 0400, + stp_time_zone_offset_show, NULL); + +static ssize_t stp_timing_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.tmd); +} + +static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); + +static ssize_t stp_timing_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + if (!stp_online) + return -ENODATA; + return sprintf(buf, "%i\n", stp_info.tst); +} + +static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); + +static ssize_t stp_online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", stp_online); +} + +static ssize_t stp_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int value; + + value = simple_strtoul(buf, NULL, 0); + if (value != 0 && value != 1) + return -EINVAL; + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&clock_sync_mutex); + stp_online = value; + if (stp_online) + set_bit(CLOCK_SYNC_STP, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); + queue_work(time_sync_wq, &stp_work); + mutex_unlock(&clock_sync_mutex); + return count; +} + +/* + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. + */ +static struct device_attribute dev_attr_stp_online = { + .attr = { .name = "online", .mode = 0600 }, + .show = stp_online_show, + .store = stp_online_store, +}; + +static struct device_attribute *stp_attributes[] = { + &dev_attr_ctn_id, + &dev_attr_ctn_type, + &dev_attr_dst_offset, + &dev_attr_leap_seconds, + &dev_attr_stp_online, + &dev_attr_stratum, + &dev_attr_time_offset, + &dev_attr_time_zone_offset, + &dev_attr_timing_mode, + &dev_attr_timing_state, + NULL +}; + +static int __init stp_init_sysfs(void) +{ + struct device_attribute **attr; + int rc; + + rc = subsys_system_register(&stp_subsys, NULL); + if (rc) + goto out; + for (attr = stp_attributes; *attr; attr++) { + rc = device_create_file(stp_subsys.dev_root, *attr); + if (rc) + goto out_unreg; + } + return 0; +out_unreg: + for (; attr >= stp_attributes; attr--) + device_remove_file(stp_subsys.dev_root, *attr); + bus_unregister(&stp_subsys); +out: + return rc; +} + +device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c new file mode 100644 index 00000000000..355a16c5570 --- /dev/null +++ b/arch/s390/kernel/topology.c @@ -0,0 +1,478 @@ +/* + * Copyright IBM Corp. 2007, 2011 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> +#include <linux/bootmem.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <asm/sysinfo.h> + +#define PTF_HORIZONTAL (0UL) +#define PTF_VERTICAL (1UL) +#define PTF_CHECK (2UL) + +struct mask_info { + struct mask_info *next; + unsigned char id; + cpumask_t mask; +}; + +static void set_topology_timer(void); +static void topology_work_fn(struct work_struct *work); +static struct sysinfo_15_1_x *tl_info; + +static int topology_enabled = 1; +static DECLARE_WORK(topology_work, topology_work_fn); + +/* topology_lock protects the socket and book linked lists */ +static DEFINE_SPINLOCK(topology_lock); +static struct mask_info socket_info; +static struct mask_info book_info; + +struct cpu_topology_s390 cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) +{ + cpumask_t mask; + + cpumask_copy(&mask, cpumask_of(cpu)); + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) + return mask; + for (; info; info = info->next) { + if (cpumask_test_cpu(cpu, &info->mask)) + return info->mask; + } + return mask; +} + +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *socket, + int one_socket_per_cpu) +{ + unsigned int cpu; + + for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) { + unsigned int rcpu; + int lcpu; + + rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; + lcpu = smp_find_processor_id(rcpu); + if (lcpu < 0) + continue; + cpumask_set_cpu(lcpu, &book->mask); + cpu_topology[lcpu].book_id = book->id; + cpumask_set_cpu(lcpu, &socket->mask); + cpu_topology[lcpu].core_id = rcpu; + if (one_socket_per_cpu) { + cpu_topology[lcpu].socket_id = rcpu; + socket = socket->next; + } else { + cpu_topology[lcpu].socket_id = socket->id; + } + smp_cpu_set_polarization(lcpu, tl_cpu->pp); + } + return socket; +} + +static void clear_masks(void) +{ + struct mask_info *info; + + info = &socket_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } + info = &book_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } +} + +static union topology_entry *next_tle(union topology_entry *tle) +{ + if (!tle->nl) + return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_container *)tle + 1); +} + +static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +{ + struct mask_info *socket = &socket_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 2: + book = book->next; + book->id = tle->container.id; + break; + case 1: + socket = socket->next; + socket->id = tle->container.id; + break; + case 0: + add_cpus_to_mask(&tle->cpu, book, socket, 0); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) +{ + struct mask_info *socket = &socket_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void tl_to_masks(struct sysinfo_15_1_x *info) +{ + struct cpuid cpu_id; + + spin_lock_irq(&topology_lock); + get_cpu_id(&cpu_id); + clear_masks(); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + __tl_to_masks_z10(info); + break; + default: + __tl_to_masks_generic(info); + } + spin_unlock_irq(&topology_lock); +} + +static void topology_update_polarization_simple(void) +{ + int cpu; + + mutex_lock(&smp_cpu_state_mutex); + for_each_possible_cpu(cpu) + smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); + mutex_unlock(&smp_cpu_state_mutex); +} + +static int ptf(unsigned long fc) +{ + int rc; + + asm volatile( + " .insn rre,0xb9a20000,%1,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc) + : "d" (fc) : "cc"); + return rc; +} + +int topology_set_cpu_management(int fc) +{ + int cpu, rc; + + if (!MACHINE_HAS_TOPOLOGY) + return -EOPNOTSUPP; + if (fc) + rc = ptf(PTF_VERTICAL); + else + rc = ptf(PTF_HORIZONTAL); + if (rc) + return -EBUSY; + for_each_possible_cpu(cpu) + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + return rc; +} + +static void update_cpu_masks(void) +{ + unsigned long flags; + int cpu; + + spin_lock_irqsave(&topology_lock, flags); + for_each_possible_cpu(cpu) { + cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu); + cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu); + if (!MACHINE_HAS_TOPOLOGY) { + cpu_topology[cpu].core_id = cpu; + cpu_topology[cpu].socket_id = cpu; + cpu_topology[cpu].book_id = cpu; + } + } + spin_unlock_irqrestore(&topology_lock, flags); +} + +void store_topology(struct sysinfo_15_1_x *info) +{ + if (topology_max_mnest >= 3) + stsi(info, 15, 1, 3); + else + stsi(info, 15, 1, 2); +} + +int arch_update_cpu_topology(void) +{ + struct sysinfo_15_1_x *info = tl_info; + struct device *dev; + int cpu; + + if (!MACHINE_HAS_TOPOLOGY) { + update_cpu_masks(); + topology_update_polarization_simple(); + return 0; + } + store_topology(info); + tl_to_masks(info); + update_cpu_masks(); + for_each_online_cpu(cpu) { + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); + } + return 1; +} + +static void topology_work_fn(struct work_struct *work) +{ + rebuild_sched_domains(); +} + +void topology_schedule_update(void) +{ + schedule_work(&topology_work); +} + +static void topology_timer_fn(unsigned long ignored) +{ + if (ptf(PTF_CHECK)) + topology_schedule_update(); + set_topology_timer(); +} + +static struct timer_list topology_timer = + TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); + +static atomic_t topology_poll = ATOMIC_INIT(0); + +static void set_topology_timer(void) +{ + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + HZ / 10); + else + mod_timer(&topology_timer, jiffies + HZ * 60); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); +} + +static int __init early_parse_topology(char *p) +{ + if (strncmp(p, "off", 3)) + return 0; + topology_enabled = 0; + return 0; +} +early_param("topology", early_parse_topology); + +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = alloc_bootmem_align( + roundup_pow_of_two(sizeof(struct mask_info)), + roundup_pow_of_two(sizeof(struct mask_info))); + mask = mask->next; + } +} + +void __init s390_init_cpu_topology(void) +{ + struct sysinfo_15_1_x *info; + int i; + + if (!MACHINE_HAS_TOPOLOGY) + return; + tl_info = alloc_bootmem_pages(PAGE_SIZE); + info = tl_info; + store_topology(info); + pr_info("The CPU configuration topology of the machine is:"); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + printk(KERN_CONT " %d", info->mag[i]); + printk(KERN_CONT " / %d\n", info->mnest); + alloc_masks(info, &socket_info, 1); + alloc_masks(info, &book_info, 2); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (smp_cpu_get_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); +} + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_mask; +} + +static const struct cpumask *cpu_book_mask(int cpu) +{ + return &cpu_topology[cpu].book_mask; +} + +static struct sched_domain_topology_level s390_topology[] = { + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static int __init topology_init(void) +{ + if (!MACHINE_HAS_TOPOLOGY) { + topology_update_polarization_simple(); + goto out; + } + set_topology_timer(); +out: + + set_sched_topology(s390_topology); + + return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); +} +device_initcall(topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 60f728aeaf1..c5762324d9e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -1,8 +1,6 @@ /* - * arch/s390/kernel/traps.c - * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * @@ -14,295 +12,45 @@ * 'Traps.c' handles hardware traps and faults after we have saved some * state in 'asm.s'. */ -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/module.h> #include <linux/ptrace.h> -#include <linux/timer.h> +#include <linux/sched.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/seq_file.h> -#include <linux/delay.h> -#include <linux/module.h> -#include <linux/kdebug.h> -#include <linux/kallsyms.h> -#include <linux/reboot.h> -#include <linux/kprobes.h> -#include <linux/bug.h> -#include <linux/utsname.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/mathemu.h> -#include <asm/cpcmd.h> -#include <asm/s390_ext.h> -#include <asm/lowcore.h> -#include <asm/debug.h> - -/* Called from entry.S only */ -extern void handle_per_exception(struct pt_regs *regs); - -typedef void pgm_check_handler_t(struct pt_regs *, long); -pgm_check_handler_t *pgm_check_table[128]; - -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_PROCESS_DEBUG -int sysctl_userprocess_debug = 1; -#else -int sysctl_userprocess_debug = 0; -#endif -#endif +#include "entry.h" -extern pgm_check_handler_t do_protection_exception; -extern pgm_check_handler_t do_dat_exception; -extern pgm_check_handler_t do_monitor_call; -extern pgm_check_handler_t do_asce_exception; +int show_unhandled_signals = 1; -#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) - -#ifndef CONFIG_64BIT -#define FOURLONG "%08lx %08lx %08lx %08lx\n" -static int kstack_depth_to_print = 12; -#else /* CONFIG_64BIT */ -#define FOURLONG "%016lx %016lx %016lx %016lx\n" -static int kstack_depth_to_print = 20; -#endif /* CONFIG_64BIT */ - -/* - * For show_trace we have tree different stack to consider: - * - the panic stack which is used if the kernel stack has overflown - * - the asynchronous interrupt stack (cpu related) - * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially - * touch all of them. The order is: panic stack, async stack, sync stack. - */ -static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - sp = sp & PSW_ADDR_INSN; - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk("([<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s)\n", sf->gprs[8] & PSW_ADDR_INSN); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - printk(" [<%016lx>] ", sf->gprs[8] & PSW_ADDR_INSN); - print_symbol("%s\n", sf->gprs[8] & PSW_ADDR_INSN); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - printk(" [<%016lx>] ", regs->psw.addr & PSW_ADDR_INSN); - print_symbol("%s\n", regs->psw.addr & PSW_ADDR_INSN); - low = sp; - sp = regs->gprs[15]; - } -} - -void show_trace(struct task_struct *task, unsigned long *stack) +static inline void __user *get_trap_ip(struct pt_regs *regs) { - register unsigned long __r15 asm ("15"); - unsigned long sp; - - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); -#ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); -#endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); - if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); - else - __show_trace(sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); - printk("\n"); - if (!task) - task = current; - debug_show_held_locks(task); -} - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - register unsigned long * __r15 asm ("15"); - unsigned long *stack; - int i; - - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - - for (i = 0; i < kstack_depth_to_print; i++) { - if (((addr_t) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i * sizeof (long) % 32) == 0)) - printk("\n "); - printk("%p ", (void *)*stack++); - } - printk("\n"); - show_trace(task, sp); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - printk("CPU: %d %s %s %.*s\n", - task_thread_info(current)->cpu, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("Process %s (pid: %d, task: %p, ksp: %p)\n", - current->comm, current->pid, current, - (void *) current->thread.ksp); - show_stack(NULL, NULL); -} -EXPORT_SYMBOL(dump_stack); - -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - -void show_registers(struct pt_regs *regs) -{ - char *mode; - - mode = (regs->psw.mask & PSW_MASK_PSTATE) ? "User" : "Krnl"; - printk("%s PSW : %p %p", - mode, (void *) regs->psw.mask, - (void *) regs->psw.addr); - print_symbol(" (%s)\n", regs->psw.addr & PSW_ADDR_INSN); - printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); #ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); -#endif - printk("\n%s GPRS: " FOURLONG, mode, - regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); - printk(" " FOURLONG, - regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); - printk(" " FOURLONG, - regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); - printk(" " FOURLONG, - regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); - - show_code(regs); -} - -/* This is called from fs/proc/array.c */ -void task_show_regs(struct seq_file *m, struct task_struct *task) -{ - struct pt_regs *regs; - - regs = task_pt_regs(task); - seq_printf(m, "task: %p, ksp: %p\n", - task, (void *)task->thread.ksp); - seq_printf(m, "User PSW : %p %p\n", - (void *) regs->psw.mask, (void *)regs->psw.addr); - - seq_printf(m, "User GPRS: " FOURLONG, - regs->gprs[0], regs->gprs[1], - regs->gprs[2], regs->gprs[3]); - seq_printf(m, " " FOURLONG, - regs->gprs[4], regs->gprs[5], - regs->gprs[6], regs->gprs[7]); - seq_printf(m, " " FOURLONG, - regs->gprs[8], regs->gprs[9], - regs->gprs[10], regs->gprs[11]); - seq_printf(m, " " FOURLONG, - regs->gprs[12], regs->gprs[13], - regs->gprs[14], regs->gprs[15]); - seq_printf(m, "User ACRS: %08x %08x %08x %08x\n", - task->thread.acrs[0], task->thread.acrs[1], - task->thread.acrs[2], task->thread.acrs[3]); - seq_printf(m, " %08x %08x %08x %08x\n", - task->thread.acrs[4], task->thread.acrs[5], - task->thread.acrs[6], task->thread.acrs[7]); - seq_printf(m, " %08x %08x %08x %08x\n", - task->thread.acrs[8], task->thread.acrs[9], - task->thread.acrs[10], task->thread.acrs[11]); - seq_printf(m, " %08x %08x %08x %08x\n", - task->thread.acrs[12], task->thread.acrs[13], - task->thread.acrs[14], task->thread.acrs[15]); -} + unsigned long address; -static DEFINE_SPINLOCK(die_lock); - -void die(const char * str, struct pt_regs * regs, long err) -{ - static int die_counter; - - oops_enter(); - debug_stop_all(); - console_verbose(); - spin_lock_irq(&die_lock); - bust_spinlocks(1); - printk("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); + if (regs->int_code & 0x200) + address = *(unsigned long *)(current->thread.trap_tdb + 24); + else + address = regs->psw.addr; + return (void __user *) + ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); +#else + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); #endif - printk("\n"); - notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); - show_regs(regs); - bust_spinlocks(0); - add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception: panic_on_oops"); - oops_exit(); - do_exit(SIGSEGV); } -static void inline -report_user_fault(long interruption_code, struct pt_regs *regs) +static inline void report_user_fault(struct pt_regs *regs, int signr) { -#if defined(CONFIG_SYSCTL) - if (!sysctl_userprocess_debug) + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; -#endif -#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) - printk("User process fault: interruption code 0x%lX\n", - interruption_code); + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk("User process fault: interruption code 0x%X ", regs->int_code); + print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); + printk("\n"); show_regs(regs); -#endif } int is_valid_bugaddr(unsigned long addr) @@ -310,158 +58,141 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static void __kprobes inline do_trap(long interruption_code, int signr, - char *str, struct pt_regs *regs, - siginfo_t *info) +static void __kprobes do_trap(struct pt_regs *regs, + int si_signo, int si_code, char *str) { - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); - - if (notify_die(DIE_TRAP, str, regs, interruption_code, - interruption_code, signr) == NOTIFY_STOP) - return; + siginfo_t info; - if (regs->psw.mask & PSW_MASK_PSTATE) { - struct task_struct *tsk = current; + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; - tsk->thread.trap_no = interruption_code & 0xffff; - force_sig_info(signr, info, tsk); - report_user_fault(interruption_code, regs); + if (user_mode(regs)) { + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = get_trap_ip(regs); + force_sig_info(si_signo, &info, current); + report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) - regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; else { enum bug_trap_type btt; btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); if (btt == BUG_TRAP_TYPE_WARN) return; - die(str, regs, interruption_code); + die(regs, str); } } } -static inline void __user *get_check_address(struct pt_regs *regs) +void __kprobes do_per_trap(struct pt_regs *regs) { - return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN); -} + siginfo_t info; -void __kprobes do_single_step(struct pt_regs *regs) -{ - if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, - SIGTRAP) == NOTIFY_STOP){ + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; - } - if ((current->ptrace & PT_PTRACED) != 0) - force_sig(SIGTRAP, current); + if (!current->ptrace) + return; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = + (void __force __user *) current->thread.per_event.address; + force_sig_info(SIGTRAP, &info, current); } -static void default_trap_handler(struct pt_regs * regs, long interruption_code) +void default_trap_handler(struct pt_regs *regs) { - if (regs->psw.mask & PSW_MASK_PSTATE) { - local_irq_enable(); + if (user_mode(regs)) { + report_user_fault(regs, SIGSEGV); do_exit(SIGSEGV); - report_user_fault(interruption_code, regs); } else - die("Unknown program exception", regs, interruption_code); + die(regs, "Unknown program exception"); } -#define DO_ERROR_INFO(signr, str, name, sicode, siaddr) \ -static void name(struct pt_regs * regs, long interruption_code) \ -{ \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = siaddr; \ - do_trap(interruption_code, signr, str, regs, &info); \ +#define DO_ERROR_INFO(name, signr, sicode, str) \ +void name(struct pt_regs *regs) \ +{ \ + do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(SIGILL, "addressing exception", addressing_exception, - ILL_ILLADR, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "execute exception", execute_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint divide exception", divide_exception, - FPE_INTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint overflow exception", overflow_exception, - FPE_INTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP overflow exception", hfp_overflow_exception, - FPE_FLTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP underflow exception", hfp_underflow_exception, - FPE_FLTUND, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP significance exception", hfp_significance_exception, - FPE_FLTRES, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP divide exception", hfp_divide_exception, - FPE_FLTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP square root exception", hfp_sqrt_exception, - FPE_FLTINV, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "operand exception", operand_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "privileged operation", privileged_op, - ILL_PRVOPC, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "special operation exception", special_op_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "translation exception", translation_exception, - ILL_ILLOPN, get_check_address(regs)) - -static inline void -do_fp_trap(struct pt_regs *regs, void __user *location, - int fpc, long interruption_code) -{ - siginfo_t si; +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, + "addressing exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, + "execute exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, + "fixpoint divide exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, + "fixpoint overflow exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, + "HFP overflow exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, + "HFP underflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, + "HFP significance exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, + "HFP divide exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, + "HFP square root exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, + "operand exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, + "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, + "special operation exception") +DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, + "translation exception") + +#ifdef CONFIG_64BIT +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, + "transaction constraint exception") +#endif - si.si_signo = SIGFPE; - si.si_errno = 0; - si.si_addr = location; - si.si_code = 0; +static inline void do_fp_trap(struct pt_regs *regs, int fpc) +{ + int si_code = 0; /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ if (fpc & 0x8000) /* invalid fp operation */ - si.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; else if (fpc & 0x4000) /* div by 0 */ - si.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; else if (fpc & 0x2000) /* overflow */ - si.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; else if (fpc & 0x1000) /* underflow */ - si.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; else if (fpc & 0x0800) /* inexact */ - si.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; } - current->thread.ieee_instruction_pointer = (addr_t) location; - do_trap(interruption_code, SIGFPE, - "floating point exception", regs, &si); + do_trap(regs, SIGFPE, si_code, "floating point exception"); } -static void illegal_op(struct pt_regs * regs, long interruption_code) +void __kprobes illegal_op(struct pt_regs *regs) { siginfo_t info; __u8 opcode[6]; __u16 __user *location; int signal = 0; - location = get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_trap_ip(regs); - if (regs->psw.mask & PSW_MASK_PSTATE) { + if (user_mode(regs)) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (current->ptrace & PT_PTRACED) - force_sig(SIGTRAP, current); - else + if (current->ptrace) { + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_BRKPT; + info.si_addr = location; + force_sig_info(SIGTRAP, &info, current); + } else signal = SIGILL; #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) { @@ -493,53 +224,33 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) * If we get an illegal op in kernel mode, send it through the * kprobes notifier. If kprobes doesn't pick it up, SIGILL */ - if (notify_die(DIE_BPT, "bpt", regs, interruption_code, + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } #ifdef CONFIG_MATHEMU if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); - else if (signal == SIGSEGV) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) location; - do_trap(interruption_code, signal, - "user address fault", regs, &info); - } else + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal == SIGSEGV) + do_trap(regs, signal, SEGV_MAPERR, "user address fault"); + else #endif - if (signal) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *) location; - do_trap(interruption_code, signal, - "illegal operation", regs, &info); - } + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); } #ifdef CONFIG_MATHEMU -asmlinkage void -specification_exception(struct pt_regs * regs, long interruption_code) +void specification_exception(struct pt_regs *regs) { __u8 opcode[6]; __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = (__u16 __user *) get_trap_ip(regs); - if (regs->psw.mask & PSW_MASK_PSTATE) { + if (user_mode(regs)) { get_user(*((__u16 *) opcode), location); switch (opcode[0]) { case 0x28: /* LDR Rx,Ry */ @@ -572,42 +283,27 @@ specification_exception(struct pt_regs * regs, long interruption_code) signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(interruption_code, signal, - "specification exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "specification exception"); } #else -DO_ERROR_INFO(SIGILL, "specification exception", specification_exception, - ILL_ILLOPN, get_check_address(regs)); +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, + "specification exception"); #endif -static void data_exception(struct pt_regs * regs, long interruption_code) +void data_exception(struct pt_regs *regs) { __u16 __user *location; int signal = 0; - location = get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_trap_ip(regs); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); #ifdef CONFIG_MATHEMU - else if (regs->psw.mask & PSW_MASK_PSTATE) { + else if (user_mode(regs)) { __u8 opcode[6]; get_user(*((__u16 *) opcode), location); switch (opcode[0]) { @@ -666,35 +362,21 @@ static void data_exception(struct pt_regs * regs, long interruption_code) else signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(interruption_code, signal, - "data exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "data exception"); } -static void space_switch_exception(struct pt_regs * regs, long int_code) +void space_switch_exception(struct pt_regs *regs) { - siginfo_t info; - /* Set user psw back to home space mode. */ - if (regs->psw.mask & PSW_MASK_PSTATE) + if (user_mode(regs)) regs->psw.mask |= PSW_ASC_HOME; /* Send SIGILL. */ - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = get_check_address(regs); - do_trap(int_code, SIGILL, "space switch event", regs, &info); + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } -asmlinkage void kernel_stack_overflow(struct pt_regs * regs) +void __kprobes kernel_stack_overflow(struct pt_regs * regs) { bust_spinlocks(1); printk("Kernel stack overflow.\n"); @@ -703,42 +385,7 @@ asmlinkage void kernel_stack_overflow(struct pt_regs * regs) panic("Corrupt kernel stack, can't continue."); } -/* init is done in lowcore.S and head.S */ - void __init trap_init(void) { - int i; - - for (i = 0; i < 128; i++) - pgm_check_table[i] = &default_trap_handler; - pgm_check_table[1] = &illegal_op; - pgm_check_table[2] = &privileged_op; - pgm_check_table[3] = &execute_exception; - pgm_check_table[4] = &do_protection_exception; - pgm_check_table[5] = &addressing_exception; - pgm_check_table[6] = &specification_exception; - pgm_check_table[7] = &data_exception; - pgm_check_table[8] = &overflow_exception; - pgm_check_table[9] = ÷_exception; - pgm_check_table[0x0A] = &overflow_exception; - pgm_check_table[0x0B] = ÷_exception; - pgm_check_table[0x0C] = &hfp_overflow_exception; - pgm_check_table[0x0D] = &hfp_underflow_exception; - pgm_check_table[0x0E] = &hfp_significance_exception; - pgm_check_table[0x0F] = &hfp_divide_exception; - pgm_check_table[0x10] = &do_dat_exception; - pgm_check_table[0x11] = &do_dat_exception; - pgm_check_table[0x12] = &translation_exception; - pgm_check_table[0x13] = &special_op_exception; -#ifdef CONFIG_64BIT - pgm_check_table[0x38] = &do_asce_exception; - pgm_check_table[0x39] = &do_dat_exception; - pgm_check_table[0x3A] = &do_dat_exception; - pgm_check_table[0x3B] = &do_dat_exception; -#endif /* CONFIG_64BIT */ - pgm_check_table[0x15] = &operand_exception; - pgm_check_table[0x1C] = &space_switch_exception; - pgm_check_table[0x1D] = &hfp_sqrt_exception; - pgm_check_table[0x40] = &do_monitor_call; - pfault_irq_init(); + local_mcck_enable(); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c new file mode 100644 index 00000000000..61364909678 --- /dev/null +++ b/arch/s390/kernel/vdso.c @@ -0,0 +1,333 @@ +/* + * vdso setup for s390 + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/facility.h> + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif + +#ifdef CONFIG_64BIT +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +static unsigned int vdso64_pages; +static struct page **vdso64_pagelist; +#endif /* CONFIG_64BIT */ + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso_enabled = 1; + +static int __init vdso_setup(char *s) +{ + unsigned long val; + int rc; + + rc = 0; + if (strncmp(s, "on", 3) == 0) + vdso_enabled = 1; + else if (strncmp(s, "off", 4) == 0) + vdso_enabled = 0; + else { + rc = kstrtoul(s, 0, &val); + vdso_enabled = rc ? 0 : !!val; + } + return !rc; +} +__setup("vdso=", vdso_setup); + +/* + * The vdso data page + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + vd->ectg_available = test_facility(31); +} + +#ifdef CONFIG_64BIT +/* + * Allocate/free per cpu vdso data. + */ +#define SEGMENT_ORDER 2 + +int vdso_alloc_per_cpu(struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_INVALID, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x02000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +void vdso_free_per_cpu(struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} + +static void vdso_init_cr5(void) +{ + unsigned long cr5; + + if (!vdso_enabled) + return; + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} +#endif /* CONFIG_64BIT */ + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page **vdso_pagelist; + unsigned long vdso_pages; + unsigned long vdso_base; + int rc; + + if (!vdso_enabled) + return 0; + /* + * Only map the vdso for dynamically linked elf binaries. + */ + if (!uses_interp) + return 0; + +#ifdef CONFIG_64BIT + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif +#else + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for + * the process + */ + if (vdso_pages == 0) + return 0; + + current->mm->context.vdso_base = 0; + + /* + * pick a base address for the vDSO in process space. We try to put + * it at vdso_base which is the "natural" base for it, but we might + * fail and end up putting it elsewhere. + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto out_up; + } + + /* + * Put vDSO base into mm struct. We need to do this before calling + * install_special_mapping or the perf counter mmap tracking code + * will fail to recognise it as a vDSO (since arch_vma_name fails). + */ + current->mm->context.vdso_base = vdso_base; + + /* + * our vma flags don't have VM_WRITE so by default, the process + * isn't allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW + * on those pages but it's then your responsibility to never do that + * on the "data" page of the vDSO or you'll stop getting kernel + * updates and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though. + */ + rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (rc) + current->mm->context.vdso_base = 0; +out_up: + up_write(&mm->mmap_sem); + return rc; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) + return "[vdso]"; + return NULL; +} + +static int __init vdso_init(void) +{ + int i; + + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) + /* Calculate the size of the 32 bit vDSO */ + vdso32_pages = ((&vdso32_end - &vdso32_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1), + GFP_KERNEL); + BUG_ON(vdso32_pagelist == NULL); + for (i = 0; i < vdso32_pages - 1; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso32_pagelist[i] = pg; + } + vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); + vdso32_pagelist[vdso32_pages] = NULL; +#endif + +#ifdef CONFIG_64BIT + /* Calculate the size of the 64 bit vDSO */ + vdso64_pages = ((&vdso64_end - &vdso64_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1), + GFP_KERNEL); + BUG_ON(vdso64_pagelist == NULL); + for (i = 0; i < vdso64_pages - 1; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso64_pagelist[i] = pg; + } + vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); + vdso64_pagelist[vdso64_pages] = NULL; + if (vdso_alloc_per_cpu(&S390_lowcore)) + BUG(); + vdso_init_cr5(); +#endif /* CONFIG_64BIT */ + + get_page(virt_to_page(vdso_data)); + + smp_wmb(); + + return 0; +} +early_initcall(vdso_init); + +int in_gate_area_no_mm(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return NULL; +} diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore new file mode 100644 index 00000000000..e45fba9d0ce --- /dev/null +++ b/arch/s390/kernel/vdso32/.gitignore @@ -0,0 +1 @@ +vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 00000000000..8ad2b34ad15 --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,58 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_31 += -m31 -s + +KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S new file mode 100644 index 00000000000..36aaa25d05d --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -0,0 +1,39 @@ +/* + * Userland implementation of clock_getres() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + chi %r2,__CLOCK_REALTIME + je 0f + chi %r2,__CLOCK_MONOTONIC + jne 3f +0: ltr %r3,%r3 + jz 2f /* res == NULL */ + basr %r1,0 +1: l %r0,4f-1b(%r1) + xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ + st %r0,4(%r3) /* store tp->tv_usec */ +2: lhi %r2,0 + br %r14 +3: lhi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +4: .long __CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S new file mode 100644 index 00000000000..65fc3979c2f --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -0,0 +1,125 @@ +/* + * Userland implementation of clock_gettime() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + basr %r5,0 +0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME + je 10f + chi %r2,__CLOCK_MONOTONIC + jne 19f + + /* CLOCK_MONOTONIC */ + ltr %r3,%r3 + jz 9f /* tp == NULL */ +1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,2f + ahi %r0,-1 +2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ + lr %r2,%r0 + l %r0,__VDSO_TK_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 3f + a %r0,__VDSO_TK_MULT(%r5) +3: alr %r0,%r2 + al %r0,__VDSO_WTOM_NSEC(%r5) + al %r1,__VDSO_WTOM_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r2) /* >> tk->shift */ + l %r2,__VDSO_WTOM_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,20f-6b(%r5) + jl 8f +7: ahi %r2,1 + sl %r1,20f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +9: lhi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +10: ltr %r3,%r3 /* tp == NULL */ + jz 18f +11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 11b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,12f + ahi %r0,-1 +12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ + lr %r2,%r0 + l %r0,__VDSO_TK_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 13f + a %r0,__VDSO_TK_MULT(%r5) +13: alr %r0,%r2 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,14f + ahi %r0,1 +14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r2) /* >> tk->shift */ + l %r2,__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 11b + basr %r5,0 +15: ltr %r0,%r0 + jnz 16f + cl %r1,20f-15b(%r5) + jl 17f +16: ahi %r2,1 + sl %r1,20f-15b(%r5) + brc 3,15b + ahi %r0,-1 + j 15b +17: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +18: lhi %r2,0 + br %r14 + + /* Fallback to system call */ +19: lhi %r1,__NR_clock_gettime + svc 0 + br %r14 + +20: .long 1000000000 +21: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S new file mode 100644 index 00000000000..fd621a950f7 --- /dev/null +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -0,0 +1,79 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + basr %r5,0 +0: al %r5,13f-0b(%r5) /* get &_vdso_data */ +1: ltr %r3,%r3 /* check if tz is NULL */ + je 2f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +2: ltr %r2,%r2 /* check if tv is NULL */ + je 10f + l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,3f + ahi %r0,-1 +3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ + st %r0,24(%r15) + l %r0,__VDSO_TK_MULT(%r5) + ltr %r1,%r1 + mr %r0,%r0 + jnm 4f + a %r0,__VDSO_TK_MULT(%r5) +4: al %r0,24(%r15) + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srdl %r0,0(%r4) /* >> tk->shift */ + l %r4,24(%r15) /* get tv_sec from stack */ + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,11f-6b(%r5) + jl 8f +7: ahi %r4,1 + sl %r1,11f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r4,0(%r2) /* store tv->tv_sec */ + ltr %r1,%r1 + m %r0,12f-6b(%r5) + jnm 9f + al %r0,12f-6b(%r5) +9: srl %r0,6 + st %r0,4(%r2) /* store tv->tv_usec */ +10: slr %r2,%r2 + br %r14 +11: .long 1000000000 +12: .long 274877907 +13: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 00000000000..a8c379fa124 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 00000000000..ae42f8ce350 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore new file mode 100644 index 00000000000..3fd18cf9fec --- /dev/null +++ b/arch/s390/kernel/vdso64/.gitignore @@ -0,0 +1 @@ +vdso64.lds diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile new file mode 100644 index 00000000000..2a8ddfd12a5 --- /dev/null +++ b/arch/s390/kernel/vdso64/Makefile @@ -0,0 +1,58 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_64 += -m64 -s + +KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S new file mode 100644 index 00000000000..34deba7c7ed --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -0,0 +1,46 @@ +/* + * Userland implementation of clock_getres() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + cghi %r2,__CLOCK_REALTIME + je 0f + cghi %r2,__CLOCK_MONOTONIC + je 0f + cghi %r2,__CLOCK_THREAD_CPUTIME_ID + je 0f + cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ + jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f +0: ltgr %r3,%r3 + jz 1f /* res == NULL */ + larl %r1,3f + lg %r0,0(%r1) + xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ + stg %r0,8(%r3) /* store tp->tv_usec */ +1: lghi %r2,0 + br %r14 +2: lghi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +3: .quad __CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S new file mode 100644 index 00000000000..91940ed33a4 --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -0,0 +1,127 @@ +/* + * Userland implementation of clock_gettime() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME + je 4f + cghi %r2,__CLOCK_THREAD_CPUTIME_ID + je 9f + cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ + je 9f + cghi %r2,__CLOCK_MONOTONIC + jne 12f + + /* CLOCK_MONOTONIC */ + ltgr %r3,%r3 + jz 3f /* tp == NULL */ +0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + lg %r0,__VDSO_WTOM_SEC(%r5) + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_WTOM_NSEC(%r5) + srlg %r1,%r1,0(%r2) /* >> tk->shift */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,13f +1: clg %r1,0(%r5) + jl 2f + slg %r1,0(%r5) + aghi %r0,1 + j 1b +2: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +3: lghi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +4: ltr %r3,%r3 /* tp == NULL */ + jz 8f +5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 5b + stck 48(%r15) /* Store TOD clock */ + lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + srlg %r1,%r1,0(%r2) /* >> tk->shift */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 5b + larl %r5,13f +6: clg %r1,0(%r5) + jl 7f + slg %r1,0(%r5) + aghi %r0,1 + j 6b +7: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +8: lghi %r2,0 + br %r14 + + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + epsw %r5,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + tml %r5,0x4000 + jo 11f + tml %r5,0x8000 + jno 10f + sacf 256 + j 11f +10: sacf 0 +11: sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + + /* Fallback to system call */ +12: lghi %r1,__NR_clock_gettime + svc 0 + br %r14 + +13: .quad 1000000000 +14: .quad 19342813113834067 + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S new file mode 100644 index 00000000000..d0860d1d0cc --- /dev/null +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -0,0 +1,57 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + larl %r5,_vdso_data +0: ltgr %r3,%r3 /* check if tz is NULL */ + je 1f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +1: ltgr %r2,%r2 /* check if tv is NULL */ + je 4f + lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ + srlg %r1,%r1,0(%r5) /* >> tk->shift */ + larl %r5,5f +2: clg %r1,0(%r5) + jl 3f + slg %r1,0(%r5) + aghi %r0,1 + j 2b +3: stg %r0,0(%r2) /* store tv->tv_sec */ + slgr %r0,%r0 /* tv_nsec -> tv_usec */ + ml %r0,8(%r5) + srlg %r0,%r0,6 + stg %r0,8(%r2) /* store tv->tv_usec */ +4: lghi %r2,0 + br %r14 +5: .quad 1000000000 + .long 274877907 + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/s390/kernel/vdso64/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S new file mode 100644 index 00000000000..9f5979d102a --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 00000000000..c245842b516 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,14 @@ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/s390/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b4607155e8d..35b13ed0af5 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -2,18 +2,19 @@ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com) */ +#include <asm/thread_info.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> #ifndef CONFIG_64BIT OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390) -ENTRY(_start) +OUTPUT_ARCH(s390:31-bit) +ENTRY(startup) jiffies = jiffies_64 + 4; #else OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(_start) +ENTRY(startup) jiffies = jiffies_64; #endif @@ -28,11 +29,12 @@ SECTIONS . = 0x00000000; .text : { _text = .; /* Text and read-only data */ - *(.text.head) - TEXT_TEXT + HEAD_TEXT + TEXT_TEXT SCHED_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT *(.fixup) *(.gnu.warning) } :text = 0x0700 @@ -40,9 +42,10 @@ SECTIONS _etext = .; /* End of text section */ NOTES :text :note - BUG_TABLE :text - RODATA + .dummy : { *(.dummy) } :data + + RO_DATA_SECTION(PAGE_SIZE) #ifdef CONFIG_SHARED_KERNEL . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ @@ -50,56 +53,20 @@ SECTIONS . = ALIGN(PAGE_SIZE); _eshared = .; /* End of shareable data */ + _sdata = .; /* Start of data section */ - . = ALIGN(16); /* Exception table */ - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - - .data : { /* Data */ - DATA_DATA - CONSTRUCTORS - } - - . = ALIGN(PAGE_SIZE); - .data_nosave : { - __nosave_begin = .; - *(.data.nosave) - } - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.idt) - } + EXCEPTION_TABLE(16) :data - . = ALIGN(0x100); - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } + RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) - . = ALIGN(0x100); - .data.read_mostly : { - *(.data.read_mostly) - } _edata = .; /* End of data section */ - . = ALIGN(2 * PAGE_SIZE); /* init_task */ - .data.init_task : { - *(.data.init_task) - } - /* will be freed after init */ . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + + INIT_TEXT_SECTION(PAGE_SIZE) + /* * .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table @@ -108,59 +75,26 @@ SECTIONS EXIT_TEXT } - .init.data : { - INIT_DATA - } - . = ALIGN(0x100); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; + .exit.data : { + EXIT_DATA } - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - SECURITY_INIT - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(0x100); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - . = ALIGN(2); - __initramfs_end = .; - } -#endif + /* early.c uses stsi, which requires page aligned data. */ + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(0x100) - PERCPU(PAGE_SIZE) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - /* BSS */ - .bss : { - __bss_start = .; - *(.bss) - . = ALIGN(2); - __bss_stop = .; - } + BSS_SECTION(0, 2, 0) _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - } - /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c5f05b3fb2c..8c34363d6f1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,185 +1,198 @@ /* - * arch/s390/kernel/vtime.c * Virtual cpu timer based timer functions. * - * S390 version - * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 2004, 2012 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> */ -#include <linux/module.h> +#include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/export.h> #include <linux/kernel.h> +#include <linux/timex.h> +#include <linux/types.h> #include <linux/time.h> -#include <linux/delay.h> -#include <linux/init.h> +#include <linux/cpu.h> #include <linux/smp.h> -#include <linux/types.h> -#include <linux/timex.h> -#include <linux/notifier.h> -#include <linux/kernel_stat.h> -#include <linux/rcupdate.h> -#include <linux/posix-timers.h> -#include <asm/s390_ext.h> -#include <asm/timer.h> #include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/vtimer.h> +#include <asm/vtime.h> +#include <asm/irq.h> +#include "entry.h" -static ext_int_info_t ext_int_info_timer; -static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); +static void virt_timer_expire(void); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ -void account_process_tick(struct task_struct *tsk, int user_tick) +DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +static LIST_HEAD(virt_timer_list); +static DEFINE_SPINLOCK(virt_timer_lock); +static atomic64_t virt_timer_current; +static atomic64_t virt_timer_elapsed; + +static inline u64 get_vtimer(void) { - cputime_t cputime; - __u64 timer, clock; - int rcu_user_flag; + u64 timer; - timer = S390_lowcore.last_update_timer; - clock = S390_lowcore.last_update_clock; - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " STCK %1" /* Store current tod clock value */ - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock) ); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; - - cputime = S390_lowcore.user_timer >> 12; - rcu_user_flag = cputime != 0; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); - - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime); - - cputime = S390_lowcore.steal_clock; - if ((__s64) cputime > 0) { - cputime >>= 12; - S390_lowcore.steal_clock -= cputime << 12; - account_steal_time(tsk, cputime); - } + asm volatile("stpt %0" : "=m" (timer)); + return timer; } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ -void account_vtime(struct task_struct *tsk) +static inline void set_vtimer(u64 expires) { - cputime_t cputime; - __u64 timer; + u64 timer; - timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " spt %1" /* Set new value imm. afterwards */ + : "=m" (timer) : "m" (expires)); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} - cputime = S390_lowcore.user_timer >> 12; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); +static inline int virt_timer_forward(u64 elapsed) +{ + BUG_ON(!irqs_disabled()); - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + if (list_empty(&virt_timer_list)) + return 0; + elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed); + return elapsed >= atomic64_read(&virt_timer_current); } /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_system_vtime(struct task_struct *tsk) +static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) { - cputime_t cputime; - __u64 timer; + struct thread_info *ti = task_thread_info(tsk); + u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); + clock = S390_lowcore.last_update_clock; + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stck %1" /* Store current tod clock value */ + : "=m" (S390_lowcore.last_update_timer), + "=m" (S390_lowcore.last_update_clock)); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, hardirq_offset, system, system); + + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(steal); + } - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + return virt_timer_forward(user + system); } -static inline void set_vtimer(__u64 expires) -{ - __u64 timer; - - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; - - /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; -} -#else -static inline void set_vtimer(__u64 expires) +void vtime_task_switch(struct task_struct *prev) { - S390_lowcore.last_update_timer = expires; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); - - /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(current); + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; } -#endif -static void start_cpu_timer(void) +/* + * In s390, accounting pending user time also implies + * accounting system time in order to correctly compute + * the stolen time accounting. + */ +void vtime_account_user(struct task_struct *tsk) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* CPU timer interrupt is pending, don't reprogramm it */ - if (vt_list->idle & 1LL<<63) - return; - - if (!list_empty(&vt_list->list)) - set_vtimer(vt_list->idle); + if (do_account_vtime(tsk, HARDIRQ_OFFSET)) + virt_timer_expire(); } -static void stop_cpu_timer(void) +/* + * Update process times based on virtual cpu times stored by entry.S + * to the lowcore fields user_timer, system_timer & steal_clock. + */ +void vtime_account_irq_enter(struct task_struct *tsk) { - struct vtimer_queue *vt_list; + struct thread_info *ti = task_thread_info(tsk); + u64 timer, system; - vt_list = &__get_cpu_var(virt_cpu_timer); + WARN_ON_ONCE(!irqs_disabled()); - /* nothing to do */ - if (list_empty(&vt_list->list)) { - vt_list->idle = VTIMER_MAX_SLICE; - goto fire; - } + timer = S390_lowcore.last_update_timer; + S390_lowcore.last_update_timer = get_vtimer(); + S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - /* store the actual expire value */ - asm volatile ("STPT %0" : "=m" (vt_list->idle)); + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, 0, system, system); - /* - * If the CPU timer is negative we don't reprogramm - * it because we will get instantly an interrupt. - */ - if (vt_list->idle & 1LL<<63) - return; + virt_timer_forward(system); +} +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); - vt_list->offset += vt_list->to_expire - vt_list->idle; +void vtime_account_system(struct task_struct *tsk) +__attribute__((alias("vtime_account_irq_enter"))); +EXPORT_SYMBOL_GPL(vtime_account_system); - /* - * We cannot halt the CPU timer, we just write a value that - * nearly never expires (only after 71 years) and re-write - * the stored expire value if we continue the timer - */ - fire: - set_vtimer(VTIMER_MAX_SLICE); +void __kprobes vtime_stop_cpu(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + unsigned long long idle_time; + unsigned long psw_mask; + + trace_hardirqs_on(); + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + idle->nohz_delay = 0; + + /* Call the assembler magic in entry.S */ + psw_idle(idle, psw_mask); + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle->sequence++; + smp_wmb(); + idle_time = idle->clock_idle_exit - idle->clock_idle_enter; + idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; + idle->idle_time += idle_time; + idle->idle_count++; + account_idle_time(idle_time); + smp_wmb(); + idle->sequence++; +} + +cputime64_t s390_get_idle_time(int cpu) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; } /* @@ -188,11 +201,11 @@ static void stop_cpu_timer(void) */ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) { - struct vtimer_list *event; + struct vtimer_list *tmp; - list_for_each_entry(event, head, entry) { - if (event->expires > timer->expires) { - list_add_tail(&timer->entry, &event->entry); + list_for_each_entry(tmp, head, entry) { + if (tmp->expires > timer->expires) { + list_add_tail(&timer->entry, &tmp->entry); return; } } @@ -200,312 +213,158 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) } /* - * Do the callback functions of expired vtimer events. - * Called from within the interrupt handler. + * Handler for expired virtual CPU timer. */ -static void do_callbacks(struct list_head *cb_list) +static void virt_timer_expire(void) { - struct vtimer_queue *vt_list; - struct vtimer_list *event, *tmp; - void (*fn)(unsigned long); - unsigned long data; - - if (list_empty(cb_list)) - return; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - list_for_each_entry_safe(event, tmp, cb_list, entry) { - fn = event->function; - data = event->data; - fn(data); - - if (!event->interval) - /* delete one shot timer */ - list_del_init(&event->entry); - else { - /* move interval timer back to list */ - spin_lock(&vt_list->lock); - list_del_init(&event->entry); - list_add_sorted(event, &vt_list->list); - spin_unlock(&vt_list->lock); - } + struct vtimer_list *timer, *tmp; + unsigned long elapsed; + LIST_HEAD(cb_list); + + /* walk timer list, fire all expired timers */ + spin_lock(&virt_timer_lock); + elapsed = atomic64_read(&virt_timer_elapsed); + list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) { + if (timer->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&timer->entry, &cb_list); + else + timer->expires -= elapsed; } -} - -/* - * Handler for the virtual CPU timer. - */ -static void do_cpu_timer_interrupt(__u16 error_code) -{ - __u64 next, delta; - struct vtimer_queue *vt_list; - struct vtimer_list *event, *tmp; - struct list_head *ptr; - /* the callback queue */ - struct list_head cb_list; - - INIT_LIST_HEAD(&cb_list); - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* walk timer list, fire all expired events */ - spin_lock(&vt_list->lock); - - if (vt_list->to_expire < VTIMER_MAX_SLICE) - vt_list->offset += vt_list->to_expire; - - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { - if (event->expires > vt_list->offset) - /* found first unexpired event, leave */ - break; - - /* re-charge interval timer, we have to add the offset */ - if (event->interval) - event->expires = event->interval + vt_list->offset; - - /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + if (!list_empty(&virt_timer_list)) { + timer = list_first_entry(&virt_timer_list, + struct vtimer_list, entry); + atomic64_set(&virt_timer_current, timer->expires); } - spin_unlock(&vt_list->lock); - do_callbacks(&cb_list); - - /* next event is first in list */ - spin_lock(&vt_list->lock); - if (!list_empty(&vt_list->list)) { - ptr = vt_list->list.next; - event = list_entry(ptr, struct vtimer_list, entry); - next = event->expires - vt_list->offset; - - /* add the expired time from this interrupt handler - * and the callback functions - */ - asm volatile ("STPT %0" : "=m" (delta)); - delta = 0xffffffffffffffffLL - delta + 1; - vt_list->offset += delta; - next -= delta; - } else { - vt_list->offset = 0; - next = VTIMER_MAX_SLICE; + atomic64_sub(elapsed, &virt_timer_elapsed); + spin_unlock(&virt_timer_lock); + + /* Do callbacks and recharge periodic timers */ + list_for_each_entry_safe(timer, tmp, &cb_list, entry) { + list_del_init(&timer->entry); + timer->function(timer->data); + if (timer->interval) { + /* Recharge interval timer */ + timer->expires = timer->interval + + atomic64_read(&virt_timer_elapsed); + spin_lock(&virt_timer_lock); + list_add_sorted(timer, &virt_timer_list); + spin_unlock(&virt_timer_lock); + } } - spin_unlock(&vt_list->lock); - set_vtimer(next); } void init_virt_timer(struct vtimer_list *timer) { timer->function = NULL; INIT_LIST_HEAD(&timer->entry); - spin_lock_init(&timer->lock); } EXPORT_SYMBOL(init_virt_timer); static inline int vtimer_pending(struct vtimer_list *timer) { - return (!list_empty(&timer->entry)); + return !list_empty(&timer->entry); } -/* - * this function should only run on the specified CPU - */ static void internal_add_vtimer(struct vtimer_list *timer) { - unsigned long flags; - __u64 done; - struct vtimer_list *event; - struct vtimer_queue *vt_list; - - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); - - if (timer->cpu != smp_processor_id()) - printk("internal_add_vtimer: BUG, running on wrong CPU"); - - /* if list is empty we only have to set the timer */ - if (list_empty(&vt_list->list)) { - /* reset the offset, this may happen if the last timer was - * just deleted by mod_virt_timer and the interrupt - * didn't happen until here - */ - vt_list->offset = 0; - goto fire; + if (list_empty(&virt_timer_list)) { + /* First timer, just program it. */ + atomic64_set(&virt_timer_current, timer->expires); + atomic64_set(&virt_timer_elapsed, 0); + list_add(&timer->entry, &virt_timer_list); + } else { + /* Update timer against current base. */ + timer->expires += atomic64_read(&virt_timer_elapsed); + if (likely((s64) timer->expires < + (s64) atomic64_read(&virt_timer_current))) + /* The new timer expires before the current timer. */ + atomic64_set(&virt_timer_current, timer->expires); + /* Insert new timer into the list. */ + list_add_sorted(timer, &virt_timer_list); } - - /* save progress */ - asm volatile ("STPT %0" : "=m" (done)); - - /* calculate completed work */ - done = vt_list->to_expire - done + vt_list->offset; - vt_list->offset = 0; - - list_for_each_entry(event, &vt_list->list, entry) - event->expires -= done; - - fire: - list_add_sorted(timer, &vt_list->list); - - /* get first element, which is the next vtimer slice */ - event = list_entry(vt_list->list.next, struct vtimer_list, entry); - - set_vtimer(event->expires); - spin_unlock_irqrestore(&vt_list->lock, flags); - /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ - put_cpu(); } -static inline int prepare_vtimer(struct vtimer_list *timer) +static void __add_vtimer(struct vtimer_list *timer, int periodic) { - if (!timer->function) { - printk("add_virt_timer: uninitialized timer\n"); - return -EINVAL; - } - - if (!timer->expires || timer->expires > VTIMER_MAX_SLICE) { - printk("add_virt_timer: invalid timer expire value!\n"); - return -EINVAL; - } - - if (vtimer_pending(timer)) { - printk("add_virt_timer: timer pending\n"); - return -EBUSY; - } + unsigned long flags; - timer->cpu = get_cpu(); - return 0; + timer->interval = periodic ? timer->expires : 0; + spin_lock_irqsave(&virt_timer_lock, flags); + internal_add_vtimer(timer); + spin_unlock_irqrestore(&virt_timer_lock, flags); } /* * add_virt_timer - add an oneshot virtual CPU timer */ -void add_virt_timer(void *new) +void add_virt_timer(struct vtimer_list *timer) { - struct vtimer_list *timer; - - timer = (struct vtimer_list *)new; - - if (prepare_vtimer(timer) < 0) - return; - - timer->interval = 0; - internal_add_vtimer(timer); + __add_vtimer(timer, 0); } EXPORT_SYMBOL(add_virt_timer); /* * add_virt_timer_int - add an interval virtual CPU timer */ -void add_virt_timer_periodic(void *new) +void add_virt_timer_periodic(struct vtimer_list *timer) { - struct vtimer_list *timer; - - timer = (struct vtimer_list *)new; - - if (prepare_vtimer(timer) < 0) - return; - - timer->interval = timer->expires; - internal_add_vtimer(timer); + __add_vtimer(timer, 1); } EXPORT_SYMBOL(add_virt_timer_periodic); -/* - * If we change a pending timer the function must be called on the CPU - * where the timer is running on, e.g. by smp_call_function_single() - * - * The original mod_timer adds the timer if it is not pending. For compatibility - * we do the same. The timer will be added on the current CPU as a oneshot timer. - * - * returns whether it has modified a pending timer (1) or not (0) - */ -int mod_virt_timer(struct vtimer_list *timer, __u64 expires) +static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic) { - struct vtimer_queue *vt_list; unsigned long flags; - int cpu; + int rc; - if (!timer->function) { - printk("mod_virt_timer: uninitialized timer\n"); - return -EINVAL; - } - - if (!expires || expires > VTIMER_MAX_SLICE) { - printk("mod_virt_timer: invalid expire range\n"); - return -EINVAL; - } + BUG_ON(!timer->function); - /* - * This is a common optimization triggered by the - * networking code - if the timer is re-modified - * to be the same thing then just return: - */ if (timer->expires == expires && vtimer_pending(timer)) return 1; - - cpu = get_cpu(); - vt_list = &per_cpu(virt_cpu_timer, cpu); - - /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vt_list->lock, flags); - - /* if timer isn't pending add it on the current CPU */ - if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vt_list->lock, flags); - /* we do not activate an interval timer with mod_virt_timer */ - timer->interval = 0; - timer->expires = expires; - timer->cpu = cpu; - internal_add_vtimer(timer); - return 0; - } - - /* check if we run on the right CPU */ - if (timer->cpu != cpu) { - printk("mod_virt_timer: running on wrong CPU, check your code\n"); - spin_unlock_irqrestore(&vt_list->lock, flags); - put_cpu(); - return -EINVAL; - } - - list_del_init(&timer->entry); + spin_lock_irqsave(&virt_timer_lock, flags); + rc = vtimer_pending(timer); + if (rc) + list_del_init(&timer->entry); + timer->interval = periodic ? expires : 0; timer->expires = expires; - - /* also change the interval if we have an interval timer */ - if (timer->interval) - timer->interval = expires; - - /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vt_list->lock, flags); internal_add_vtimer(timer); - return 1; + spin_unlock_irqrestore(&virt_timer_lock, flags); + return rc; +} + +/* + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer(struct vtimer_list *timer, u64 expires) +{ + return __mod_vtimer(timer, expires, 0); } EXPORT_SYMBOL(mod_virt_timer); /* - * delete a virtual timer + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires) +{ + return __mod_vtimer(timer, expires, 1); +} +EXPORT_SYMBOL(mod_virt_timer_periodic); + +/* + * Delete a virtual timer. * * returns whether the deleted timer was pending (1) or not (0) */ int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vt_list; - /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); - - /* we don't interrupt a running timer, just let it expire! */ + spin_lock_irqsave(&virt_timer_lock, flags); list_del_init(&timer->entry); - - /* last timer removed */ - if (list_empty(&vt_list->list)) { - vt_list->to_expire = 0; - vt_list->offset = 0; - } - - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&virt_timer_lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -515,55 +374,29 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vt_list; - - /* kick the virtual timer */ - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); - asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); - - /* enable cpu timer interrupts */ - __ctl_set_bit(0,10); - - vt_list = &__get_cpu_var(virt_cpu_timer); - INIT_LIST_HEAD(&vt_list->list); - spin_lock_init(&vt_list->lock); - vt_list->to_expire = 0; - vt_list->offset = 0; - vt_list->idle = 0; - + /* set initial cpu timer */ + set_vtimer(VTIMER_MAX_SLICE); } -static int vtimer_idle_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int s390_nohz_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { - switch (action) { - case S390_CPU_IDLE: - stop_cpu_timer(); - break; - case S390_CPU_NOT_IDLE: - start_cpu_timer(); + struct s390_idle_data *idle; + long cpu = (long) hcpu; + + idle = &per_cpu(s390_idle, cpu); + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DYING: + idle->nohz_delay = 0; + default: break; } return NOTIFY_OK; } -static struct notifier_block vtimer_idle_nb = { - .notifier_call = vtimer_idle_notify, -}; - void __init vtime_init(void) { - /* request the cpu timer external interrupt */ - if (register_early_external_interrupt(0x1005, do_cpu_timer_interrupt, - &ext_int_info_timer) != 0) - panic("Couldn't request external interrupt 0x1005"); - - if (register_idle_notifier(&vtimer_idle_nb)) - panic("Couldn't register idle notifier"); - /* Enable cpu timer interrupts on the boot cpu. */ init_cpu_vtimer(); + cpu_notifier(s390_nohz_notify, 0); } - |
