diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-03 10:26:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-03 10:26:41 -0700 |
commit | 8f5759aeb88a47448cd92ab55a016d013b154a98 (patch) | |
tree | a9c0536e10300a95292b99332171837675af1e16 /arch/s390 | |
parent | e5c4ecdc55b6d824365ba7964bcd3185223f9688 (diff) | |
parent | 63aef00b55d37e9fad837a8b38a2c261f0d32041 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux into next
Pull first set of s390 updates from Martin Schwidefsky:
"The biggest change in this patchset is conversion from the bootmem
bitmaps to the memblock code. This conversion requires two common
code patches to introduce the 'physmem' memblock list.
We experimented with ticket spinlocks but in the end decided against
them as they perform poorly on virtualized systems. But the spinlock
cleanup and some small improvements are included.
The uaccess code got another optimization, the get_user/put_user calls
are now inline again for kernel compiles targeted at z10 or newer
machines. This makes the text segment shorter and the code gets a
little bit faster.
And as always some bug fixes"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (31 commits)
s390/lowcore: replace lowcore irb array with a per-cpu variable
s390/lowcore: reserve 96 bytes for IRB in lowcore
s390/facilities: remove extract-cpu-time facility check
s390: require mvcos facility for z10 and newer machines
s390/boot: fix boot of compressed kernel built with gcc 4.9
s390/cio: remove weird assignment during argument evaluation
s390/time: cast tv_nsec to u64 prior to shift in update_vsyscall
s390/oprofile: make return of 0 explicit
s390/spinlock: refactor arch_spin_lock_wait[_flags]
s390/rwlock: add missing local_irq_restore calls
s390/spinlock,rwlock: always to a load-and-test first
s390/cio: fix multiple structure definitions
s390/spinlock: fix system hang with spin_retry <= 0
s390/appldata: add slab.h for kzalloc/kfree
s390/uaccess: provide inline variants of get_user/put_user
s390/pci: add some new arch specific pci attributes
s390/pci: use pdev->dev.groups for attribute creation
s390/pci: use macro for attribute creation
s390/pci: improve state check when processing hotplug events
s390: split TIF bits into CIF, PIF and TIF bits
...
Diffstat (limited to 'arch/s390')
49 files changed, 783 insertions, 868 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d68fe34799b..bb63499fc5d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,7 +60,6 @@ config PCI_QUIRKS config S390 def_bool y - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -130,6 +129,7 @@ config S390 select HAVE_KVM if 64BIT select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MEMBLOCK_PHYS_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_OPROFILE select HAVE_PERF_EVENTS @@ -139,6 +139,7 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA + select NO_BOOTMEM select OLD_SIGACTION select OLD_SIGSUSPEND3 select SYSCTL_EXCEPTION_TRACE @@ -592,21 +593,14 @@ config CRASH_DUMP bool "kernel crash dumps" depends on 64BIT && SMP select KEXEC - select ZFCPDUMP help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools into a specially reserved region and then later executed after a crash by kdump/kexec. - For more details see Documentation/kdump/kdump.txt - -config ZFCPDUMP - def_bool n - prompt "zfcpdump support" - depends on 64BIT && SMP - help - Select this option if you want to build an zfcpdump enabled kernel. Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. + This option also enables s390 zfcpdump. + See also <file:Documentation/s390/zfcpdump.txt> endmenu diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 42be5374313..edcf2a70694 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -13,6 +13,7 @@ #include <linux/kernel_stat.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/slab.h> #include <asm/io.h> #include "appldata.h" diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 866ecbe670e..f90d1fc6d60 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -12,7 +12,7 @@ targets += misc.o piggy.o sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index a9c2c068617..b80e456d642 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -229,5 +229,5 @@ int ccw_device_siosl(struct ccw_device *); extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); -extern void *ccw_device_get_chp_desc(struct ccw_device *, int); +struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int); #endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index ebc2913f9ee..057ce0ca637 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -10,6 +10,8 @@ struct ccw_driver; * @count: number of attached slave devices * @dev: embedded device structure * @cdev: variable number of slave devices, allocated as needed + * @ungroup_work: work to be done when a ccwgroup notifier has action + * type %BUS_NOTIFY_UNBIND_DRIVER */ struct ccwgroup_device { enum { diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h index 38c405ef89c..7298eec9854 100644 --- a/arch/s390/include/asm/chpid.h +++ b/arch/s390/include/asm/chpid.h @@ -8,6 +8,17 @@ #include <uapi/asm/chpid.h> #include <asm/cio.h> +struct channel_path_desc { + u8 flags; + u8 lsn; + u8 desc; + u8 chpid; + u8 swla; + u8 zeroes; + u8 chla; + u8 chpp; +} __packed; + static inline void chp_id_init(struct chp_id *chpid) { memset(chpid, 0, sizeof(struct chp_id)); diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 69cf5b5eddc..a4811aa0304 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -29,7 +29,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) int cmparg = (encoded_op << 20) >> 20; int oldval = 0, newval, ret; - update_primary_asce(current); + load_kernel_asce(); if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -79,7 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, { int ret; - update_primary_asce(current); + load_kernel_asce(); asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index bbf8141408c..2070cad80e9 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -93,7 +93,9 @@ struct _lowcore { __u32 save_area_sync[8]; /* 0x0200 */ __u32 save_area_async[8]; /* 0x0220 */ __u32 save_area_restart[1]; /* 0x0240 */ - __u8 pad_0x0244[0x0248-0x0244]; /* 0x0244 */ + + /* CPU flags. */ + __u32 cpu_flags; /* 0x0244 */ /* Return psws. */ psw_t return_psw; /* 0x0248 */ @@ -139,12 +141,9 @@ struct _lowcore { __u32 percpu_offset; /* 0x02f0 */ __u32 machine_flags; /* 0x02f4 */ __u32 ftrace_func; /* 0x02f8 */ - __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ - - /* Interrupt response block */ - __u8 irb[64]; /* 0x0300 */ + __u32 spinlock_lockval; /* 0x02fc */ - __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ + __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -237,7 +236,9 @@ struct _lowcore { __u64 save_area_sync[8]; /* 0x0200 */ __u64 save_area_async[8]; /* 0x0240 */ __u64 save_area_restart[1]; /* 0x0280 */ - __u8 pad_0x0288[0x0290-0x0288]; /* 0x0288 */ + + /* CPU flags. */ + __u64 cpu_flags; /* 0x0288 */ /* Return psws. */ psw_t return_psw; /* 0x0290 */ @@ -285,15 +286,13 @@ struct _lowcore { __u64 machine_flags; /* 0x0388 */ __u64 ftrace_func; /* 0x0390 */ __u64 gmap; /* 0x0398 */ - __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ - - /* Interrupt response block. */ - __u8 irb[64]; /* 0x0400 */ + __u32 spinlock_lockval; /* 0x03a0 */ + __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x0440 */ + __u32 paste[16]; /* 0x0400 */ - __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */ + __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 71be346d0e3..056d7eff2a1 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -30,33 +30,31 @@ static inline int init_new_context(struct task_struct *tsk, #define destroy_context(mm) do { } while (0) -static inline void update_user_asce(struct mm_struct *mm, int load_primary) +static inline void set_user_asce(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); - if (load_primary) - __ctl_load(S390_lowcore.user_asce, 1, 1); set_fs(current->thread.mm_segment); + set_cpu_flag(CIF_ASCE); } -static inline void clear_user_asce(struct mm_struct *mm, int load_primary) +static inline void clear_user_asce(void) { S390_lowcore.user_asce = S390_lowcore.kernel_asce; - if (load_primary) - __ctl_load(S390_lowcore.user_asce, 1, 1); + __ctl_load(S390_lowcore.user_asce, 1, 1); __ctl_load(S390_lowcore.user_asce, 7, 7); } -static inline void update_primary_asce(struct task_struct *tsk) +static inline void load_kernel_asce(void) { unsigned long asce; __ctl_store(asce, 1, 1); if (asce != S390_lowcore.kernel_asce) __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_tsk_thread_flag(tsk, TIF_ASCE); + set_cpu_flag(CIF_ASCE); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -64,25 +62,17 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - update_primary_asce(tsk); if (prev == next) return; if (MACHINE_HAS_TLB_LC) cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - if (atomic_inc_return(&next->context.attach_count) >> 16) { - /* Delay update_user_asce until all TLB flushes are done. */ - set_tsk_thread_flag(tsk, TIF_TLB_WAIT); - /* Clear old ASCE by loading the kernel ASCE. */ - clear_user_asce(next, 0); - } else { - cpumask_set_cpu(cpu, mm_cpumask(next)); - update_user_asce(next, 0); - if (next->context.flush_mm) - /* Flush pending TLBs */ - __tlb_flush_mm(next); - } + /* Clear old ASCE by loading the kernel ASCE. */ + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + /* Delay loading of the new ASCE to control registers CR1 & CR7 */ + set_cpu_flag(CIF_ASCE); + atomic_inc(&next->context.attach_count); atomic_dec(&prev->context.attach_count); - WARN_ON(atomic_read(&prev->context.attach_count) < 0); if (MACHINE_HAS_TLB_LC) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -93,15 +83,14 @@ static inline void finish_arch_post_lock_switch(void) struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT)) + if (!mm) return; preempt_disable(); - clear_tsk_thread_flag(tsk, TIF_TLB_WAIT); while (atomic_read(&mm->context.attach_count) >> 16) cpu_relax(); cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - update_user_asce(mm, 0); + set_user_asce(mm); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -113,7 +102,9 @@ static inline void finish_arch_post_lock_switch(void) static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm(prev, next, current); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + set_user_asce(next); } static inline void arch_dup_mmap(struct mm_struct *oldmm, diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 79b5f0783a3..c030900320e 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -78,10 +78,16 @@ struct zpci_dev { enum zpci_state state; u32 fid; /* function ID, used by sclp */ u32 fh; /* function handle, used by insn's */ + u16 vfn; /* virtual function number */ u16 pchid; /* physical channel ID */ u8 pfgid; /* function group ID */ + u8 pft; /* pci function type */ u16 domain; + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 uid; /* user defined id */ + u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ + /* IRQ stuff */ u64 msi_addr; /* MSI address */ struct airq_iv *aibv; /* adapter interrupt bit vector */ diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index d31d739f868..dd78f92f1cc 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -44,6 +44,7 @@ struct clp_fh_list_entry { #define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ #define CLP_UTIL_STR_LEN 64 +#define CLP_PFIP_NR_SEGMENTS 4 /* List PCI functions request */ struct clp_req_list_pci { @@ -85,7 +86,7 @@ struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; u32 fmt : 4; /* cmd request block format */ u32 : 28; - u64 reserved1; + u64 : 64; u16 vfn; /* virtual fn number */ u16 : 7; u16 util_str_avail : 1; /* utility string available? */ @@ -94,10 +95,13 @@ struct clp_rsp_query_pci { u8 bar_size[PCI_BAR_COUNT]; u16 pchid; u32 bar[PCI_BAR_COUNT]; - u64 reserved2; + u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ + u32 : 24; + u8 pft; /* pci function type */ u64 sdma; /* start dma as */ u64 edma; /* end dma as */ - u64 reserved3[6]; + u32 reserved[11]; + u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ } __packed; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dc5fc4f90e5..6f02d452bbe 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -11,6 +11,13 @@ #ifndef __ASM_S390_PROCESSOR_H #define __ASM_S390_PROCESSOR_H +#define CIF_MCCK_PENDING 0 /* machine check handling is pending */ +#define CIF_ASCE 1 /* user asce needs fixup / uaccess */ + +#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING) +#define _CIF_ASCE (1<<CIF_ASCE) + + #ifndef __ASSEMBLY__ #include <linux/linkage.h> @@ -21,6 +28,21 @@ #include <asm/setup.h> #include <asm/runtime_instr.h> +static inline void set_cpu_flag(int flag) +{ + S390_lowcore.cpu_flags |= (1U << flag); +} + +static inline void clear_cpu_flag(int flag) +{ + S390_lowcore.cpu_flags &= ~(1U << flag); +} + +static inline int test_cpu_flag(int flag) +{ + return !!(S390_lowcore.cpu_flags & (1U << flag)); +} + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index f4783c0b7b4..1b5300cd6d2 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -8,6 +8,12 @@ #include <uapi/asm/ptrace.h> +#define PIF_SYSCALL 0 /* inside a system call */ +#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ + +#define _PIF_SYSCALL (1<<PIF_SYSCALL) +#define _PIF_PER_TRAP (1<<PIF_PER_TRAP) + #ifndef __ASSEMBLY__ #define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \ @@ -29,6 +35,7 @@ struct pt_regs unsigned int int_code; unsigned int int_parm; unsigned long int_parm_long; + unsigned long flags; }; /* @@ -79,6 +86,21 @@ struct per_struct_kernel { #define PER_CONTROL_SUSPENSION 0x00400000UL #define PER_CONTROL_ALTERATION 0x00200000UL +static inline void set_pt_regs_flag(struct pt_regs *regs, int flag) +{ + regs->flags |= (1U << flag); +} + +static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag) +{ + regs->flags &= ~(1U << flag); +} + +static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) +{ + return !!(regs->flags & (1U << flag)); +} + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b31b22dba94..089a49814c5 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -9,7 +9,6 @@ #define PARMAREA 0x10400 -#define MEMORY_CHUNKS 256 #ifndef __ASSEMBLY__ @@ -31,22 +30,11 @@ #endif /* CONFIG_64BIT */ #define COMMAND_LINE ((char *) (0x10480)) -#define CHUNK_READ_WRITE 0 -#define CHUNK_READ_ONLY 1 - -struct mem_chunk { - unsigned long addr; - unsigned long size; - int type; -}; - -extern struct mem_chunk memory_chunk[]; extern int memory_end_set; extern unsigned long memory_end; +extern unsigned long max_physmem_end; -void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize); -void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, - unsigned long size); +extern void detect_memory_memblock(void); /* * Machine features detected in head.S diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 21703f85b48..4f1307962a9 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -30,7 +30,6 @@ extern int smp_store_status(int cpu); extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); extern void smp_yield(void); -extern void smp_stop_cpu(void); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); extern void smp_fill_possible_mask(void); @@ -54,6 +53,8 @@ static inline void smp_yield_cpu(int cpu) { } static inline void smp_yield(void) { } static inline void smp_fill_possible_mask(void) { } +#endif /* CONFIG_SMP */ + static inline void smp_stop_cpu(void) { u16 pcpu = stap(); @@ -64,8 +65,6 @@ static inline void smp_stop_cpu(void) } } -#endif /* CONFIG_SMP */ - #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 83e5d216105..96879f7ad6d 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -11,18 +11,21 @@ #include <linux/smp.h> +#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) + extern int spin_retry; static inline int -_raw_compare_and_swap(volatile unsigned int *lock, - unsigned int old, unsigned int new) +_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) { + unsigned int old_expected = old; + asm volatile( " cs %0,%3,%1" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) : "cc", "memory" ); - return old; + return old == old_expected; } /* @@ -34,57 +37,69 @@ _raw_compare_and_swap(volatile unsigned int *lock, * (the type definitions are in asm/spinlock_types.h) */ -#define arch_spin_is_locked(x) ((x)->owner_cpu != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) \ - arch_spin_relax(lock); } while (0) +void arch_spin_lock_wait(arch_spinlock_t *); +int arch_spin_trylock_retry(arch_spinlock_t *); +void arch_spin_relax(arch_spinlock_t *); +void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); -extern void arch_spin_lock_wait(arch_spinlock_t *); -extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); -extern int arch_spin_trylock_retry(arch_spinlock_t *); -extern void arch_spin_relax(arch_spinlock_t *lock); +static inline u32 arch_spin_lockval(int cpu) +{ + return ~cpu; +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { - return lock.owner_cpu == 0; + return lock.lock == 0; } -static inline void arch_spin_lock(arch_spinlock_t *lp) +static inline int arch_spin_is_locked(arch_spinlock_t *lp) { - int old; + return ACCESS_ONCE(lp->lock) != 0; +} - old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); - if (likely(old == 0)) - return; - arch_spin_lock_wait(lp); +static inline int arch_spin_trylock_once(arch_spinlock_t *lp) +{ + barrier(); + return likely(arch_spin_value_unlocked(*lp) && + _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); } -static inline void arch_spin_lock_flags(arch_spinlock_t *lp, - unsigned long flags) +static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) { - int old; + return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); +} - old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); - if (likely(old == 0)) - return; - arch_spin_lock_wait_flags(lp, flags); +static inline void arch_spin_lock(arch_spinlock_t *lp) +{ + if (!arch_spin_trylock_once(lp)) + arch_spin_lock_wait(lp); } -static inline int arch_spin_trylock(arch_spinlock_t *lp) +static inline void arch_spin_lock_flags(arch_spinlock_t *lp, + unsigned long flags) { - int old; + if (!arch_spin_trylock_once(lp)) + arch_spin_lock_wait_flags(lp, flags); +} - old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); - if (likely(old == 0)) - return 1; - return arch_spin_trylock_retry(lp); +static inline int arch_spin_trylock(arch_spinlock_t *lp) +{ + if (!arch_spin_trylock_once(lp)) + return arch_spin_trylock_retry(lp); + return 1; } static inline void arch_spin_unlock(arch_spinlock_t *lp) { - _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0); + arch_spin_tryrelease_once(lp); +} + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + while (arch_spin_is_locked(lock)) + arch_spin_relax(lock); } - + /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -115,42 +130,50 @@ extern void _raw_write_lock_wait(arch_rwlock_t *lp); extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); +static inline int arch_read_trylock_once(arch_rwlock_t *rw) +{ + unsigned int old = ACCESS_ONCE(rw->lock); + return likely((int) old >= 0 && + _raw_compare_and_swap(&rw->lock, old, old + 1)); +} + +static inline int arch_write_trylock_once(arch_rwlock_t *rw) +{ + unsigned int old = ACCESS_ONCE(rw->lock); + return likely(old == 0 && + _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); +} + static inline void arch_read_lock(arch_rwlock_t *rw) { - unsigned int old; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + if (!arch_read_trylock_once(rw)) _raw_read_lock_wait(rw); } static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) { - unsigned int old; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old) + if (!arch_read_trylock_once(rw)) _raw_read_lock_wait_flags(rw, flags); } static inline void arch_read_unlock(arch_rwlock_t *rw) { - unsigned int old, cmp; + unsigned int old; - old = rw->lock; do { - cmp = old; - old = _raw_compare_and_swap(&rw->lock, old, old - 1); - } while (cmp != old); + old = ACCESS_ONCE(rw->lock); + } while (!_raw_compare_and_swap(&rw->lock, old, old - 1)); } static inline void arch_write_lock(arch_rwlock_t *rw) { - if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + if (!arch_write_trylock_once(rw)) _raw_write_lock_wait(rw); } static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) { - if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0)) + if (!arch_write_trylock_once(rw)) _raw_write_lock_wait_flags(rw, flags); } @@ -161,18 +184,16 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw) { - unsigned int old; - old = rw->lock & 0x7fffffffU; - if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1) == old)) - return 1; - return _raw_read_trylock_retry(rw); + if (!arch_read_trylock_once(rw)) + return _raw_read_trylock_retry(rw); + return 1; } static inline int arch_write_trylock(arch_rwlock_t *rw) { - if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)) - return 1; - return _raw_write_trylock_retry(rw); + if (!arch_write_trylock_once(rw)) + return _raw_write_trylock_retry(rw); + return 1; } #define arch_read_relax(lock) cpu_relax() diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index 9c76656a0af..b2cd6ff7c2c 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -6,13 +6,13 @@ #endif typedef struct { - volatile unsigned int owner_cpu; + unsigned int lock; } __attribute__ ((aligned (4))) arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, } typedef struct { - volatile unsigned int lock; + unsigned int lock; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index e759181357f..29c81f82705 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -132,7 +132,6 @@ static inline void restore_access_regs(unsigned int *acrs) update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ - update_primary_asce(current); \ } while (0) #define finish_arch_switch(prev) do { \ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 777687055e7..abad78d5b10 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -28,7 +28,7 @@ extern const unsigned int sys_call_table_emu[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return test_tsk_thread_flag(task, TIF_SYSCALL) ? + return test_pt_regs_flag(regs, PIF_SYSCALL) ? (regs->int_code & 0xffff) : -1; } diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 50630e6a35d..b833e9c0bfb 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -77,32 +77,22 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ -#define TIF_SYSCALL 0 /* inside a system call */ -#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ -#define TIF_SIGPENDING 2 /* signal pending */ -#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_TLB_WAIT 4 /* wait for TLB flush completion */ -#define TIF_ASCE 5 /* primary asce needs fixup / uaccess */ -#define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ -#define TIF_MCCK_PENDING 7 /* machine check handling is pending */ -#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ -#define TIF_SECCOMP 10 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ -#define TIF_31BIT 17 /* 32bit process */ -#define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ -#define TIF_SINGLE_STEP 20 /* This task is single stepped */ -#define TIF_BLOCK_STEP 21 /* This task is block stepped */ +#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ +#define TIF_SECCOMP 5 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_31BIT 16 /* 32bit process */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ +#define TIF_SINGLE_STEP 19 /* This task is single stepped */ +#define TIF_BLOCK_STEP 20 /* This task is block stepped */ -#define _TIF_SYSCALL (1<<TIF_SYSCALL) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_TLB_WAIT (1<<TIF_TLB_WAIT) -#define _TIF_ASCE (1<<TIF_ASCE) -#define _TIF_PER_TRAP (1<<TIF_PER_TRAP) -#define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 1be64a1506d..cd4c68e0398 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -132,6 +132,34 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + +#define __put_get_user_asm(to, from, size, spec) \ +({ \ + register unsigned long __reg0 asm("0") = spec; \ + int __rc; \ + \ + asm volatile( \ + "0: mvcos %1,%3,%2\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%5\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "=Q" (*(to)) \ + : "d" (size), "Q" (*(from)), \ + "d" (__reg0), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) + +#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) +#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) + +#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ + static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { size = __copy_to_user(ptr, x, size); @@ -144,6 +172,8 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s return size ? -EFAULT : 0; } +#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */ + /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index cc10cdd4d6a..0c070c44cde 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -50,6 +50,7 @@ int main(void) DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); + DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); @@ -115,6 +116,7 @@ int main(void) DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); + DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags)); DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); @@ -142,7 +144,6 @@ int main(void) DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); - DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); BLANK(); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 7df5ed9f44d..f204d692036 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -213,7 +213,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) sizeof(current->thread.fp_regs)); restore_fp_regs(current->thread.fp_regs.fprs); - clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d7658c4b2ed..a3b9150e680 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/elf.h> +#include <linux/memblock.h> #include <asm/os_info.h> #include <asm/elf.h> #include <asm/ipl.h> @@ -22,6 +23,24 @@ #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) +static struct memblock_region oldmem_region; + +static struct memblock_type oldmem_type = { + .cnt = 1, + .max = 1, + .total_size = 0, + .regions = &oldmem_region, +}; + +#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ + for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, p_start, \ + p_end, p_nid); \ + i != (u64)ULLONG_MAX; \ + __next_mem_range(&i, nid, &memblock.physmem, \ + &oldmem_type, \ + p_start, p_end, p_nid)) + struct dump_save_areas dump_save_areas; /* @@ -264,19 +283,6 @@ static void *kzalloc_panic(int len) } /* - * Get memory layout and create hole for oldmem - */ -static struct mem_chunk *get_memory_layout(void) -{ - struct mem_chunk *chunk_array; - - chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); - detect_memory_layout(chunk_array, 0); - create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE); - return chunk_array; -} - -/* * Initialize ELF note */ static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, @@ -490,52 +496,33 @@ static int get_cpu_cnt(void) */ static int get_mem_chunk_cnt(void) { - struct mem_chunk *chunk_array, *mem_chunk; - int i, cnt = 0; + int cnt = 0; + u64 idx; - chunk_array = get_memory_layout(); - for (i = 0; i < MEMORY_CHUNKS; i++) { - mem_chunk = &chunk_array[i]; - if (chunk_array[i].type != CHUNK_READ_WRITE && - chunk_array[i].type != CHUNK_READ_ONLY) - continue; - if (mem_chunk->size == 0) - continue; + for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) cnt++; - } - kfree(chunk_array); return cnt; } /* * Initialize ELF loads (new kernel) */ -static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) { - struct mem_chunk *chunk_array, *mem_chunk; - int i; + phys_addr_t start, end; + u64 idx; - chunk_array = get_memory_layout(); - for (i = 0; i < MEMORY_CHUNKS; i++) { - mem_chunk = &chunk_array[i]; - if (mem_chunk->size == 0) - continue; - if (chunk_array[i].type != CHUNK_READ_WRITE && - chunk_array[i].type != CHUNK_READ_ONLY) - continue; - else - phdr->p_filesz = mem_chunk->size; + for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { + phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; - phdr->p_offset = mem_chunk->addr; - phdr->p_vaddr = mem_chunk->addr; - phdr->p_paddr = mem_chunk->addr; - phdr->p_memsz = mem_chunk->size; + phdr->p_offset = start; + phdr->p_vaddr = start; + phdr->p_paddr = start; + phdr->p_memsz = end - start; phdr->p_flags = PF_R | PF_W | PF_X; phdr->p_align = PAGE_SIZE; phdr++; } - kfree(chunk_array); - return i; } /* @@ -584,6 +571,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) /* If we cannot get HSA size for zfcpdump return error */ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size()) return -ENODEV; + + /* For kdump, exclude previous crashkernel memory */ + if (OLDMEM_BASE) { + oldmem_region.base = OLDMEM_BASE; + oldmem_region.size = OLDMEM_SIZE; + oldmem_type.total_size = OLDMEM_SIZE; + } + mem_chunk_cnt = get_mem_chunk_cnt(); alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index a734f3585ce..0dff972a169 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -258,13 +258,19 @@ static __init void setup_topology(void) static void early_pgm_check_handler(void) { const struct exception_table_entry *fixup; + unsigned long cr0, cr0_new; unsigned long addr; addr = S390_lowcore.program_old_psw.addr; fixup = search_exception_tables(addr & PSW_ADDR_INSN); if (!fixup) disabled_wait(0); + /* Disable low address protection before storing into lowcore. */ + __ctl_store(cr0, 0, 0); + cr0_new = cr0 & ~(1UL << 28); + __ctl_load(cr0_new, 0, 0); S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; + __ctl_load(cr0, 0, 0); } static noinline __init void setup_lowcore_early(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1662038516c..18e5af848f9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/linkage.h> +#include <asm/processor.h> #include <asm/cache.h> #include <asm/errno.h> #include <asm/ptrace.h> @@ -37,18 +38,16 @@ __PT_R13 = __PT_GPRS + 524 __PT_R14 = __PT_GPRS + 56 __PT_R15 = __PT_GPRS + 60 -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_ASCE) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) -_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT) - STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) + #define BASED(name) name-system_call(%r13) .macro TRACE_IRQS_ON @@ -160,13 +159,7 @@ ENTRY(__switch_to) lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next l %r15,__THREAD_ksp(%r3) # load kernel stack of next - lhi %r6,_TIF_TRANSFER # transfer TIF bits - n %r6,__TI_flags(%r4) # isolate TIF bits - jz 0f - o %r6,__TI_flags(%r5) # set TIF bits of next - st %r6,__TI_flags(%r5) - ni __TI_flags+3(%r4),255-_TIF_TRANSFER # clear TIF bits of prev -0: lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -181,6 +174,7 @@ sysc_stm: stm %r8,%r15,__LC_SAVE_AREA_SYNC l %r12,__LC_THREAD_INFO l %r13,__LC_SVC_NEW_PSW+4 + lhi %r14,_PIF_SYSCALL sysc_per: l %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs @@ -190,8 +184,8 @@ sysc_vtime: mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + st %r14,__PT_FLAGS(%r11) sysc_do_svc: - oi __TI_flags+3(%r12),_TIF_SYSCALL l %r10,__TI_sysc_table(%r12) # 31 bit system call table lh %r8,__PT_INT_CODE+2(%r11) sla %r8,2 # shift and test for svc0 @@ -207,7 +201,7 @@ sysc_nr_ok: st %r2,__PT_ORIG_GPR2(%r11) st %r7,STACK_FRAME_OVERHEAD(%r15) l %r9,0(%r8,%r10) # get system call addr. - tm __TI_flags+2(%r12),_TIF_TRACE >> 8 + tm __TI_flags+3(%r12),_TIF_TRACE jnz sysc_tracesys basr %r14,%r9 # call sys_xxxx st %r2,__PT_R2(%r11) # store return value @@ -217,9 +211,12 @@ sysc_return: sysc_tif: tm __PT_PSW+1(%r11),0x01 # returning to user ? jno sysc_restore - tm __TI_flags+3(%r12),_TIF_WORK_SVC - jnz sysc_work # check for work - ni __TI_flags+3(%r12),255-_TIF_SYSCALL + tm __PT_FLAGS+3(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+3(%r12),_TIF_WORK + jnz sysc_work # check for thread work + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz sysc_work sysc_restore: mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) stpt __LC_EXIT_TIMER @@ -231,17 +228,17 @@ sysc_done: # One of the work bits is on. Find out which one. # sysc_work: - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED jo sysc_reschedule - tm __TI_flags+3(%r12),_TIF_PER_TRAP + tm __PT_FLAGS+3(%r11),_PIF_PER_TRAP jo sysc_singlestep tm __TI_flags+3(%r12),_TIF_SIGPENDING jo sysc_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+3(%r12),_TIF_ASCE + tm __LC_CPU_FLAGS+3,_CIF_ASCE jo sysc_uaccess j sysc_return # beware of critical section cleanup @@ -254,7 +251,7 @@ sysc_reschedule: br %r1 # call schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: l %r1,BASED(.Lhandle_mcck) @@ -262,10 +259,10 @@ sysc_mcck_pending: br %r1 # TIF bit will be cleared by handler # -# _TIF_ASCE is set, load user space asce +# _CIF_ASCE is set, load user space asce # sysc_uaccess: - ni __TI_flags+3(%r12),255-_TIF_ASCE + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE lctl %c1,%c1,__LC_USER_ASCE # load primary asce j sysc_return @@ -276,7 +273,7 @@ sysc_sigpending: lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_SYSCALL + tm __PT_FLAGS+3(%r11),_PIF_SYSCALL jno sysc_return lm %r2,%r7,__PT_R2(%r11) # load svc arguments l %r10,__TI_sysc_table(%r12) # 31 bit system call table @@ -297,10 +294,10 @@ sysc_notify_resume: br %r1 # call do_notify_resume # -# _TIF_PER_TRAP is set, call do_per_trap +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP + ni __PT_FLAGS+3(%r11),255-_PIF_PER_TRAP lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_per_trap) la %r14,BASED(sysc_return) @@ -330,7 +327,7 @@ sysc_tracego: basr %r14,%r9 # call sys_xxx st %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_TRACE >> 8 + tm __TI_flags+3(%r12),_TIF_TRACE jz sysc_return l %r1,BASED(.Ltrace_exit) lr %r2,%r11 # pass pointer to pt_regs @@ -384,12 +381,13 @@ ENTRY(pgm_check_handler) stm %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception jz 0f l %r1,__TI_task(%r12) tmh %r8,0x0001 # kernel per event ? jz pgm_kprobe - oi __TI_flags+3(%r12),_TIF_PER_TRAP + oi __PT_FLAGS+3(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID @@ -420,9 +418,9 @@ pgm_kprobe: # single stepped system call # pgm_svcper: - oi __TI_flags+3(%r12),_TIF_PER_TRAP mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lhi %r14,_PIF_SYSCALL | _PIF_PER_TRAP lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs /* @@ -445,6 +443,7 @@ io_skip: mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) io_loop: @@ -466,8 +465,10 @@ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: - tm __TI_flags+3(%r12),_TIF_WORK_INT + tm __TI_flags+3(%r12),_TIF_WORK jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+3,_CIF_WORK + jnz io_work io_restore: mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) stpt __LC_EXIT_TIMER @@ -477,7 +478,7 @@ io_done: # # There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK_INT work +# 1) if we return to user space we can do all _TIF_WORK work # 2) if we return to kernel code and preemptive scheduling is enabled check # the preemption counter and if it is zero call preempt_schedule_irq # Before any work can be done, a switch to the kernel stack is required. @@ -520,11 +521,9 @@ io_work_user: # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING # io_work_tif: - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING jo io_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED jo io_reschedule @@ -532,12 +531,12 @@ io_work_tif: jo io_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME jo io_notify_resume - tm __TI_flags+3(%r12),_TIF_ASCE + tm __LC_CPU_FLAGS+3,_CIF_ASCE jo io_uaccess j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: # TRACE_IRQS_ON already done at io_return @@ -547,10 +546,10 @@ io_mcck_pending: j io_return # -# _TIF_ASCE is set, load user space asce +# _CIF_ASCE is set, load user space asce # io_uaccess: - ni __TI_flags+3(%r12),255-_TIF_ASCE + ni __LC_CPU_FLAGS+3,255-_CIF_ASCE lctl %c1,%c1,__LC_USER_ASCE # load primary asce j io_return @@ -613,6 +612,7 @@ ext_skip: stm %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs @@ -677,6 +677,7 @@ mcck_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 stm %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(4,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) l %r1,BASED(.Ldo_machine_check) lr %r2,%r11 # pass pointer to pt_regs @@ -689,7 +690,7 @@ mcck_skip: la %r11,STACK_FRAME_OVERHEAD(%r15) lr %r15,%r1 ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - tm __TI_flags+3(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+3,_CIF_MCCK_PENDING jno mcck_return TRACE_IRQS_OFF l %r1,BASED(.Lhandle_mcck) @@ -842,6 +843,8 @@ cleanup_system_call: stm %r0,%r7,__PT_R0(%r9) mvc __PT_PSW(8,%r9),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(4,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+3(%r9),_PIF_SYSCALL # setup saved register 15 st %r15,28(%r11) # r15 stack pointer # set new psw address and exit diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 5963e43618b..c41f3f90672 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -42,13 +42,11 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_ASCE) -_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT) -_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_PIF_WORK = (_PIF_PER_TRAP) #define BASED(name) name-system_call(%r13) @@ -190,13 +188,7 @@ ENTRY(__switch_to) lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next lg %r15,__THREAD_ksp(%r3) # load kernel stack of next - llill %r6,_TIF_TRANSFER # transfer TIF bits - ng %r6,__TI_flags(%r4) # isolate TIF bits - jz 0f - og %r6,__TI_flags(%r5) # set TIF bits of next - stg %r6,__TI_flags(%r5) - ni __TI_flags+7(%r4),255-_TIF_TRANSFER # clear TIF bits of prev -0: lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 __critical_start: @@ -211,6 +203,7 @@ sysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO + lghi %r14,_PIF_SYSCALL sysc_per: lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs @@ -221,8 +214,8 @@ sysc_vtime: mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) sysc_do_svc: - oi __TI_flags+7(%r12),_TIF_SYSCALL lg %r10,__TI_sysc_table(%r12) # address of system call table llgh %r8,__PT_INT_CODE+2(%r11) slag %r8,%r8,2 # shift and test for svc 0 @@ -238,7 +231,7 @@ sysc_nr_ok: stg %r2,__PT_ORIG_GPR2(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lgf %r9,0(%r8,%r10) # get system call add. - tm __TI_flags+6(%r12),_TIF_TRACE >> 8 + tm __TI_flags+7(%r12),_TIF_TRACE jnz sysc_tracesys basr %r14,%r9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value @@ -248,9 +241,12 @@ sysc_return: sysc_tif: tm __PT_PSW+1(%r11),0x01 # returning to user ? jno sysc_restore - tm __TI_flags+7(%r12),_TIF_WORK_SVC + tm __PT_FLAGS+7(%r11),_PIF_WORK + jnz sysc_work + tm __TI_flags+7(%r12),_TIF_WORK jnz sysc_work # check for work - ni __TI_flags+7(%r12),255-_TIF_SYSCALL + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz sysc_work sysc_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) @@ -265,17 +261,17 @@ sysc_done: # One of the work bits is on. Find out which one. # sysc_work: - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule - tm __TI_flags+7(%r12),_TIF_PER_TRAP + tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP jo sysc_singlestep tm __TI_flags+7(%r12),_TIF_SIGPENDING jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_ASCE + tm __LC_CPU_FLAGS+7,_CIF_ASCE jo sysc_uaccess j sysc_return # beware of critical section cleanup @@ -287,17 +283,17 @@ sysc_reschedule: jg schedule # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: larl %r14,sysc_return jg s390_handle_mcck # TIF bit will be cleared by handler # -# _TIF_ASCE is set, load user space asce +# _CIF_ASCE is set, load user space asce # sysc_uaccess: - ni __TI_flags+7(%r12),255-_TIF_ASCE + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce j sysc_return @@ -307,7 +303,7 @@ sysc_uaccess: sysc_sigpending: lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_signal - tm __TI_flags+7(%r12),_TIF_SYSCALL + tm __PT_FLAGS+7(%r11),_PIF_SYSCALL jno sysc_return lmg %r2,%r7,__PT_R2(%r11) # load svc arguments lg %r10,__TI_sysc_table(%r12) # address of system call table @@ -327,10 +323,10 @@ sysc_notify_resume: jg do_notify_resume # -# _TIF_PER_TRAP is set, call do_per_trap +# _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP + ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP lgr %r2,%r11 # pass pointer to pt_regs larl %r14,sysc_return jg do_per_trap @@ -357,7 +353,7 @@ sysc_tracego: basr %r14,%r9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_TRACE >> 8 + tm __TI_flags+7(%r12),_TIF_TRACE jz sysc_return lgr %r2,%r11 # pass pointer to pt_regs larl %r14,sysc_return @@ -416,12 +412,13 @@ ENTRY(pgm_check_handler) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception jz 0f tmhh %r8,0x0001 # kernel per event ? jz pgm_kprobe - oi __TI_flags+7(%r12),_TIF_PER_TRAP + oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID @@ -451,10 +448,10 @@ pgm_kprobe: # single stepped system call # pgm_svcper: - oi __TI_flags+7(%r12),_TIF_PER_TRAP mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW larl %r14,sysc_per stg %r14,__LC_RETURN_PSW+8 + lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs /* @@ -479,6 +476,7 @@ io_skip: mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) io_loop: @@ -499,8 +497,10 @@ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: - tm __TI_flags+7(%r12),_TIF_WORK_INT + tm __TI_flags+7(%r12),_TIF_WORK jnz io_work # there is work to do (signals etc.) + tm __LC_CPU_FLAGS+7,_CIF_WORK + jnz io_work io_restore: lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) @@ -513,7 +513,7 @@ io_done: # # There is work todo, find out in which context we have been interrupted: -# 1) if we return to user space we can do all _TIF_WORK_INT work +# 1) if we return to user space we can do all _TIF_WORK work # 2) if we return to kernel code and kvm is enabled check if we need to # modify the psw to leave SIE # 3) if we return to kernel code and preemptive scheduling is enabled check @@ -557,11 +557,9 @@ io_work_user: # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED -# and _TIF_MCCK_PENDING # io_work_tif: - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jo io_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo io_reschedule @@ -569,12 +567,12 @@ io_work_tif: jo io_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo io_notify_resume - tm __TI_flags+7(%r12),_TIF_ASCE + tm __LC_CPU_FLAGS+7,_CIF_ASCE jo io_uaccess j io_return # beware of critical section cleanup # -# _TIF_MCCK_PENDING is set, call handler +# _CIF_MCCK_PENDING is set, call handler # io_mcck_pending: # TRACE_IRQS_ON already done at io_return @@ -583,10 +581,10 @@ io_mcck_pending: j io_return # -# _TIF_ASCE is set, load user space asce +# _CIF_ASCE is set, load user space asce # io_uaccess: - ni __TI_flags+7(%r12),255-_TIF_ASCE + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce j io_return @@ -650,6 +648,7 @@ ext_skip: mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -716,6 +715,7 @@ mcck_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check @@ -727,7 +727,7 @@ mcck_skip: la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - tm __TI_flags+7(%r12),_TIF_MCCK_PENDING + tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING jno mcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck @@ -884,6 +884,8 @@ cleanup_system_call: stmg %r0,%r7,__PT_R0(%r9) mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC + xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9) + mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL # setup saved register r15 stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 429afcc480c..7ba7d678451 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -437,13 +437,13 @@ ENTRY(startup_kdump) #if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efe3, 0xf46ce800, 0x00400000 + .long 3, 0xc100efea, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efe3, 0xf46c0000 + .long 2, 0xc100efea, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efe3, 0xf0680000 + .long 2, 0xc100efea, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) - .long 1, 0xc100efc3 + .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) .long 1, 0xc0002000 #elif defined(CONFIG_MARCH_Z900) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 9a99856df1c..6dbe80983a2 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -59,7 +59,6 @@ ENTRY(startup_continue) .long 0 # cr13: home space segment table .long 0xc0000000 # cr14: machine check handling off .long 0 # cr15: linkage stack operations -.Lmchunk:.long memory_chunk .Lbss_bgn: .long __bss_start .Lbss_end: .long _end .Lparmaddr: .long PARMAREA diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index c4c03381987..210e1285f75 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -55,7 +55,7 @@ void s390_handle_mcck(void) local_mcck_disable(); mcck = __get_cpu_var(cpu_mcck); memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); - clear_thread_flag(TIF_MCCK_PENDING); + clear_cpu_flag(CIF_MCCK_PENDING); local_mcck_enable(); local_irq_restore(flags); @@ -313,7 +313,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) */ mcck->kill_task = 1; mcck->mcck_code = *(unsigned long long *) mci; - set_thread_flag(TIF_MCCK_PENDING); + set_cpu_flag(CIF_MCCK_PENDING); } else { /* * Couldn't restore all register contents while in @@ -352,12 +352,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (mci->cp) { /* Channel report word pending */ mcck->channel_report = 1; - set_thread_flag(TIF_MCCK_PENDING); + set_cpu_flag(CIF_MCCK_PENDING); } if (mci->w) { /* Warning pending */ mcck->warning = 1; - set_thread_flag(TIF_MCCK_PENDING); + set_cpu_flag(CIF_MCCK_PENDING); } nmi_exit(); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index dd145321d21..93b9ca42e5c 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -64,7 +64,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) void arch_cpu_idle(void) { local_mcck_disable(); - if (test_thread_flag(TIF_MCCK_PENDING)) { + if (test_cpu_flag(CIF_MCCK_PENDING)) { local_mcck_enable(); local_irq_enable(); return; @@ -76,7 +76,7 @@ void arch_cpu_idle(void) void arch_cpu_idle_exit(void) { - if (test_thread_flag(TIF_MCCK_PENDING)) + if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); } @@ -123,7 +123,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); clear_tsk_thread_flag(p, TIF_SINGLE_STEP); - clear_tsk_thread_flag(p, TIF_PER_TRAP); /* Initialize per thread user and system timer values */ ti = task_thread_info(p); ti->user_timer = 0; @@ -152,6 +151,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, } frame->childregs = *current_pt_regs(); frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1c82619eb4f..2d716734b5b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -136,7 +136,7 @@ void ptrace_disable(struct task_struct *task) memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - clear_tsk_thread_flag(task, TIF_PER_TRAP); + clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP); task->thread.per_flags = 0; } @@ -813,7 +813,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - clear_thread_flag(TIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL); ret = -1; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 88d1ca81e2d..1e2264b46e4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -78,10 +78,9 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap = 0; char elf_platform[ELF_PLATFORM_SIZE]; -struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; - int __initdata memory_end_set; unsigned long __initdata memory_end; +unsigned long __initdata max_physmem_end; unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -212,7 +211,7 @@ static void __init conmode_default(void) } } -#ifdef CONFIG_ZFCPDUMP +#ifdef CONFIG_CRASH_DUMP static void __init setup_zfcpdump(void) { if (ipl_info.type != IPL_TYPE_FCP_DUMP) @@ -224,7 +223,7 @@ static void __init setup_zfcpdump(void) } #else static inline void setup_zfcpdump(void) {} -#endif /* CONFIG_ZFCPDUMP */ +#endif /* CONFIG_CRASH_DUMP */ /* * Reboot, halt and power_off stubs. They just call _machine_restart, @@ -273,6 +272,7 @@ EXPORT_SYMBOL_GPL(pm_power_off); static int __init early_parse_mem(char *p) { memory_end = memparse(p, &p); + memory_end &= PAGE_MASK; memory_end_set = 1; return 0; } @@ -373,6 +373,10 @@ static void __init setup_lowcore(void) mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); +#ifdef CONFIG_SMP + lc->spinlock_lockval = arch_spin_lockval(0); +#endif + set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; } @@ -401,7 +405,8 @@ static struct resource __initdata *standard_resources[] = { static void __init setup_resources(void) { struct resource *res, *std_res, *sub_res; - int i, j; + struct memblock_region *reg; + int j; code_resource.start = (unsigned long) &_text; code_resource.end = (unsigned long) &_etext - 1; @@ -410,24 +415,13 @@ static void __init setup_resources(void) bss_resource.start = (unsigned long) &__bss_start; bss_resource.end = (unsigned long) &__bss_stop - 1; - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; + for_each_memblock(memory, reg) { res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; - switch (memory_chunk[i].type) { - case CHUNK_READ_WRITE: - res->name = "System RAM"; - break; - case CHUNK_READ_ONLY: - res->name = "System ROM"; - res->flags |= IORESOURCE_READONLY; - break; - default: - res->name = "reserved"; - } - res->start = memory_chunk[i].addr; - res->end = res->start + memory_chunk[i].size - 1; + + res->name = "System RAM"; + res->start = reg->base; + res->end = reg->base + reg->size - 1; request_resource(&iomem_resource, res); for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { @@ -451,48 +445,11 @@ static void __init setup_resources(void) static void __init setup_memory_end(void) { unsigned long vmax, vmalloc_size, tmp; - unsigned long real_memory_size = 0; - int i; - - -#ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP && - !OLDMEM_BASE && sclp_get_hsa_size()) { - memory_end = sclp_get_hsa_size(); - memory_end_set = 1; - } -#endif - memory_end &= PAGE_MASK; - - /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. - */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - unsigned long start, end; - struct mem_chunk *chunk; - unsigned long align; - - chunk = &memory_chunk[i]; - if (!chunk->size) - continue; - align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); - start = (chunk->addr + align - 1) & ~(align - 1); - end = (chunk->addr + chunk->size) & ~(align - 1); - if (start >= end) - memset(chunk, 0, sizeof(*chunk)); - else { - chunk->addr = start; - chunk->size = end - start; - } - real_memory_size = max(real_memory_size, - chunk->addr + chunk->size); - } /* Choose kernel address space layout: 2, 3, or 4 levels. */ #ifdef CONFIG_64BIT vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; - tmp = (memory_end ?: real_memory_size) / PAGE_SIZE; + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; if (tmp <= (1UL << 42)) vmax = 1UL << 42; /* 3-level kernel page table */ @@ -520,21 +477,11 @@ static void __init setup_memory_end(void) vmemmap = (struct page *) tmp; /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: real_memory_size, tmp); - - /* Fixup memory chunk array to fit into 0..memory_end */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &memory_chunk[i]; + memory_end = min(memory_end ?: max_physmem_end, tmp); + max_pfn = max_low_pfn = PFN_DOWN(memory_end); + memblock_remove(memory_end, ULONG_MAX); - if (!chunk->size) - continue; - if (chunk->addr >= memory_end) { - memset(chunk, 0, sizeof(*chunk)); - continue; - } - if (chunk->addr + chunk->size > memory_end) - chunk->size = memory_end - chunk->addr; - } + pr_notice("Max memory size: %luMB\n", memory_end >> 20); } static void __init setup_vmcoreinfo(void) @@ -545,89 +492,6 @@ static void __init setup_vmcoreinfo(void) #ifdef CONFIG_CRASH_DUMP /* - * Find suitable location for crashkernel memory - */ -static unsigned long __init find_crash_base(unsigned long crash_size, - char **msg) -{ - unsigned long crash_base; - struct mem_chunk *chunk; - int i; - - if (memory_chunk[0].size < crash_size) { - *msg = "first memory chunk must be at least crashkernel size"; - return 0; - } - if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) - return OLDMEM_BASE; - - for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { - chunk = &memory_chunk[i]; - if (chunk->size == 0) - continue; - if (chunk->type != CHUNK_READ_WRITE) - continue; - if (chunk->size < crash_size) - continue; - crash_base = (chunk->addr + chunk->size) - crash_size; - if (crash_base < crash_size) - continue; - if (crash_base < sclp_get_hsa_size()) - continue; - if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) - continue; - return crash_base; - } - *msg = "no suitable area found"; - return 0; -} - -/* - * Check if crash_base and crash_size is valid - */ -static int __init verify_crash_base(unsigned long crash_base, - unsigned long crash_size, - char **msg) -{ - struct mem_chunk *chunk; - int i; - - /* - * Because we do the swap to zero, we must have at least 'crash_size' - * bytes free space before crash_base - */ - if (crash_size > crash_base) { - *msg = "crashkernel offset must be greater than size"; - return -EINVAL; - } - - /* First memory chunk must be at least crash_size */ - if (memory_chunk[0].size < crash_size) { - *msg = "first memory chunk must be at least crashkernel size"; - return -EINVAL; - } - /* Check if we fit into the respective memory chunk */ - for (i = 0; i < MEMORY_CHUNKS; i++) { - chunk = &memory_chunk[i]; - if (chunk->size == 0) - continue; - if (crash_base < chunk->addr) - continue; - if (crash_base >= chunk->addr + chunk->size) - continue; - /* we have found the memory chunk */ - if (crash_base + crash_size > chunk->addr + chunk->size) { - *msg = "selected memory chunk is too small for " - "crashkernel memory"; - return -EINVAL; - } - return 0; - } - *msg = "invalid memory range specified"; - return -EINVAL; -} - -/* * When kdump is enabled, we have to ensure that no memory from * the area [0 - crashkernel memory size] and * [crashk_res.start - crashk_res.end] is set offline. @@ -653,23 +517,44 @@ static struct notifier_block kdump_mem_nb = { #endif /* + * Make sure that the area behind memory_end is protected + */ +static void reserve_memory_end(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (ipl_info.type == IPL_TYPE_FCP_DUMP && + !OLDMEM_BASE && sclp_get_hsa_size()) { + memory_end = sclp_get_hsa_size(); + memory_end &= PAGE_MASK; + memory_end_set = 1; + } +#endif + if (!memory_end_set) + return; + memblock_reserve(memory_end, ULONG_MAX); +} + +/* * Make sure that oldmem, where the dump is stored, is protected */ static void reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP - unsigned long real_size = 0; - int i; - - if (!OLDMEM_BASE) - return; - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &memory_chunk[i]; + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); +#endif +} - real_size = max(real_size, chunk->addr + chunk->size); - } - create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); - create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void remove_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) + /* Forget all memory above the running kdump system */ + memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); #endif } @@ -680,167 +565,132 @@ static void __init reserve_crashkernel(void) { #ifdef CONFIG_CRASH_DUMP unsigned long long crash_base, crash_size; - char *msg = NULL; + phys_addr_t low, high; int rc; rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, &crash_base); - if (rc || crash_size == 0) - return; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); - if (register_memory_notifier(&kdump_mem_nb)) + if (rc || crash_size == 0) return; - if (!crash_base) - crash_base = find_crash_base(crash_size, &msg); - if (!crash_base) { - pr_info("crashkernel reservation failed: %s\n", msg); - unregister_memory_notifier(&kdump_mem_nb); + + if (memblock.memory.regions[0].size < crash_size) { + pr_info("crashkernel reservation failed: %s\n", + "first memory chunk must be at least crashkernel size"); return; } - if (verify_crash_base(crash_base, crash_size, &msg)) { - pr_info("crashkernel reservation failed: %s\n", msg); - unregister_memory_notifier(&kdump_mem_nb); + + low = crash_base ?: OLDMEM_BASE; + high = low + crash_size; + if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + /* The crashkernel fits into OLDMEM, reuse OLDMEM */ + crash_base = low; + } else { + /* Find suitable area in free memory */ + low = max_t(unsigned long, crash_size, sclp_get_hsa_size()); + high = crash_base ? crash_base + crash_size : ULONG_MAX; + + if (crash_base && crash_base < low) { + pr_info("crashkernel reservation failed: %s\n", + "crash_base too low"); + return; + } + low = crash_base ?: low; + crash_base = memblock_find_in_range(low, high, crash_size, + KEXEC_CRASH_MEM_ALIGN); + } + + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", + "no suitable area found"); return; } + + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!OLDMEM_BASE && MACHINE_IS_VM) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - create_mem_hole(memory_chunk, crash_base, crash_size); + memblock_remove(crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", - crash_size >> 20, crash_base >> 20, memory_end >> 20); + crash_size >> 20, crash_base >> 20, + (unsigned long)memblock.memory.total_size >> 20); os_info_crashkernel_add(crash_base, crash_size); #endif } -static void __init setup_memory(void) +/* + * Reserve the initrd from being used by memblock + */ +static void __init reserve_initrd(void) { - unsigned long bootmap_size; - unsigned long start_pfn, end_pfn; - int i; +#ifdef CONFIG_BLK_DEV_INITRD + initrd_start = INITRD_START; + initrd_end = initrd_start + INITRD_SIZE; + memblock_reserve(INITRD_START, INITRD_SIZE); +#endif +} - /* - * partially used pages are not usable - thus - * we are rounding upwards: - */ +/* + * Check for initrd being in usable memory + */ +static void __init check_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (INITRD_START && INITRD_SIZE && + !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + pr_err("initrd does not fit memory.\n"); + memblock_free(INITRD_START, INITRD_SIZE); + initrd_start = initrd_end = 0; + } +#endif +} + +/* + * Reserve all kernel text + */ +static void __init reserve_kernel(void) +{ + unsigned long start_pfn; start_pfn = PFN_UP(__pa(&_end)); - end_pfn = max_pfn = PFN_DOWN(memory_end); -#ifdef CONFIG_BLK_DEV_INITRD /* - * Move the initrd in case the bitmap of the bootmem allocater - * would overwrite it. + * Reserve memory used for lowcore/command line/kernel image. */ + memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) + - (unsigned long)_stext); +} - if (INITRD_START && INITRD_SIZE) { - unsigned long bmap_size; - unsigned long start; - - bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1); - bmap_size = PFN_PHYS(bmap_size); - - if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { - start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; - +static void __init reserve_elfcorehdr(void) +{ #ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { - /* Move initrd behind kdump oldmem */ - if (start + INITRD_SIZE > OLDMEM_BASE && - start < OLDMEM_BASE + OLDMEM_SIZE) - start = OLDMEM_BASE + OLDMEM_SIZE; - } -#endif - if (start + INITRD_SIZE > memory_end) { - pr_err("initrd extends beyond end of " - "memory (0x%08lx > 0x%08lx) " - "disabling initrd\n", - start + INITRD_SIZE, memory_end); - INITRD_START = INITRD_SIZE = 0; - } else { - pr_info("Moving initrd (0x%08lx -> " - "0x%08lx, size: %ld)\n", - INITRD_START, start, INITRD_SIZE); - memmove((void *) start, (void *) INITRD_START, - INITRD_SIZE); - INITRD_START = start; - } - } - } + if (is_kdump_kernel()) + memblock_reserve(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size)); #endif +} - /* - * Initialize the boot-time allocator - */ - bootmap_size = init_bootmem(start_pfn, end_pfn); +static void __init setup_memory(void) +{ + struct memblock_region *reg; /* - * Register RAM areas with the bootmem allocator. + * Init storage key for present memory */ - - for (i = 0; i < MEMORY_CHUNKS; i++) { - unsigned long start_chunk, end_chunk, pfn; - - if (!memory_chunk[i].size) - continue; - start_chunk = PFN_DOWN(memory_chunk[i].addr); - end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); - end_chunk = min(end_chunk, end_pfn); - if (start_chunk >= end_chunk) - continue; - memblock_add_node(PFN_PHYS(start_chunk), - PFN_PHYS(end_chunk - start_chunk), 0); - pfn = max(start_chunk, start_pfn); - storage_key_init_range(PFN_PHYS(pfn), PFN_PHYS(end_chunk)); + for_each_memblock(memory, reg) { + storage_key_init_range(reg->base, reg->base + reg->size); } - psw_set_key(PAGE_DEFAULT_KEY); - free_bootmem_with_active_regions(0, max_pfn); - - /* - * Reserve memory used for lowcore/command line/kernel image. - */ - reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT); - reserve_bootmem((unsigned long)_stext, - PFN_PHYS(start_pfn) - (unsigned long)_stext, - BOOTMEM_DEFAULT); - /* - * Reserve the bootmem bitmap itself as well. We do this in two - * steps (first step was init_bootmem()) because this catches - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. - */ - reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, - BOOTMEM_DEFAULT); - -#ifdef CONFIG_CRASH_DUMP - if (crashk_res.start) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1, - BOOTMEM_DEFAULT); - if (is_kdump_kernel()) - reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, - PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); -#endif -#ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE) { - if (INITRD_START + INITRD_SIZE <= memory_end) { - reserve_bootmem(INITRD_START, INITRD_SIZE, - BOOTMEM_DEFAULT); - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - } else { - pr_err("initrd extends beyond end of " - "memory (0x%08lx > 0x%08lx) " - "disabling initrd\n", - initrd_start + INITRD_SIZE, memory_end); - initrd_start = initrd_end = 0; - } - } -#endif + /* Only cosmetics */ + memblock_enforce_memory_limit(memblock_end_of_DRAM()); } /* @@ -989,23 +839,46 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = Root_RAM0; + /* Is init_mm really needed? */ init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) &_etext; init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; parse_early_param(); - detect_memory_layout(memory_chunk, memory_end); os_info_init(); setup_ipl(); + + /* Do some memory reservations *before* memory is added to memblock */ + reserve_memory_end(); reserve_oldmem(); + reserve_kernel(); + reserve_initrd(); + reserve_elfcorehdr(); + memblock_allow_resize(); + + /* Get information about *all* installed memory */ + detect_memory_memblock(); + + remove_oldmem(); + + /* + * Make sure all chunks are MAX_ORDER aligned so we don't need the + * extra checks that HOLES_IN_ZONE would require. + * + * Is this still required? + */ + memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); + setup_memory_end(); - reserve_crashkernel(); setup_memory(); + + check_initrd(); + reserve_crashkernel(); + setup_resources(); setup_vmcoreinfo(); setup_lowcore(); - smp_fill_possible_mask(); cpu_init(); s390_init_cpu_topology(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index d8fd508ccd1..42b49f9e19b 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -113,7 +113,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) sizeof(current->thread.fp_regs)); restore_fp_regs(current->thread.fp_regs.fprs); - clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -356,7 +356,7 @@ void do_signal(struct pt_regs *regs) * call information. */ current_thread_info()->system_call = - test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0; + test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { @@ -384,7 +384,7 @@ void do_signal(struct pt_regs *regs) } } /* No longer in a system call */ - clear_thread_flag(TIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL); if (is_compat_task()) handle_signal32(signr, &ka, &info, oldset, regs); @@ -394,7 +394,7 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ - clear_thread_flag(TIF_SYSCALL); + clear_pt_regs_flag(regs, PIF_SYSCALL); if (current_thread_info()->system_call) { regs->int_code = current_thread_info()->system_call; switch (regs->gprs[2]) { @@ -407,9 +407,9 @@ void do_signal(struct pt_regs *regs) case -ERESTARTNOINTR: /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_thread_flag(TIF_SYSCALL); + set_pt_regs_flag(regs, PIF_SYSCALL); if (test_thread_flag(TIF_SINGLE_STEP)) - set_thread_flag(TIF_PER_TRAP); + clear_pt_regs_flag(regs, PIF_PER_TRAP); break; } } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 86e65ec3422..243c7e51260 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -170,6 +170,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->panic_stack = pcpu->panic_stack + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL); @@ -226,6 +227,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; @@ -403,15 +405,6 @@ void smp_send_stop(void) } /* - * Stop the current cpu. - */ -void smp_stop_cpu(void) -{ - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); - for (;;) ; -} - -/* * This is the main routine where commands issued by other * cpus are handled. */ @@ -519,7 +512,7 @@ void smp_ctl_clear_bit(int cr, int bit) } EXPORT_SYMBOL(smp_ctl_clear_bit); -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) +#ifdef CONFIG_CRASH_DUMP static void __init smp_get_save_area(int cpu, u16 address) { @@ -534,14 +527,12 @@ static void __init smp_get_save_area(int cpu, u16 address) save_area = dump_save_area_create(cpu); if (!save_area) panic("could not allocate memory for save area\n"); -#ifdef CONFIG_CRASH_DUMP if (address == boot_cpu_address) { /* Copy the registers of the boot cpu. */ copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), SAVE_AREA_BASE - PAGE_SIZE, 0); return; } -#endif /* Get the registers of a non-boot cpu. */ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); @@ -558,11 +549,11 @@ int smp_store_status(int cpu) return 0; } -#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ +#else /* CONFIG_CRASH_DUMP */ static inline void smp_get_save_area(int cpu, u16 address) { } -#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */ +#endif /* CONFIG_CRASH_DUMP */ void smp_cpu_set_polarization(int cpu, int val) { @@ -809,6 +800,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { S390_lowcore.cpu_nr = 0; + S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } /* diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 386d37a228b..0931b110c82 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -226,7 +226,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; vdso_data->wtom_clock_nsec = tk->xtime_nsec + - + (tk->wall_to_monotonic.tv_nsec << tk->shift); + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); nsecps = (u64) NSEC_PER_SEC << tk->shift; while (vdso_data->wtom_clock_nsec >= nsecps) { vdso_data->wtom_clock_nsec -= nsecps; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 6298fed11ce..fa3b8cdaada 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -333,7 +333,9 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = alloc_bootmem(sizeof(struct mask_info)); + mask->next = alloc_bootmem_align( + roundup_pow_of_two(sizeof(struct mask_info)), + roundup_pow_of_two(sizeof(struct mask_info))); mask = mask->next; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9ae6664ff08..825fe7bf95a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -907,7 +907,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) if (need_resched()) schedule(); - if (test_thread_flag(TIF_MCCK_PENDING)) + if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); if (!kvm_is_ucontrol(vcpu->kvm)) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index f709983f41f..5b0e445bc3f 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -26,83 +26,81 @@ __setup("spin_retry=", spin_retry_setup); void arch_spin_lock_wait(arch_spinlock_t *lp) { - int count = spin_retry; - unsigned int cpu = ~smp_processor_id(); + unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; + int count; while (1) { - owner = lp->owner_cpu; - if (!owner || smp_vcpu_scheduled(~owner)) { - for (count = spin_retry; count > 0; count--) { - if (arch_spin_is_locked(lp)) - continue; - if (_raw_compare_and_swap(&lp->owner_cpu, 0, - cpu) == 0) - return; - } - if (MACHINE_IS_LPAR) - continue; + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return; + continue; } - owner = lp->owner_cpu; - if (owner) + /* Check if the lock owner is running. */ + if (!smp_vcpu_scheduled(~owner)) { + smp_yield_cpu(~owner); + continue; + } + /* Loop for a while on the lock value. */ + count = spin_retry; + do { + owner = ACCESS_ONCE(lp->lock); + } while (owner && count-- > 0); + if (!owner) + continue; + /* + * For multiple layers of hypervisors, e.g. z/VM + LPAR + * yield the CPU if the lock is still unavailable. + */ + if (!MACHINE_IS_LPAR) smp_yield_cpu(~owner); - if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) - return; } } EXPORT_SYMBOL(arch_spin_lock_wait); void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { - int count = spin_retry; - unsigned int cpu = ~smp_processor_id(); + unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; + int count; local_irq_restore(flags); while (1) { - owner = lp->owner_cpu; - if (!owner || smp_vcpu_scheduled(~owner)) { - for (count = spin_retry; count > 0; count--) { - if (arch_spin_is_locked(lp)) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&lp->owner_cpu, 0, - cpu) == 0) - return; - local_irq_restore(flags); - } - if (MACHINE_IS_LPAR) - continue; + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + local_irq_disable(); + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return; + local_irq_restore(flags); } - owner = lp->owner_cpu; - if (owner) + /* Check if the lock owner is running. */ + if (!smp_vcpu_scheduled(~owner)) { smp_yield_cpu(~owner); - local_irq_disable(); - if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) - return; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(arch_spin_lock_wait_flags); - -int arch_spin_trylock_retry(arch_spinlock_t *lp) -{ - unsigned int cpu = ~smp_processor_id(); - int count; - - for (count = spin_retry; count > 0; count--) { - if (arch_spin_is_locked(lp)) continue; - if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) - return 1; + } + /* Loop for a while on the lock value. */ + count = spin_retry; + do { + owner = ACCESS_ONCE(lp->lock); + } while (owner && count-- > 0); + if (!owner) + continue; + /* + * For multiple layers of hypervisors, e.g. z/VM + LPAR + * yield the CPU if the lock is still unavailable. + */ + if (!MACHINE_IS_LPAR) + smp_yield_cpu(~owner); } - return 0; } -EXPORT_SYMBOL(arch_spin_trylock_retry); +EXPORT_SYMBOL(arch_spin_lock_wait_flags); -void arch_spin_relax(arch_spinlock_t *lock) +void arch_spin_relax(arch_spinlock_t *lp) { - unsigned int cpu = lock->owner_cpu; + unsigned int cpu = lp->lock; if (cpu != 0) { if (MACHINE_IS_VM || MACHINE_IS_KVM || !smp_vcpu_scheduled(~cpu)) @@ -111,6 +109,17 @@ void arch_spin_relax(arch_spinlock_t *lock) } EXPORT_SYMBOL(arch_spin_relax); +int arch_spin_trylock_retry(arch_spinlock_t *lp) +{ + int count; + + for (count = spin_retry; count > 0; count--) + if (arch_spin_trylock_once(lp)) + return 1; + return 0; +} +EXPORT_SYMBOL(arch_spin_trylock_retry); + void _raw_read_lock_wait(arch_rwlock_t *rw) { unsigned int old; @@ -121,10 +130,10 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) smp_yield(); count = spin_retry; } - if (!arch_read_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) continue; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return; } } @@ -141,12 +150,13 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) smp_yield(); count = spin_retry; } - if (!arch_read_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) continue; - old = rw->lock & 0x7fffffffU; local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return; + local_irq_restore(flags); } } EXPORT_SYMBOL(_raw_read_lock_wait_flags); @@ -157,10 +167,10 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) int count = spin_retry; while (count-- > 0) { - if (!arch_read_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if ((int) old < 0) continue; - old = rw->lock & 0x7fffffffU; - if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old) + if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return 1; } return 0; @@ -169,6 +179,7 @@ EXPORT_SYMBOL(_raw_read_trylock_retry); void _raw_write_lock_wait(arch_rwlock_t *rw) { + unsigned int old; int count = spin_retry; while (1) { @@ -176,9 +187,10 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) smp_yield(); count = spin_retry; } - if (!arch_write_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if (old) continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return; } } @@ -186,6 +198,7 @@ EXPORT_SYMBOL(_raw_write_lock_wait); void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { + unsigned int old; int count = spin_retry; local_irq_restore(flags); @@ -194,23 +207,27 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) smp_yield(); count = spin_retry; } - if (!arch_write_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if (old) continue; local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return; + local_irq_restore(flags); } } EXPORT_SYMBOL(_raw_write_lock_wait_flags); int _raw_write_trylock_retry(arch_rwlock_t *rw) { + unsigned int old; int count = spin_retry; while (count-- > 0) { - if (!arch_write_can_lock(rw)) + old = ACCESS_ONCE(rw->lock); + if (old) continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0) + if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return 1; } return 0; diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 7416efe8eae..53dd5d7a0c9 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -76,7 +76,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, { unsigned long tmp1, tmp2; - update_primary_asce(current); + load_kernel_asce(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -159,7 +159,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, { unsigned long tmp1, tmp2; - update_primary_asce(current); + load_kernel_asce(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -225,7 +225,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user { unsigned long tmp1; - update_primary_asce(current); + load_kernel_asce(); asm volatile( " sacf 256\n" " "AHI" %0,-1\n" @@ -292,7 +292,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; - update_primary_asce(current); + load_kernel_asce(); asm volatile( " sacf 256\n" " "AHI" %0,-1\n" @@ -358,7 +358,7 @@ unsigned long __strnlen_user(const char __user *src, unsigned long size) { if (unlikely(!size)) return 0; - update_primary_asce(current); + load_kernel_asce(); return strnlen_user_srst(src, size); } EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2f51a998a67..3f3b35403d0 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -415,7 +415,7 @@ static inline int do_exception(struct pt_regs *regs, int access) * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. */ - clear_tsk_thread_flag(tsk, TIF_PER_TRAP); + clear_pt_regs_flag(regs, PIF_PER_TRAP); if (notify_page_fault(regs)) return 0; diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c index cca388253a3..5535cfe0ee1 100644 --- a/arch/s390/mm/mem_detect.c +++ b/arch/s390/mm/mem_detect.c @@ -6,130 +6,60 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/memblock.h> +#include <linux/init.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include <asm/ipl.h> #include <asm/sclp.h> #include <asm/setup.h> #define ADDR2G (1ULL << 31) -static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize) +#define CHUNK_READ_WRITE 0 +#define CHUNK_READ_ONLY 1 + +static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size) +{ + memblock_add_range(&memblock.memory, start, size, 0, 0); + memblock_add_range(&memblock.physmem, start, size, 0, 0); +} + +void __init detect_memory_memblock(void) { unsigned long long memsize, rnmax, rzm; - unsigned long addr = 0, size; - int i = 0, type; + unsigned long addr, size; + int type; rzm = sclp_get_rzm(); rnmax = sclp_get_rnmax(); memsize = rzm * rnmax; if (!rzm) rzm = 1ULL << 17; - if (sizeof(long) == 4) { + if (IS_ENABLED(CONFIG_32BIT)) { rzm = min(ADDR2G, rzm); - memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; + memsize = min(ADDR2G, memsize); } - if (maxsize) - memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize; + max_physmem_end = memsize; + addr = 0; + /* keep memblock lists close to the kernel */ + memblock_set_bottom_up(true); do { size = 0; type = tprot(addr); do { size += rzm; - if (memsize && addr + size >= memsize) + if (max_physmem_end && addr + size >= max_physmem_end) break; } while (type == tprot(addr + size)); if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - if (memsize && (addr + size > memsize)) - size = memsize - addr; - chunk[i].addr = addr; - chunk[i].size = size; - chunk[i].type = type; - i++; + if (max_physmem_end && (addr + size > max_physmem_end)) + size = max_physmem_end - addr; + memblock_physmem_add(addr, size); } addr += size; - } while (addr < memsize && i < MEMORY_CHUNKS); -} - -/** - * detect_memory_layout - fill mem_chunk array with memory layout data - * @chunk: mem_chunk array to be filled - * @maxsize: maximum address where memory detection should stop - * - * Fills the passed in memory chunk array with the memory layout of the - * machine. The array must have a size of at least MEMORY_CHUNKS and will - * be fully initialized afterwards. - * If the maxsize paramater has a value > 0 memory detection will stop at - * that address. It is guaranteed that all chunks have an ending address - * that is smaller than maxsize. - * If maxsize is 0 all memory will be detected. - */ -void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize) -{ - unsigned long flags, flags_dat, cr0; - - memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - /* - * Disable IRQs, DAT and low address protection so tprot does the - * right thing and we don't get scheduled away with low address - * protection disabled. - */ - local_irq_save(flags); - flags_dat = __arch_local_irq_stnsm(0xfb); - /* - * In case DAT was enabled, make sure chunk doesn't reside in vmalloc - * space. We have disabled DAT and any access to vmalloc area will - * cause an exception. - * If DAT was disabled we are called from early ipl code. - */ - if (test_bit(5, &flags_dat)) { - if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk))) - goto out; - } - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); - find_memory_chunks(chunk, maxsize); - __ctl_load(cr0, 0, 0); -out: - __arch_local_irq_ssm(flags_dat); - local_irq_restore(flags); -} -EXPORT_SYMBOL(detect_memory_layout); - -/* - * Create memory hole with given address and size. - */ -void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, - unsigned long size) -{ - int i; - - for (i = 0; i < MEMORY_CHUNKS; i++) { - struct mem_chunk *chunk = &mem_chunk[i]; - - if (chunk->size == 0) - continue; - if (addr > chunk->addr + chunk->size) - continue; - if (addr + size <= chunk->addr) - continue; - /* Split */ - if ((addr > chunk->addr) && - (addr + size < chunk->addr + chunk->size)) { - struct mem_chunk *new = chunk + 1; - - memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new)); - new->addr = addr + size; - new->size = chunk->addr + chunk->size - new->addr; - chunk->size = addr - chunk->addr; - continue; - } else if ((addr <= chunk->addr) && - (addr + size >= chunk->addr + chunk->size)) { - memmove(chunk, chunk + 1, (MEMORY_CHUNKS-i-1) * sizeof(*chunk)); - memset(&mem_chunk[MEMORY_CHUNKS-1], 0, sizeof(*chunk)); - } else if (addr + size < chunk->addr + chunk->size) { - chunk->size = chunk->addr + chunk->size - addr - size; - chunk->addr = addr + size; - } else if (addr > chunk->addr) { - chunk->size = addr - chunk->addr; - } - } + } while (addr < max_physmem_end); + memblock_set_bottom_up(false); + if (!max_physmem_end) + max_physmem_end = memblock_end_of_DRAM(); } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 27c50f4d90c..a90d45e9dfb 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -12,8 +12,6 @@ #include <linux/mm.h> #include <linux/gfp.h> #include <linux/init.h> -#include <asm/setup.h> -#include <asm/ipl.h> #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 @@ -43,14 +41,6 @@ void __init cmma_init(void) if (!cmma_flag) return; - /* - * Disable CMM for dump, otherwise the tprot based memory - * detection can fail because of unstable pages. - */ - if (OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP) { - cmma_flag = 0; - return; - } asm volatile( " .insn rrf,0xb9ab0000,%1,%1,0,0\n" "0: la %0,0\n" diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index d7cfd57815f..7881d4eb8b6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -53,8 +53,10 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - if (current->active_mm == mm) - update_user_asce(mm, 1); + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); + } __tlb_flush_local(); } @@ -108,7 +110,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) pgd_t *pgd; if (current->active_mm == mm) { - clear_user_asce(mm, 1); + clear_user_asce(); __tlb_flush_mm(mm); } while (mm->context.asce_limit > limit) { @@ -134,7 +136,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) crst_table_free(mm, (unsigned long *) pgd); } if (current->active_mm == mm) - update_user_asce(mm, 1); + set_user_asce(mm); } #endif diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 72b04de1828..fe9012a49aa 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -10,6 +10,7 @@ #include <linux/list.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/memblock.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/setup.h> @@ -66,7 +67,8 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) if (slab_is_available()) pte = (pte_t *) page_table_alloc(&init_mm, address); else - pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); + pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), + PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; clear_table((unsigned long *) pte, _PAGE_INVALID, @@ -371,16 +373,14 @@ out: void __init vmem_map_init(void) { unsigned long ro_start, ro_end; - unsigned long start, end; - int i; + struct memblock_region *reg; + phys_addr_t start, end; ro_start = PFN_ALIGN((unsigned long)&_stext); ro_end = (unsigned long)&_eshared & PAGE_MASK; - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; - start = memory_chunk[i].addr; - end = memory_chunk[i].addr + memory_chunk[i].size; + for_each_memblock(memory, reg) { + start = reg->base; + end = reg->base + reg->size - 1; if (start >= ro_end || end <= ro_start) vmem_add_mem(start, end - start, 0); else if (start >= ro_start && end <= ro_end) @@ -400,23 +400,21 @@ void __init vmem_map_init(void) } /* - * Convert memory chunk array to a memory segment list so there is a single - * list that contains both r/w memory and shared memory segments. + * Convert memblock.memory to a memory segment list so there is a single + * list that contains all memory segments. */ static int __init vmem_convert_memory_chunk(void) { + struct memblock_region *reg; struct memory_segment *seg; - int i; mutex_lock(&vmem_mutex); - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (!memory_chunk[i].size) - continue; + for_each_memblock(memory, reg) { seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); - seg->start = memory_chunk[i].addr; - seg->size = memory_chunk[i].size; + seg->start = reg->base; + seg->size = reg->size; insert_memory_segment(seg); } mutex_unlock(&vmem_mutex); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 276f2e26c76..e53c6f26880 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -209,13 +209,11 @@ static void init_all_cpu_buffers(void) } } -static int prepare_cpu_buffers(void) +static void prepare_cpu_buffers(void) { - int cpu; - int rc; struct hws_cpu_buffer *cb; + int cpu; - rc = 0; for_each_online_cpu(cpu) { cb = &per_cpu(sampler_cpu_buffer, cpu); atomic_set(&cb->ext_params, 0); @@ -230,8 +228,6 @@ static int prepare_cpu_buffers(void) cb->oom = 0; cb->stop_mode = 0; } - - return rc; } /* @@ -1107,9 +1103,7 @@ int hwsampler_start_all(unsigned long rate) if (rc) goto start_all_exit; - rc = prepare_cpu_buffers(); - if (rc) - goto start_all_exit; + prepare_cpu_buffers(); for_each_online_cpu(cpu) { rc = start_sampling(cpu); @@ -1156,7 +1150,7 @@ int hwsampler_stop_all(void) rc = 0; if (hws_state == HWS_INIT) { mutex_unlock(&hws_sem); - return rc; + return 0; } hws_state = HWS_STOPPING; mutex_unlock(&hws_sem); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index c747394029e..96545d7659f 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -114,6 +114,16 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, zdev->end_dma = response->edma; zdev->pchid = response->pchid; zdev->pfgid = response->pfgid; + zdev->pft = response->pft; + zdev->vfn = response->vfn; + zdev->uid = response->uid; + + memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip)); + if (response->util_str_avail) { + memcpy(zdev->util_str, response->util_str, + sizeof(zdev->util_str)); + } + return 0; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 01e251b1da0..6d7f5a3016c 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -76,7 +76,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) switch (ccdf->pec) { case 0x0301: /* Standby -> Configured */ - if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED) + if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; zdev->fh = ccdf->fh; @@ -86,7 +86,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) pci_rescan_bus(zdev->bus); break; case 0x0302: /* Reserved -> Standby */ - clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + if (!zdev) + clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0303: /* Deconfiguration requested */ if (pdev) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index b56a3958f1a..9190214b870 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -12,43 +12,29 @@ #include <linux/stat.h> #include <linux/pci.h> -static ssize_t show_fid(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - - return sprintf(buf, "0x%08x\n", zdev->fid); -} -static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL); - -static ssize_t show_fh(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - - return sprintf(buf, "0x%08x\n", zdev->fh); -} -static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL); - -static ssize_t show_pchid(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - - return sprintf(buf, "0x%04x\n", zdev->pchid); -} -static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL); - -static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - - return sprintf(buf, "0x%02x\n", zdev->pfgid); -} -static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); - -static ssize_t store_recover(struct device *dev, struct device_attribute *attr, +#define zpci_attr(name, fmt, member) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); \ + \ + return sprintf(buf, fmt, zdev->member); \ +} \ +static DEVICE_ATTR_RO(name) + +zpci_attr(function_id, "0x%08x\n", fid); +zpci_attr(function_handle, "0x%08x\n", fh); +zpci_attr(pchid, "0x%04x\n", pchid); +zpci_attr(pfgid, "0x%02x\n", pfgid); +zpci_attr(vfn, "0x%04x\n", vfn); +zpci_attr(pft, "0x%02x\n", pft); +zpci_attr(uid, "0x%x\n", uid); +zpci_attr(segment0, "0x%02x\n", pfip[0]); +zpci_attr(segment1, "0x%02x\n", pfip[1]); +zpci_attr(segment2, "0x%02x\n", pfip[2]); +zpci_attr(segment3, "0x%02x\n", pfip[3]); + +static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); @@ -70,20 +56,55 @@ static ssize_t store_recover(struct device *dev, struct device_attribute *attr, pci_rescan_bus(zdev->bus); return count; } -static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover); +static DEVICE_ATTR_WO(recover); + +static ssize_t util_string_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + + return memory_read_from_buffer(buf, count, &off, zdev->util_str, + sizeof(zdev->util_str)); +} +static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static struct bin_attribute *zpci_bin_attrs[] = { + &bin_attr_util_string, + NULL, +}; static struct attribute *zpci_dev_attrs[] = { &dev_attr_function_id.attr, &dev_attr_function_handle.attr, &dev_attr_pchid.attr, &dev_attr_pfgid.attr, + &dev_attr_pft.attr, + &dev_attr_vfn.attr, + &dev_attr_uid.attr, &dev_attr_recover.attr, NULL, }; static struct attribute_group zpci_attr_group = { .attrs = zpci_dev_attrs, + .bin_attrs = zpci_bin_attrs, }; + +static struct attribute *pfip_attrs[] = { + &dev_attr_segment0.attr, + &dev_attr_segment1.attr, + &dev_attr_segment2.attr, + &dev_attr_segment3.attr, + NULL, +}; +static struct attribute_group pfip_attr_group = { + .name = "pfip", + .attrs = pfip_attrs, +}; + const struct attribute_group *zpci_attr_groups[] = { &zpci_attr_group, + &pfip_attr_group, NULL, }; |